diff --git a/docs/develop/gettingstarted.rst b/docs/develop/gettingstarted.rst
index fed632ea1a..9c4d0d9714 100644
--- a/docs/develop/gettingstarted.rst
+++ b/docs/develop/gettingstarted.rst
@@ -234,6 +234,24 @@ You can also specify which board to use:
 See `ports/stm32/boards <https://github.com/micropython/micropython/tree/master/ports/stm32/boards>`_
 for the available boards. e.g. "PYBV11" or "NUCLEO_WB55".
 
+Compile-time format string checking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When gcc is used to build MicroPython, a plugin can be used for compile-time
+checking of ``mp_printf`` format strings. The plugin is enabled by setting the
+Makefile variable ``MICROPY_USE_COMPILER_PLUGIN=gcc`` before including
+``py/mkrules.mk``.
+
+The plugin doesn't work:
+ * With non-gcc compilers (including clang, which is sometimes installed on
+   Macs with the name "gcc")
+ * On Windows systems, where the steps for building a plugin are more complicated
+ * With MicroPython builds that use cmake rather than traditional make.
+ * If the necessary files for plugin development are not installed. In Debian
+   bookworm, for instance, the files to build plugins for ``gcc`` are in
+   ``gcc-12-plugin-dev``, and the files to build plugins for
+   ``riscv64-unknown-elf-gcc`` are in ``gcc-12-plugin-dev-riscv64-linux-gnu``.
+
 Building the documentation
 --------------------------
 
diff --git a/py/gccplugin.mk b/py/gccplugin.mk
new file mode 100644
index 0000000000..3b02be2259
--- /dev/null
+++ b/py/gccplugin.mk
@@ -0,0 +1,23 @@
+PLUGINDIR := $(shell $(CC) -print-file-name=plugin)/include
+ifeq ($(realpath $(PLUGINDIR)/gcc-plugin.h),)
+$(error plugin header $(PLUGINDIR)/gcc-plugin.h for $(CC) does not exist. Install the correct gcc-plugins package.)
+endif
+HOSTCXX ?= g++
+
+CHECKS_PLUGIN := $(BUILD)/micropython_checks.so
+CFLAGS += -fplugin=$(CHECKS_PLUGIN)
+
+.PHONY: plugin
+plugin: $(CHECKS_PLUGIN)
+
+$(CHECKS_PLUGIN): $(TOP)/tools/micropython_checks.cc | $(BUILD)/
+	$(ECHO) "PLUGIN $@"
+	$(Q)$(HOSTCXX) -o $@ -shared $^ \
+	    -std=gnu++11 -fPIC -Wall -O -fno-rtti \
+	    -I$(PLUGINDIR)
+
+# All objects depend on the checks plugin
+$(OBJ): $(CHECKS_PLUGIN)
+
+# And so do these very special targets
+QSTR_GLOBAL_REQUIREMENTS += $(CHECKS_PLUGIN)
diff --git a/py/mkrules.mk b/py/mkrules.mk
index 3120066fd4..e0cbd45e3c 100644
--- a/py/mkrules.mk
+++ b/py/mkrules.mk
@@ -20,6 +20,10 @@ OBJ_EXTRA_ORDER_DEPS =
 # Generate header files.
 OBJ_EXTRA_ORDER_DEPS += $(HEADER_BUILD)/moduledefs.h $(HEADER_BUILD)/root_pointers.h
 
+ifeq ($(MICROPY_USE_COMPILER_PLUGIN),gcc)
+include $(TOP)/py/gccplugin.mk
+endif
+
 ifeq ($(MICROPY_ROM_TEXT_COMPRESSION),1)
 # If compression is enabled, trigger the build of compressed.data.h...
 OBJ_EXTRA_ORDER_DEPS += $(HEADER_BUILD)/compressed.data.h
diff --git a/tools/micropython_checks.cc b/tools/micropython_checks.cc
new file mode 100644
index 0000000000..de794f1b78
--- /dev/null
+++ b/tools/micropython_checks.cc
@@ -0,0 +1,435 @@
+// SPDX-FileCopyrightText: 2014 Roger Ferrer Ibanez
+// SPDX-FileCopyrightText: 2020 Eddy S
+// SPDX-FileCopyrightText: 2025 Jeff Epler
+//
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+// GCC plugin that checks, at compile time, the variadic arguments of
+// mp_printf() calls against the conversions in a literal format string.
+
+#include <cstring>
+#include <iostream>
+#include <set>
+
+// This is the first gcc header to be included
+#include "gcc-plugin.h"
+#include "plugin-version.h"
+
+// gcc headers have order, don't blindly re-order
+#include "cp/cp-tree.h"
+#include "context.h"
+#include "function.h"
+#include "internal-fn.h"
+#include "is-a.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "tree.h"
+#include "tree-ssa-alias.h"
+#include "gimple-expr.h"
+#include "gimple.h"
+#include "gimple-ssa.h"
+#include "tree-pretty-print.h"
+#include "tree-pass.h"
+#include "tree-ssa-operands.h"
+#include "tree-phinodes.h"
+#include "print-tree.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "gimple-pretty-print.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "diagnostic.h"
+#include "stringpool.h"
+
+#include "ssa-iterators.h"
+
+// We must assert that this plugin is GPL compatible
+int plugin_is_GPL_compatible;
+
+static struct plugin_info my_gcc_plugin_info = {
+    "1.0",
+    "Validate MicroPython mp_printf argument types against format string"};
+
+namespace {
+const pass_data micropython_checks_data = {
+    GIMPLE_PASS,
+    "micropython_checks", /* name */
+    OPTGROUP_NONE,        /* optinfo_flags */
+    TV_NONE,              /* tv_id */
+    PROP_gimple_any,      /* properties_required */
+    0,                    /* properties_provided */
+    0,                    /* properties_destroyed */
+    0,                    /* todo_flags_start */
+    0                     /* todo_flags_finish */
+};
+
+// Checks the variadic arguments of one mp_printf call against the
+// conversions in its literal format string, reporting mismatches as
+// -Wformat warnings at the location of the call.
+class mpprint_call_checker {
+public:
+    // stmt is the GIMPLE_CALL; fmt is its NUL-terminated format string.
+    mpprint_call_checker(gimple *stmt, const char *fmt)
+        : stmt_(stmt), loc_(gimple_location(stmt)),
+          nargs_(gimple_call_num_args(stmt)), argno_(2), fmt_(fmt),
+          spec_start_(fmt) {}
+
+    // Parse the whole format string, checking one argument per conversion,
+    // then verify that the call supplied exactly as many as were used.
+    void run() {
+        for (;;) {
+            // Skip literal text up to the next conversion specification.
+            while (*fmt_ != '\0' && *fmt_ != '%') {
+                ++fmt_;
+            }
+            if (*fmt_ == '\0') {
+                break;
+            }
+
+            spec_start_ = fmt_;
+
+            // move past % character
+            ++fmt_;
+
+            // "%%" consumes no argument
+            if (*fmt_ == '%') {
+                ++fmt_;
+                continue;
+            }
+
+            // parse flags, if they exist
+            while (*fmt_ == '-' || *fmt_ == '+' || *fmt_ == ' ' ||
+                   *fmt_ == '0') {
+                ++fmt_;
+            }
+
+            // parse width, if it exists (only its extent matters here)
+            skip_digits();
+
+            // parse precision, if it exists; ".*" takes an int argument
+            if (*fmt_ == '.') {
+                ++fmt_;
+                if (*fmt_ == '*') {
+                    ++fmt_;
+                    require_int();
+                } else {
+                    skip_digits();
+                }
+            }
+
+            // parse long specifier; a second 'l' is handled in the switch
+            bool long_arg = false;
+            if (*fmt_ == 'l') {
+                ++fmt_;
+                long_arg = true;
+            }
+
+            switch (*fmt_) {
+            case 'b':
+            case 'B':
+            case 'i':
+            case 'u':
+            case 'd':
+            case 'x':
+            case 'X':
+            case 'o':
+            case 'O':
+                if (long_arg) {
+                    require_long();
+                } else {
+                    require_int();
+                }
+                break;
+
+            case 'c':
+                require_int();
+                break;
+
+            case 'q':
+                require_qstr();
+                break;
+
+            case 's':
+            case 'p':
+            case 'P':
+                require_ptr();
+                break;
+
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'F':
+            case 'g':
+            case 'G':
+                require_double();
+                break;
+
+            case 'l':
+                ++fmt_;
+                if (*fmt_ != 'u' && *fmt_ != 'd' && *fmt_ != 'x' &&
+                    *fmt_ != 'X') {
+                    warning_at(loc_, OPT_Wformat_,
+                               "Bad format specification with ll%c", *fmt_);
+                }
+                require_long_long();
+                break;
+
+            default:
+                warning_at(loc_, OPT_Wformat_,
+                           "Bad format specification with %c", *fmt_);
+                break;
+            }
+        }
+
+        if (argno_ != nargs_) {
+            warning_at(loc_, OPT_Wformat_,
+                       "Wrong # arguments. Supplied %u, used %u", nargs_,
+                       argno_);
+        }
+    }
+
+private:
+    // Advance past a run of decimal digits.
+    void skip_digits() {
+        while ('0' <= *fmt_ && *fmt_ <= '9') {
+            ++fmt_;
+        }
+    }
+
+    // Length of the conversion currently being checked, for %.*s.
+    int spec_len() const { return (int)(fmt_ - spec_start_ + 1); }
+
+    // Consume the next argument slot. Returns NULL_TREE when the format
+    // uses more arguments than the call supplied; run() reports that case
+    // through its final count check, so callers just skip the type check.
+    tree next_arg() {
+        unsigned idx = argno_++;
+        return idx < nargs_ ? gimple_call_arg(stmt_, idx) : NULL_TREE;
+    }
+
+    // Require a REAL_TYPE argument with the same precision as double.
+    void require_double() {
+        tree arg = next_arg();
+        if (!arg) {
+            return;
+        }
+        tree tp = TREE_TYPE(arg);
+        if (tp) {
+            tp = TYPE_CANONICAL(tp);
+        }
+        if (!tp || TREE_CODE(tp) != REAL_TYPE ||
+            TYPE_PRECISION(tp) != TYPE_PRECISION(double_type_node)) {
+            warning_at(loc_, OPT_Wformat_,
+                       "argument %d: Format %q.*s requires a "
+                       "%<double%> argument, not %qE",
+                       (int)argno_, spec_len(), spec_start_, arg);
+        }
+    }
+
+    // Require an argument whose type is a POINTER_TYPE.
+    void require_ptr() {
+        tree arg = next_arg();
+        if (!arg) {
+            return;
+        }
+        if (TREE_CODE(TREE_TYPE(arg)) != POINTER_TYPE) {
+            warning_at(loc_, OPT_Wformat_,
+                       "argument %d: Format %q.*s requires a pointer "
+                       "argument, not %qE",
+                       (int)argno_, spec_len(), spec_start_, arg);
+        }
+    }
+
+    // Require an INTEGER_TYPE argument with the precision of type1; type2
+    // is only used in the diagnostic text.
+    void require_sized_int(tree type1, tree type2) {
+        tree arg = next_arg();
+        if (!arg) {
+            return;
+        }
+        tree tp = TREE_TYPE(arg);
+        if (tp && (TREE_CODE(tp) != INTEGER_TYPE ||
+                   TYPE_PRECISION(tp) != TYPE_PRECISION(type1))) {
+            warning_at(loc_, OPT_Wformat_,
+                       "argument %d: Format %q.*s requires a %qT or "
+                       "%qT (%d bits), not %qT [size %d]",
+                       (int)argno_, spec_len(), spec_start_, type1, type2,
+                       (int)TYPE_PRECISION(type2), tp,
+                       (int)TYPE_PRECISION(tp));
+        }
+    }
+
+    // Wrappers selecting the signed/unsigned type pair for each size.
+    void require_int() {
+        require_sized_int(integer_type_node, unsigned_type_node);
+    }
+    void require_long() {
+        require_sized_int(long_integer_type_node, long_unsigned_type_node);
+    }
+    void require_long_long() {
+        require_sized_int(long_long_integer_type_node,
+                          long_long_unsigned_type_node);
+    }
+    void require_qstr() {
+        require_sized_int(size_type_node, signed_size_type_node);
+    }
+
+    gimple *stmt_;
+    location_t loc_;
+    unsigned nargs_;
+    unsigned argno_;         // index of the next argument to check
+    const char *fmt_;        // current parse position
+    const char *spec_start_; // '%' of the conversion being checked
+};
+
+struct micropython_checks : gimple_opt_pass {
+    micropython_checks(gcc::context *ctx)
+        : gimple_opt_pass(micropython_checks_data, ctx) {}
+
+    // Pass entry point: checks every mp_printf call found in fun.
+    // Always returns 0.
+    virtual unsigned int execute(function *fun) override {
+        walk(fun);
+
+        return 0;
+    }
+
+    virtual micropython_checks *clone() override {
+        // We do not clone ourselves
+        return this;
+    }
+
+    // True if fn is the constant address of a function named "mp_printf".
+    static bool is_mpprint_call(const tree fn) {
+        if (!fn || TREE_CODE(fn) != ADDR_EXPR || !TREE_CONSTANT(fn)) {
+            return false;
+        }
+        const tree decl = TREE_OPERAND(fn, 0);
+        if (TREE_CODE(decl) != FUNCTION_DECL) {
+            return false;
+        }
+        const tree name = DECL_NAME(decl);
+        if (!name || TREE_CODE(name) != IDENTIFIER_NODE) {
+            return false;
+        }
+        return strcmp(IDENTIFIER_POINTER(name), "mp_printf") == 0;
+    }
+
+    // stmt must be a GIMPLE_CALL. Returns the format string when stmt calls
+    // mp_printf with the address of a string constant as argument 1; NULL
+    // for other callees and for mp_printf calls with a non-literal format.
+    static const char *mpprint_format_arg(gimple *stmt) {
+        if (!is_mpprint_call(gimple_call_fn(stmt))) {
+            return NULL;
+        }
+        if (gimple_call_num_args(stmt) < 2) {
+            return NULL;
+        }
+        tree arg = gimple_call_arg(stmt, 1);
+        if (!TREE_CONSTANT(arg) || TREE_CODE(arg) != ADDR_EXPR) {
+            return NULL;
+        }
+        tree str = TREE_OPERAND(arg, 0);
+        if (TREE_CODE(str) != STRING_CST) {
+            return NULL;
+        }
+        return TREE_STRING_POINTER(str);
+    }
+
+    // Visit every statement of fun and check each mp_printf call that has
+    // a literal format string.
+    void walk(function *fun) {
+        basic_block bb;
+        FOR_ALL_BB_FN(bb, fun) {
+            for (gimple_stmt_iterator gsi = gsi_start_bb(bb);
+                 !gsi_end_p(gsi); gsi_next(&gsi)) {
+                gimple *stmt = gsi_stmt(gsi);
+                if (gimple_code(stmt) != GIMPLE_CALL) {
+                    continue;
+                }
+                if (const char *fmt = mpprint_format_arg(stmt)) {
+                    mpprint_call_checker(stmt, fmt).run();
+                }
+            }
+        }
+    }
+
+    // Not called from execute() or walk(). Warns about calls whose result
+    // (a member of unused_lhs) is ignored when the callee's type carries
+    // the "micropython_checks" attribute.
+    void micropython_checks_lhs(const std::set<tree> &unused_lhs,
+                                function *fun) {
+        basic_block bb;
+        FOR_ALL_BB_FN(bb, fun) {
+            gimple_stmt_iterator gsi;
+            for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
+                gimple *stmt = gsi_stmt(gsi);
+
+                switch (gimple_code(stmt)) {
+                case GIMPLE_CALL: {
+                    tree lhs = gimple_call_lhs(stmt);
+                    if (unused_lhs.find(lhs) != unused_lhs.end()) {
+                        // Deliberately similar to the code in tree-cfg.c
+                        tree fdecl = gimple_call_fndecl(stmt);
+                        tree ftype = gimple_call_fntype(stmt);
+
+                        if (lookup_attribute("micropython_checks",
+                                             TYPE_ATTRIBUTES(ftype))) {
+                            location_t loc = gimple_location(stmt);
+
+                            if (fdecl)
+                                warning_at(loc, OPT_Wunused_result,
+                                           "ignoring return value of %qD, "
+                                           "declared with attribute warn "
+                                           "unused result",
+                                           fdecl);
+                            else
+                                warning_at(loc, OPT_Wunused_result,
+                                           "ignoring return value of function "
+                                           "declared with attribute warn "
+                                           "unused result");
+                        }
+                    }
+                    break;
+                }
+                default:
+                    // Do nothing
+                    break;
+                }
+            }
+        }
+    }
+};
+} // namespace
+
+// Plugin entry point: registers the plugin info and the checking pass.
+// Returns 0 on success, 1 if the gcc version check fails.
+int plugin_init(struct plugin_name_args *plugin_info,
+                struct plugin_gcc_version *version) {
+    // We check the current gcc loading this plugin against the gcc we used to
+    // create this plugin
+    if (!plugin_default_version_check(version, &gcc_version)) {
+        std::cerr << "This GCC plugin is for version "
+                  << GCCPLUGIN_VERSION_MAJOR << "." << GCCPLUGIN_VERSION_MINOR
+                  << "\n";
+        return 1;
+    }
+
+    register_callback(plugin_info->base_name,
+                      /* event */ PLUGIN_INFO,
+                      /* callback */ NULL,
+                      /* user_data */ &my_gcc_plugin_info);
+
+    // Register the phase right after cfg
+    struct register_pass_info pass_info;
+
+    pass_info.pass = new micropython_checks(g);
+    pass_info.reference_pass_name = "cfg";
+    pass_info.ref_pass_instance_number = 1;
+    pass_info.pos_op = PASS_POS_INSERT_AFTER;
+
+    register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                      &pass_info);
+
+    return 0;
+}