Enable gprof onboard profiling (#2669)
Adds a menu item to enable onboard profiling. This requires significant RAM and really only makes sense on devices with PSRAM to store the state. When the menu item is selected, allocates RAM and tracks function calls and periodically samples the PC to generate a histogram of application usage. The onboard gmon.out file can be written over Semihosting or some other way to transfer to a PC for analysis. Adds a profiling example with command lines.
This commit is contained in:
parent
48bc91af36
commit
0061d3f97f
17 changed files with 1259 additions and 28 deletions
2
.github/workflows/pull-request.yml
vendored
2
.github/workflows/pull-request.yml
vendored
|
|
@ -20,7 +20,7 @@ jobs:
|
||||||
uses: codespell-project/actions-codespell@v2
|
uses: codespell-project/actions-codespell@v2
|
||||||
with:
|
with:
|
||||||
skip: ./ArduinoCore-API,./libraries/ESP8266SdFat,./libraries/Adafruit_TinyUSB_Arduino,./libraries/LittleFS/lib,./tools/pyserial,./pico-sdk,./.github,./docs/i2s.rst,./cores/rp2040/api,./libraries/FreeRTOS,./tools/libbearssl/bearssl,./include,./libraries/WiFi/examples/BearSSL_Server,./ota/uzlib,./libraries/http-parser/lib,./libraries/WebServer/examples/HelloServerBearSSL/HelloServerBearSSL.ino,./libraries/HTTPUpdateServer/examples/SecureBearSSLUpdater/SecureBearSSLUpdater.ino,./.git,./libraries/FatFS/lib/fatfs,./libraries/FatFS/src/diskio.h,./libraries/FatFS/src/ff.cpp,./libraries/FatFS/src/ffconf.h,./libraries/FatFS/src/ffsystem.cpp,./libraries/FatFS/src/ff.h,./libraries/lwIP_WINC1500/src/driver,./libraries/lwIP_WINC1500/src/common,./libraries/lwIP_WINC1500/src/bus_wrapper,./libraries/lwIP_WINC1500/src/spi_flash
|
skip: ./ArduinoCore-API,./libraries/ESP8266SdFat,./libraries/Adafruit_TinyUSB_Arduino,./libraries/LittleFS/lib,./tools/pyserial,./pico-sdk,./.github,./docs/i2s.rst,./cores/rp2040/api,./libraries/FreeRTOS,./tools/libbearssl/bearssl,./include,./libraries/WiFi/examples/BearSSL_Server,./ota/uzlib,./libraries/http-parser/lib,./libraries/WebServer/examples/HelloServerBearSSL/HelloServerBearSSL.ino,./libraries/HTTPUpdateServer/examples/SecureBearSSLUpdater/SecureBearSSLUpdater.ino,./.git,./libraries/FatFS/lib/fatfs,./libraries/FatFS/src/diskio.h,./libraries/FatFS/src/ff.cpp,./libraries/FatFS/src/ffconf.h,./libraries/FatFS/src/ffsystem.cpp,./libraries/FatFS/src/ff.h,./libraries/lwIP_WINC1500/src/driver,./libraries/lwIP_WINC1500/src/common,./libraries/lwIP_WINC1500/src/bus_wrapper,./libraries/lwIP_WINC1500/src/spi_flash
|
||||||
ignore_words_list: ser,dout,shiftIn,acount
|
ignore_words_list: ser,dout,shiftIn,acount,froms
|
||||||
- name: Get submodules for following tests
|
- name: Get submodules for following tests
|
||||||
run: git submodule update --init
|
run: git submodule update --init
|
||||||
- name: Check package references
|
- name: Check package references
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,8 @@ Read the [Contributing Guide](https://github.com/earlephilhower/arduino-pico/blo
|
||||||
* printf (i.e. debug) output over USB serial
|
* printf (i.e. debug) output over USB serial
|
||||||
* Transparent use of PSRAM globals and heap (RP2350 only)
|
* Transparent use of PSRAM globals and heap (RP2350 only)
|
||||||
* ARM or RISC-V (Hazard3) support for the RP2350
|
* ARM or RISC-V (Hazard3) support for the RP2350
|
||||||
|
* Semihosted serial and file system access
|
||||||
|
* GPROF profiling support
|
||||||
|
|
||||||
The RP2040 PIO state machines (SMs) are used to generate jitter-free:
|
The RP2040 PIO state machines (SMs) are used to generate jitter-free:
|
||||||
* Servos
|
* Servos
|
||||||
|
|
|
||||||
457
boards.txt
457
boards.txt
File diff suppressed because it is too large
Load diff
|
|
@ -18,9 +18,11 @@
|
||||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <Arduino.h>
|
||||||
|
#include <pico/runtime.h>
|
||||||
|
|
||||||
#ifdef PICO_RP2040
|
#ifdef PICO_RP2040
|
||||||
|
|
||||||
#include <Arduino.h>
|
|
||||||
#include <hardware/structs/psm.h>
|
#include <hardware/structs/psm.h>
|
||||||
|
|
||||||
extern "C" void boot_double_tap_check();
|
extern "C" void boot_double_tap_check();
|
||||||
|
|
@ -35,3 +37,17 @@ void RP2040::enableDoubleResetBootloader() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __PROFILE
|
||||||
|
Stream *__profileFile;
|
||||||
|
int __writeProfileCB(const void *data, int len) {
|
||||||
|
return __profileFile->write((const char *)data, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __PROFILE
|
||||||
|
extern "C" void runtime_init_setup_profiling();
|
||||||
|
#define PICO_RUNTIME_INIT_PROFILING "11011" // Towards the end, after PSRAM
|
||||||
|
PICO_RUNTIME_INIT_FUNC_RUNTIME(runtime_init_setup_profiling, PICO_RUNTIME_INIT_PROFILING);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,8 @@
|
||||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#include <hardware/clocks.h>
|
#include <hardware/clocks.h>
|
||||||
#include <hardware/irq.h>
|
#include <hardware/irq.h>
|
||||||
#include <hardware/pio.h>
|
#include <hardware/pio.h>
|
||||||
|
|
@ -45,6 +47,13 @@
|
||||||
|
|
||||||
extern "C" volatile bool __otherCoreIdled;
|
extern "C" volatile bool __otherCoreIdled;
|
||||||
|
|
||||||
|
// C-linkage declarations for the profiler; the contents only exist when __PROFILE is defined
extern "C" {
|
||||||
|
#ifdef __PROFILE
|
||||||
|
// Callback type used to emit profile data: receives a buffer pointer and a length
typedef int (*profileWriteCB)(const void *data, int len);
|
||||||
|
// Emits the collected profile through writeCB
extern void _writeProfile(profileWriteCB writeCB);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
class _MFIFO {
|
class _MFIFO {
|
||||||
public:
|
public:
|
||||||
_MFIFO() { /* noop */ };
|
_MFIFO() { /* noop */ };
|
||||||
|
|
@ -180,7 +189,7 @@ public:
|
||||||
|
|
||||||
void begin() {
|
void begin() {
|
||||||
_epoch = 0;
|
_epoch = 0;
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
if (!__isFreeRTOS) {
|
if (!__isFreeRTOS) {
|
||||||
// Enable SYSTICK exception
|
// Enable SYSTICK exception
|
||||||
exception_set_exclusive_handler(SYSTICK_EXCEPTION, _SystickHandler);
|
exception_set_exclusive_handler(SYSTICK_EXCEPTION, _SystickHandler);
|
||||||
|
|
@ -193,7 +202,7 @@ public:
|
||||||
_ccountPgm->prepare(&_pio, &_sm, &off);
|
_ccountPgm->prepare(&_pio, &_sm, &off);
|
||||||
ccount_program_init(_pio, _sm, off);
|
ccount_program_init(_pio, _sm, off);
|
||||||
pio_sm_set_enabled(_pio, _sm, true);
|
pio_sm_set_enabled(_pio, _sm, true);
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -217,7 +226,7 @@ public:
|
||||||
// Get CPU cycle count. Needs to do magic to extend 24b HW to something longer
|
// Get CPU cycle count. Needs to do magic to extend 24b HW to something longer
|
||||||
volatile uint64_t _epoch = 0;
|
volatile uint64_t _epoch = 0;
|
||||||
inline uint32_t getCycleCount() {
|
inline uint32_t getCycleCount() {
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
if (!__isFreeRTOS) {
|
if (!__isFreeRTOS) {
|
||||||
uint32_t epoch;
|
uint32_t epoch;
|
||||||
uint32_t ctr;
|
uint32_t ctr;
|
||||||
|
|
@ -229,13 +238,13 @@ public:
|
||||||
} else {
|
} else {
|
||||||
#endif
|
#endif
|
||||||
return ccount_read(_pio, _sm);
|
return ccount_read(_pio, _sm);
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline uint64_t getCycleCount64() {
|
inline uint64_t getCycleCount64() {
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
if (!__isFreeRTOS) {
|
if (!__isFreeRTOS) {
|
||||||
uint64_t epoch;
|
uint64_t epoch;
|
||||||
uint64_t ctr;
|
uint64_t ctr;
|
||||||
|
|
@ -247,7 +256,7 @@ public:
|
||||||
} else {
|
} else {
|
||||||
#endif
|
#endif
|
||||||
return ccount_read(_pio, _sm);
|
return ccount_read(_pio, _sm);
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__PROFILE)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -473,6 +482,21 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __PROFILE
|
||||||
|
// Dump the collected profiling data to the stream f.
// The stream is stored in the global __profileFile so that __writeProfileCB
// can reach it, then _writeProfile() emits the data through that callback.
void writeProfiling(Stream *f) {
    extern int __writeProfileCB(const void *data, int len);
    extern Stream *__profileFile;

    __profileFile = f;
    _writeProfile(__writeProfileCB);
}
|
||||||
|
|
||||||
|
// Return the value of __profileMemSize (the byte count the profiler recorded
// for its tables) as a size_t.
size_t getProfileMemoryUsage() {
    extern int __profileMemSize;
    return static_cast<size_t>(__profileMemSize);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static void _SystickHandler() {
|
static void _SystickHandler() {
|
||||||
|
|
|
||||||
|
|
@ -30,32 +30,36 @@ extern bool __isFreeRTOS;
|
||||||
// FreeRTOS has been set up
|
// FreeRTOS has been set up
|
||||||
extern volatile bool __freeRTOSinitted;
|
extern volatile bool __freeRTOSinitted;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
struct QueueDefinition; /* Using old naming convention so as not to break kernel aware debuggers. */
|
#endif // __cplusplus
|
||||||
typedef struct QueueDefinition * QueueHandle_t;
|
struct QueueDefinition; /* Using old naming convention so as not to break kernel aware debuggers. */
|
||||||
typedef QueueHandle_t SemaphoreHandle_t;
|
typedef struct QueueDefinition * QueueHandle_t;
|
||||||
typedef int32_t BaseType_t;
|
typedef QueueHandle_t SemaphoreHandle_t;
|
||||||
|
typedef int32_t BaseType_t;
|
||||||
|
|
||||||
extern bool __freertos_check_if_in_isr() __attribute__((weak));
|
extern bool __freertos_check_if_in_isr() __attribute__((weak));
|
||||||
|
|
||||||
extern SemaphoreHandle_t __freertos_mutex_create() __attribute__((weak));
|
extern SemaphoreHandle_t __freertos_mutex_create() __attribute__((weak));
|
||||||
extern SemaphoreHandle_t _freertos_recursive_mutex_create() __attribute__((weak));
|
extern SemaphoreHandle_t _freertos_recursive_mutex_create() __attribute__((weak));
|
||||||
|
|
||||||
extern void __freertos_mutex_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern void __freertos_mutex_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
|
|
||||||
extern int __freertos_mutex_take_from_isr(SemaphoreHandle_t mtx, BaseType_t* pxHigherPriorityTaskWoken) __attribute__((weak));
|
extern int __freertos_mutex_take_from_isr(SemaphoreHandle_t mtx, BaseType_t* pxHigherPriorityTaskWoken) __attribute__((weak));
|
||||||
extern int __freertos_mutex_try_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern int __freertos_mutex_try_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
extern void __freertos_mutex_give(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern void __freertos_mutex_give(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
extern void __freertos_mutex_give_from_isr(SemaphoreHandle_t mtx, BaseType_t* pxHigherPriorityTaskWoken) __attribute__((weak));
|
extern void __freertos_mutex_give_from_isr(SemaphoreHandle_t mtx, BaseType_t* pxHigherPriorityTaskWoken) __attribute__((weak));
|
||||||
|
|
||||||
extern void __freertos_recursive_mutex_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern void __freertos_recursive_mutex_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
extern int __freertos_recursive_mutex_try_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern int __freertos_recursive_mutex_try_take(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
extern void __freertos_recursive_mutex_give(SemaphoreHandle_t mtx) __attribute__((weak));
|
extern void __freertos_recursive_mutex_give(SemaphoreHandle_t mtx) __attribute__((weak));
|
||||||
|
|
||||||
extern void __freertos_idle_other_core() __attribute__((weak));
|
extern void __freertos_idle_other_core() __attribute__((weak));
|
||||||
extern void __freertos_resume_other_core() __attribute__((weak));
|
extern void __freertos_resume_other_core() __attribute__((weak));
|
||||||
|
|
||||||
extern void __freertos_task_exit_critical() __attribute__((weak));
|
extern void __freertos_task_exit_critical() __attribute__((weak));
|
||||||
extern void __freertos_task_enter_critical() __attribute__((weak));
|
extern void __freertos_task_enter_critical() __attribute__((weak));
|
||||||
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
extern SemaphoreHandle_t __get_freertos_mutex_for_ptr(mutex_t *m, bool recursive = false);
|
extern SemaphoreHandle_t __get_freertos_mutex_for_ptr(mutex_t *m, bool recursive = false);
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
|
||||||
470
cores/rp2040/gprof_gmon.c
Normal file
470
cores/rp2040/gprof_gmon.c
Normal file
|
|
@ -0,0 +1,470 @@
|
||||||
|
/* -
|
||||||
|
Copyright (c) 1983, 1992, 1993
|
||||||
|
The Regents of the University of California. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
4. Neither the name of the University nor the names of its contributors
|
||||||
|
may be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This code is built as a C file because otherwise G++ would add profiling
|
||||||
|
// code to the preamble of these functions as well, leading to an infinite
|
||||||
|
// loop in the mcount routine. Because the Arduino IDE can't (easily)
|
||||||
|
// apply different compile parameters to different files, we set all C++
|
||||||
|
// files to "-pg" but leave all C files uninstrumented.
|
||||||
|
|
||||||
|
// Original code and organization taken from https://mcuoneclipse.com/2015/08/23/tutorial-using-gnu-profiling-gprof-with-arm-cortex-m/
|
||||||
|
|
||||||
|
#include <Arduino.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Frequency of sampling PC
|
||||||
|
#ifndef GMON_HZ
|
||||||
|
#define GMON_HZ 10000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Fraction of text space to allocate for histogram counters: kcount[] takes textsize / HISTFRACTION bytes, i.e. one 16-bit counter per 2 * HISTFRACTION bytes of .text
|
||||||
|
#ifndef HISTFRACTION
|
||||||
|
#ifdef PICO_RP2350
|
||||||
|
#define HISTFRACTION 4 // Every 8 bytes of .text
|
||||||
|
#else
|
||||||
|
#define HISTFRACTION 8 // Every 16 bytes of .text
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Fraction of text space to allocate for from hash buckets.
|
||||||
|
// The value of HASHFRACTION is based on the minimum number of bytes
|
||||||
|
// of separation between two subroutine call points in the object code.
|
||||||
|
// Given MIN_SUBR_SEPARATION bytes of separation the value of
|
||||||
|
// HASHFRACTION is calculated as:
|
||||||
|
//
|
||||||
|
// HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
|
||||||
|
//
|
||||||
|
// For example, on the VAX, the shortest two call sequence is:
|
||||||
|
//
|
||||||
|
// calls $0,(r0)
|
||||||
|
// calls $0,(r0)
|
||||||
|
//
|
||||||
|
// which is separated by only three bytes, thus HASHFRACTION is
|
||||||
|
// calculated as:
|
||||||
|
//
|
||||||
|
// HASHFRACTION = 3 / (2 * 2 - 1) = 1
|
||||||
|
//
|
||||||
|
// Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
|
||||||
|
// is less than three, this algorithm will not work!
|
||||||
|
//
|
||||||
|
// In practice, however, call instructions are rarely at a minimal
|
||||||
|
// distance. Hence, we will define HASHFRACTION to be 2 across all
|
||||||
|
// architectures. This saves a reasonable amount of space for
|
||||||
|
// profiling data structures without (in practice) sacrificing
|
||||||
|
// any granularity.
|
||||||
|
#ifndef HASHFRACTION
|
||||||
|
#define HASHFRACTION 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Number of tostruct (arc) entries to allocate, as a percentage of the text size, clamped to the range MINARCS..MAXARCS.
|
||||||
|
#ifndef ARCDENSITY
|
||||||
|
#define ARCDENSITY 2 // This is in percentage, relative to text size!
|
||||||
|
#endif
|
||||||
|
#define MINARCS 50
|
||||||
|
#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Histogram counters are unsigned shorts (according to the kernel)
|
||||||
|
typedef uint16_t HISTCOUNTER; //#define HISTCOUNTER unsigned short
|
||||||
|
|
||||||
|
// One call-graph arc record. The original profiler keeps selfpc and count as
// full 32-bit values, which pads each record to 12 bytes (2 of them wasted).
// RAM is scarce on the Picos, so both fields are packed into 3 bytes here:
// recorded addresses are limited to 16MB (the flash address window, anyway)
// and counts to 16M (saturating). That saves 4 bytes (33%) per record at the
// cost of some pack/unpack logic.
struct tostruct {
    uint8_t  selfpc[3]; // Callee address/program counter, least-significant byte first. The caller address is in the froms[] array, which points into tos[]
    uint8_t  count[3];  // Number of times the arc was taken, least-significant byte first
    uint16_t link;      // Index of the next entry in the hash chain. For tos[0] this holds the index of the last used entry
};
|
||||||
|
|
||||||
|
|
||||||
|
// State of the PC-sampling histogram
typedef enum {
    PROFILE_NOT_INIT = 0,
    PROFILE_ON,
    PROFILE_OFF
} PROFILE_State;

// Everything the sampler needs to map a program counter to a histogram bin
struct profinfo {
    PROFILE_State state;   // Profiling state
    uint16_t     *counter; // Histogram bins (profiling counters)
    size_t        lowpc;   // Start of the profiled range
    size_t        highpc;  // End of the profiled range; samples are taken for lowpc <= pc < highpc
    uint32_t      scale;   // Scale value of the bins
};

// Global profinfo, filled in by profile_ctl()
static struct profinfo prof = {
    .state   = PROFILE_NOT_INIT,
    .counter = NULL,
    .lowpc   = 0,
    .highpc  = 0,
    .scale   = 0,
};
|
||||||
|
|
||||||
|
|
||||||
|
// Possible states of the call-graph recorder. _mcount_internal() only records
// arcs while the state is GMON_PROF_ON.
typedef enum {
    GMON_PROF_ON = 0,
    GMON_PROF_BUSY,
    GMON_PROF_ERROR,
    GMON_PROF_OFF
} GMON_State;

// Bookkeeping for all of the profiling tables
struct gmonparam {
    int              state;      // Current GMON_State value
    uint16_t        *kcount;     // Histogram PC sample array
    size_t           kcountsize; // Size of kcount[] in bytes
    uint16_t        *froms;      // Hashed 'from' addresses; each 16-bit value is an index into tos[]
    size_t           fromssize;  // Size of froms[] in bytes
    struct tostruct *tos;        // Arc records (callee address and call count)
    size_t           tossize;    // Size of tos[] in bytes
    long             tolimit;    // Number of entries tos[] can hold
    size_t           lowpc;      // Low program counter of the profiled area
    size_t           highpc;     // High program counter of the profiled area
    size_t           textsize;   // Code size (highpc - lowpc)
};

static struct gmonparam _gmonparam = {
    .state      = GMON_PROF_OFF,
    .kcount     = NULL,
    .kcountsize = 0,
    .froms      = NULL,
    .fromssize  = 0,
    .tos        = NULL,
    .tossize    = 0,
    .tolimit    = 0L,
    .lowpc      = 0,
    .highpc     = 0,
    .textsize   = 0,
};
|
||||||
|
|
||||||
|
|
||||||
|
static bool already_setup = false; // Set by _mcount_internal() before its first monstartup() call; monstartup() clears it again if allocation fails
|
||||||
|
static bool _perf_in_setup = false; // True while monstartup() is running, so nested _mcount_internal() calls return immediately (avoids infinite recursion)
|
||||||
|
int __profileMemSize = 0; // Bytes requested by monstartup() for the profiler tables (kcount + froms + tos)
|
||||||
|
|
||||||
|
static int s_scale = 0; // Histogram scale computed in monstartup() and handed to profile_ctl() by moncontrol()
|
||||||
|
#define SCALE_1_TO_1 0x10000L // Largest scale profile_ctl() accepts (65536)
|
||||||
|
|
||||||
|
|
||||||
|
// Map a program counter to its histogram bin.
//   pc    - sampled address
//   base  - lowest profiled address
//   scale - bin scale; 65536 gives one bin per 2 bytes of code
// Returns ((pc - base) / 2) * scale / 65536, with the product formed in 64 bits.
static inline __attribute__((always_inline)) size_t profidx(size_t pc, size_t base, size_t scale) {
    size_t halfwords = (pc - base) >> 1;
    unsigned long long scaled = (unsigned long long)halfwords * scale;
    return scaled >> 16;
}
|
||||||
|
|
||||||
|
// Sample the current program counter periodically
|
||||||
|
#if defined(__riscv)
|
||||||
|
// TODO - systick-like handler
|
||||||
|
#else
|
||||||
|
// SysTick exception handler: takes one PC sample and increments the matching
// histogram bin. Does nothing while __otherCoreIdled is set or while sampling
// is not in the PROFILE_ON state.
static void __no_inline_not_in_flash_func(_SystickHandler)(void) {
|
||||||
|
static size_t pc, idx; // Kept in static storage rather than on the stack
|
||||||
|
extern volatile bool __otherCoreIdled;
|
||||||
|
|
||||||
|
if (!__otherCoreIdled && (prof.state == PROFILE_ON)) {
|
||||||
|
pc = ((uint32_t*)(__builtin_frame_address(0)))[14]; // Read the interrupted code's return address from the stack: word 14 above this function's frame address. NOTE(review): the offset presumably depends on the compiled frame layout - confirm after toolchain or flag changes
|
||||||
|
if ((pc >= prof.lowpc) && (pc < prof.highpc)) { // Only count samples that fall inside the profiled range
|
||||||
|
idx = profidx(pc, prof.lowpc, prof.scale);
|
||||||
|
prof.counter[idx]++; // 16-bit bin, incremented without a saturation check
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Map a histogram bin index back to the first address it covers.
//   idx   - bin index
//   base  - lowest profiled address
//   scale - bin scale (must be non-zero)
// Returns base + 2 * ((idx * 65536) / scale), computed in 64 bits.
static inline __attribute__((always_inline)) size_t profaddr(size_t idx, size_t base, size_t scale) {
    unsigned long long halfwords = ((unsigned long long)idx << 16) / (unsigned long long)scale;
    return base + (halfwords << 1);
}
|
||||||
|
|
||||||
|
// Start or stop profiling
|
||||||
|
// Profiling goes into the SAMPLES buffer of size SIZE (which is treated as an array of uint16_ts of size size/2).
|
||||||
|
// Each bin represents a range of pc addresses from OFFSET. The number of pc addresses in a bin depends on SCALE.
|
||||||
|
// (A scale of 65536 maps each bin to two addresses, A scale of 32768 maps each bin to 4 addresses, a scale of
|
||||||
|
// 1 maps each bin to 128k address). Scale may be 1 - 65536, or zero to turn off profiling
|
||||||
|
static int __no_inline_not_in_flash_func(profile_ctl)(char *samples, size_t size, size_t offset, uint32_t scale) {
|
||||||
|
size_t maxbin;
|
||||||
|
|
||||||
|
if (scale > 65536) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
prof.state = PROFILE_OFF;
|
||||||
|
if (scale) {
|
||||||
|
bzero(samples, size);
|
||||||
|
bzero(&prof, sizeof(prof));
|
||||||
|
maxbin = size >> 1;
|
||||||
|
prof.counter = (uint16_t*)samples;
|
||||||
|
prof.lowpc = offset;
|
||||||
|
prof.highpc = profaddr(maxbin, offset, scale);
|
||||||
|
prof.scale = scale;
|
||||||
|
prof.state = PROFILE_ON;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Control profiling. Profiling is what mcount checks to see if all the data structures are ready.
|
||||||
|
static void __no_inline_not_in_flash_func(moncontrol)(int mode) {
|
||||||
|
if (mode) { // Start
|
||||||
|
profile_ctl((char *)_gmonparam.kcount, _gmonparam.kcountsize, _gmonparam.lowpc, s_scale);
|
||||||
|
_gmonparam.state = GMON_PROF_ON;
|
||||||
|
} else { // Stop
|
||||||
|
profile_ctl((char *)NULL, 0, 0, 0);
|
||||||
|
_gmonparam.state = GMON_PROF_OFF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round x down to the nearest multiple of y (y must be non-zero)
static inline __attribute__((always_inline)) size_t rounddown(size_t x, size_t y) {
    return x - (x % y);
}
|
||||||
|
|
||||||
|
// Round x up to the nearest multiple of y (y must be non-zero)
static inline __attribute__((always_inline)) size_t roundup(size_t x, size_t y) {
    size_t bumped = x + (y - 1);
    return bumped - (bumped % y);
}
|
||||||
|
|
||||||
|
// Allocate memory and set boundaries before any sampling is performed
|
||||||
|
void __no_inline_not_in_flash_func(monstartup)(size_t lowpc, size_t highpc) {
|
||||||
|
register size_t o;
|
||||||
|
char *cp;
|
||||||
|
struct gmonparam *p = &_gmonparam;
|
||||||
|
|
||||||
|
// Round lowpc and highpc to multiples of the density we're using so the rest of the scaling (here and in gprof) stays in ints.
|
||||||
|
p->lowpc = rounddown(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||||
|
p->highpc = roundup(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||||
|
p->textsize = p->highpc - p->lowpc;
|
||||||
|
p->kcountsize = p->textsize / HISTFRACTION;
|
||||||
|
p->fromssize = p->textsize / HASHFRACTION;
|
||||||
|
p->tolimit = p->textsize * ARCDENSITY / 100;
|
||||||
|
if (p->tolimit < MINARCS) {
|
||||||
|
p->tolimit = MINARCS;
|
||||||
|
} else if (p->tolimit > MAXARCS) {
|
||||||
|
p->tolimit = MAXARCS;
|
||||||
|
}
|
||||||
|
p->tossize = p->tolimit * sizeof(struct tostruct);
|
||||||
|
__profileMemSize = p->kcountsize + p->fromssize + p->tossize;
|
||||||
|
#ifdef RP2350_PSRAM_CS
|
||||||
|
cp = pmalloc(__profileMemSize);
|
||||||
|
#else
|
||||||
|
cp = malloc(__profileMemSize);
|
||||||
|
#endif
|
||||||
|
if (cp == NULL) {
|
||||||
|
// OOM
|
||||||
|
already_setup = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zero out cp as value will be added there
|
||||||
|
bzero(cp, p->kcountsize + p->fromssize + p->tossize);
|
||||||
|
|
||||||
|
p->tos = (struct tostruct *)cp;
|
||||||
|
cp += p->tossize;
|
||||||
|
p->kcount = (uint16_t *)cp;
|
||||||
|
cp += p->kcountsize;
|
||||||
|
p->froms = (uint16_t *)cp;
|
||||||
|
|
||||||
|
p->tos[0].link = 0;
|
||||||
|
|
||||||
|
o = p->highpc - p->lowpc;
|
||||||
|
if (p->kcountsize < o) {
|
||||||
|
s_scale = ((float)p->kcountsize / o) * SCALE_1_TO_1;
|
||||||
|
} else {
|
||||||
|
s_scale = SCALE_1_TO_1;
|
||||||
|
}
|
||||||
|
moncontrol(1); // Start
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accessors for the selfpc and count fields
|
||||||
|
static inline __attribute__((always_inline)) void setselfpc(struct tostruct *x, size_t d) {
|
||||||
|
x->selfpc[0] = d & 0xff;
|
||||||
|
x->selfpc[1] = (d >> 8) & 0xff;
|
||||||
|
x->selfpc[2] = (d >> 16) & 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))void setcount(struct tostruct *x, size_t d) {
|
||||||
|
x->count[0] = d & 0xff;
|
||||||
|
x->count[1] = (d >> 8) & 0xff;
|
||||||
|
x->count[2] = (d >> 16) & 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline)) uint32_t getselfpc(const struct tostruct *x) {
|
||||||
|
return 0x10000000 | ((uint32_t)x->selfpc[0]) | (((uint32_t)x->selfpc[1]) << 8) | (((uint32_t)x->selfpc[2]) << 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline)) uint32_t getcount(const struct tostruct *x) {
|
||||||
|
return ((uint32_t)x->count[0]) | (((uint32_t)x->count[1]) << 8) | (((uint32_t)x->count[2]) << 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Called by the GCC function shim (gprof_shim.S) on function entry to record an arc hit
|
||||||
|
void __no_inline_not_in_flash_func(_mcount_internal)(uint32_t *frompcindex, uint32_t *selfpc) {
|
||||||
|
register struct tostruct *top;
|
||||||
|
register struct tostruct *prevtop;
|
||||||
|
register long toindex;
|
||||||
|
struct gmonparam *p = &_gmonparam;
|
||||||
|
|
||||||
|
if (_perf_in_setup) {
|
||||||
|
// Avoid infinite recursion
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!already_setup) {
|
||||||
|
extern char __flash_binary_start; // Start of flash
|
||||||
|
extern char __etext; // End of .text
|
||||||
|
already_setup = true;
|
||||||
|
_perf_in_setup = true;
|
||||||
|
monstartup((uint32_t)&__flash_binary_start, (uint32_t)&__etext);
|
||||||
|
_perf_in_setup = false;
|
||||||
|
}
|
||||||
|
// Check that we are profiling and that we aren't recursively invoked.
|
||||||
|
if (p->state != GMON_PROF_ON) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
p->state++;
|
||||||
|
// Check that frompcindex is a reasonable pc value.
|
||||||
|
frompcindex = (uint32_t*)((long)frompcindex - (long)p->lowpc);
|
||||||
|
if ((unsigned long)frompcindex > p->textsize) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))];
|
||||||
|
toindex = *((uint16_t*)frompcindex); // Get froms[] value
|
||||||
|
if (toindex == 0) {
|
||||||
|
// First time traversing this arc
|
||||||
|
toindex = ++p->tos[0].link; // The link of tos[0] points to the last used record in the array
|
||||||
|
if (toindex >= p->tolimit) { // More tos[] entries than we can handle!
|
||||||
|
goto overflow;
|
||||||
|
}
|
||||||
|
*((uint16_t*)frompcindex) = (uint16_t)toindex; // Store new 'to' value into froms[]
|
||||||
|
top = &p->tos[toindex];
|
||||||
|
setselfpc(top, (uint32_t)selfpc);
|
||||||
|
setcount(top, 1);
|
||||||
|
top->link = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
top = &p->tos[toindex];
|
||||||
|
if (getselfpc(top) == (size_t)selfpc) {
|
||||||
|
// Arc at front of chain; usual case.
|
||||||
|
uint32_t cnt = getcount(top) + 1;
|
||||||
|
if (cnt >= 1 << 24) {
|
||||||
|
cnt = (1 << 24) - 1;
|
||||||
|
}
|
||||||
|
setcount(top, cnt);
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
// Have to go looking down chain for it. top points to what we are looking at, prevtop points to previous top. We know it is not at the head of the chain.
|
||||||
|
for (; /* goto done */;) {
|
||||||
|
if (top->link == 0) {
|
||||||
|
// top is end of the chain and none of the chain had top->selfpc == selfpc, so we allocate a new tostruct and link it to the head of the chain.
|
||||||
|
toindex = ++p->tos[0].link;
|
||||||
|
if (toindex >= p->tolimit) {
|
||||||
|
goto overflow;
|
||||||
|
}
|
||||||
|
top = &p->tos[toindex];
|
||||||
|
setselfpc(top, (uint32_t)selfpc);
|
||||||
|
setcount(top, 1);
|
||||||
|
top->link = *((uint16_t*)frompcindex);
|
||||||
|
*(uint16_t*)frompcindex = (uint16_t)toindex;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
// Otherwise, check the next arc on the chain.
|
||||||
|
prevtop = top;
|
||||||
|
top = &p->tos[top->link];
|
||||||
|
if (getselfpc(top) == (size_t)selfpc) {
|
||||||
|
// Increment its count, move it to the head of the chain.
|
||||||
|
uint32_t cnt = getcount(top) + 1;
|
||||||
|
if (cnt >= 1 << 24) {
|
||||||
|
cnt = (1 << 24) - 1;
|
||||||
|
}
|
||||||
|
setcount(top, cnt);
|
||||||
|
toindex = prevtop->link;
|
||||||
|
prevtop->link = top->link;
|
||||||
|
top->link = *((uint16_t*)frompcindex);
|
||||||
|
*((uint16_t*)frompcindex) = (uint16_t)toindex;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
done:
|
||||||
|
p->state--;
|
||||||
|
return;
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
p->state++; // Halt further profiling
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Write out the GMON.OUT file using internal state
|
||||||
|
void _writeProfile(int (*writeCB)(const void *data, int len)) {
|
||||||
|
struct gmonhdr { // GMON.OUT header
|
||||||
|
size_t lpc; // base pc address of sample buffer
|
||||||
|
size_t hpc; // max pc address of sampled buffer
|
||||||
|
int ncnt; // size of sample buffer (plus this header)
|
||||||
|
int version; // version number
|
||||||
|
int profrate; // profiling clock rate
|
||||||
|
int spare[3]; // reserved
|
||||||
|
};
|
||||||
|
const unsigned int GMONVERSION = 0x00051879;
|
||||||
|
struct rawarc { // Per-arc on-disk data format
|
||||||
|
size_t raw_frompc;
|
||||||
|
size_t raw_selfpc;
|
||||||
|
long raw_count;
|
||||||
|
};
|
||||||
|
int fromindex;
|
||||||
|
int endfrom;
|
||||||
|
size_t frompc;
|
||||||
|
int toindex;
|
||||||
|
struct rawarc rawarc;
|
||||||
|
const int BS = 64;
|
||||||
|
struct rawarc rawarcbuff[BS];
|
||||||
|
int rawarcbuffptr = 0;
|
||||||
|
struct gmonparam *p = &_gmonparam;
|
||||||
|
struct gmonhdr hdr;
|
||||||
|
|
||||||
|
moncontrol(0); // Stop
|
||||||
|
|
||||||
|
hdr.lpc = p->lowpc;
|
||||||
|
hdr.hpc = p->highpc;
|
||||||
|
hdr.ncnt = p->kcountsize + sizeof(hdr);
|
||||||
|
hdr.version = GMONVERSION;
|
||||||
|
hdr.profrate = GMON_HZ;
|
||||||
|
writeCB((void *)&hdr, sizeof(hdr));
|
||||||
|
writeCB((void *)p->kcount, p->kcountsize);
|
||||||
|
endfrom = p->fromssize / sizeof(*p->froms);
|
||||||
|
for (fromindex = 0; fromindex < endfrom; fromindex++) {
|
||||||
|
if (p->froms[fromindex] == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
frompc = p->lowpc;
|
||||||
|
frompc += fromindex * HASHFRACTION * sizeof(*p->froms);
|
||||||
|
for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) {
|
||||||
|
rawarc.raw_frompc = frompc;
|
||||||
|
rawarc.raw_selfpc = getselfpc(&p->tos[toindex]);
|
||||||
|
rawarc.raw_count = getcount(&p->tos[toindex]);
|
||||||
|
// Buffer up writes because Semihosting is really slow per write call
|
||||||
|
rawarcbuff[rawarcbuffptr++] = rawarc;
|
||||||
|
if (rawarcbuffptr == BS) {
|
||||||
|
writeCB((void *)rawarcbuff, BS * sizeof(struct rawarc));
|
||||||
|
rawarcbuffptr = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write any remaining bits
|
||||||
|
if (rawarcbuffptr) {
|
||||||
|
writeCB((void *)rawarcbuff, rawarcbuffptr * sizeof(struct rawarc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// These are referenced by RP2040Support.cpp and called by the runtime init SDK
|
||||||
|
// Install a periodic PC sampler at the specified frequency
|
||||||
|
#if defined(__riscv)
// RISC-V build: no periodic PC sampler is installed yet, so this is a no-op.
void runtime_init_setup_profiling(void) {
    // TODO - is there an equivalent? Or do we need to build a timer IRQ here?
}
#else
#include <hardware/exception.h>
#include <hardware/structs/systick.h>

// ARM build: route the SysTick exception to _SystickHandler and start the
// SysTick timer with a period of F_CPU / GMON_HZ counts.
void runtime_init_setup_profiling(void) {
    exception_set_exclusive_handler(SYSTICK_EXCEPTION, _SystickHandler);
    // Program the timer in the order ARM documents for SysTick: reload value,
    // then clear the current count, then enable.  Enabling first would let the
    // counter start from whatever RVR/CVR held before they were programmed.
    systick_hw->rvr = (F_CPU / GMON_HZ) - 1;
    systick_hw->cvr = 0;
    // Bits 0..2 of the SysTick control register: ENABLE | TICKINT | CLKSOURCE
    systick_hw->csr = 0x7;
}
#endif
|
||||||
58
cores/rp2040/gprof_shim.S
Normal file
58
cores/rp2040/gprof_shim.S
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
#if defined(__riscv)
|
||||||
|
// Originally from https://github.com/sbzpro/riscv-gprof
|
||||||
|
# define RSIZE 4
|
||||||
|
|
||||||
|
.section .text
|
||||||
|
.align 2
|
||||||
|
.globl _mcount
|
||||||
|
// _mcount: RISC-V profiling hook.  Preserves ra across a call to
// _mcount_internal, handing it the incoming ra in a1.  a0 is passed through
// exactly as this routine received it.
_mcount:
|
||||||
|
// Reserve 4*RSIZE bytes of stack for the saved return address
addi sp,sp,-4*RSIZE
|
||||||
|
// Save ra, because the call below overwrites it
sw ra, 3*RSIZE(sp)
|
||||||
|
// Copy the incoming ra into a1 for _mcount_internal
mv a1,ra
|
||||||
|
call _mcount_internal; //jal _mcount_internal
|
||||||
|
// Reload the saved ra and release the stack space
lw ra, 3*RSIZE(sp)
|
||||||
|
addi sp,sp,4*RSIZE
|
||||||
|
ret
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* profiler.S
|
||||||
|
* Implements the gprof profiler arc counting function.
|
||||||
|
* Created on: 06.08.2015
|
||||||
|
* Author: Erich Styger
|
||||||
|
* Modified for RP2040/RP2350 on Dec 3 2024 by Earle F. Philhower, III.
|
||||||
|
*/
|
||||||
|
.syntax unified
|
||||||
|
.arch armv7-m
|
||||||
|
.cpu cortex-m0plus
|
||||||
|
|
||||||
|
.text
|
||||||
|
.thumb
|
||||||
|
.thumb_func
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
.globl __gnu_mcount_nc
|
||||||
|
.type __gnu_mcount_nc, %function
|
||||||
|
.section .time_critical
|
||||||
|
|
||||||
|
// __gnu_mcount_nc: ARM Thumb profiling hook.  Calls _mcount_internal with
// r0 = the word found on top of the stack at entry and r1 = the incoming LR,
// then restores r0-r3, loads LR from that stack word, removes the word from
// the stack, and returns to the incoming LR.
__gnu_mcount_nc:
|
||||||
|
// LR = to return to
|
||||||
|
// SP = to-replace-LR with
|
||||||
|
// Save r0-r3 and LR.  Afterwards [sp, #0] holds the saved LR,
// [sp, #4]..[sp, #16] hold r0-r3, and [sp, #20] holds the word that was on
// top of the stack at entry.
push {r0, r1, r2, r3}
|
||||||
|
push {lr}
|
||||||
|
|
||||||
|
// Swap the words at [sp, #20] and [sp, #0]: the entry-time stack word moves
// to the top of the stack and the saved LR takes its old slot.
|
||||||
|
ldr r0, [sp, #20]
|
||||||
|
ldr r1, [sp, #0]
|
||||||
|
str r0, [sp, #0]
|
||||||
|
str r1, [sp, #20]
|
||||||
|
|
||||||
|
// r1 = incoming LR; it must be copied now because bl overwrites LR
mov r1, lr
|
||||||
|
ldr r0, [sp, #0] /* caller - at the top of the stack */
|
||||||
|
bl _mcount_internal /* when __gnu_mcount_nc is called */
|
||||||
|
// Pop the entry-time stack word and make it the new LR
pop {r0}
|
||||||
|
mov lr, r0
|
||||||
|
// Restore the saved r0-r3
pop {r0, r1, r2, r3}
|
||||||
|
// Return through the saved incoming LR (moved to this slot by the swap)
pop {pc}
|
||||||
|
|
||||||
|
.end __gnu_mcount_nc
|
||||||
|
#endif
|
||||||
|
|
@ -44,6 +44,7 @@ For the latest version, always check https://github.com/earlephilhower/arduino-p
|
||||||
USB (Arduino and Adafruit_TinyUSB) <usb>
|
USB (Arduino and Adafruit_TinyUSB) <usb>
|
||||||
Multicore Processing <multicore>
|
Multicore Processing <multicore>
|
||||||
Semihosting <semihosting>
|
Semihosting <semihosting>
|
||||||
|
Profiling (GPROF) <profiling>
|
||||||
|
|
||||||
RP2350 Specific Notes <rp2350>
|
RP2350 Specific Notes <rp2350>
|
||||||
RP2350 PSRAM <psram>
|
RP2350 PSRAM <psram>
|
||||||
|
|
|
||||||
76
docs/profiling.rst
Normal file
76
docs/profiling.rst
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
Profiling Applications with GPROF
|
||||||
|
=================================
|
||||||
|
|
||||||
|
Applications running on the Pico can be profiled using GNU GPROF to show where the CPU is using its time
|
||||||
|
on the device and how often certain functions are called. It does this by recompiling the application
|
||||||
|
and adding a small preamble to each function built to identify what functions call what others (and
|
||||||
|
how frequently). It also uses the ``SYSTICK`` exception timer to sample and record the PC 10,000 times
|
||||||
|
per second. When an application is complete, the recorded data can be dumped to the host PC as a
|
||||||
|
``gmon.out`` file which can be processed by ``arm-none-eabi-gprof`` into useful data.
|
||||||
|
|
||||||
|
The histogram of PCs and tally of function caller/callees can take a significant amount of RAM, from 100KB
|
||||||
|
to 10000KB depending on the size of the application. As such, while the RP2040 **may** be able to
|
||||||
|
profile small applications, this is only really recommended on the RP2350 with external PSRAM. The
|
||||||
|
profiler will automatically use PSRAM when available. Call ``rp2040.getProfileMemoryUsage()`` to get the
|
||||||
|
memory allocated at runtime.
|
||||||
|
|
||||||
|
|
||||||
|
Profiling also adds processing overhead in terms of the periodic sampling and the function preambles.
|
||||||
|
In most cases there is no reason to enable (and many reasons to disable) profiling when an application
|
||||||
|
is deployed to the field.
|
||||||
|
|
||||||
|
Transferring the ``GMON.OUT`` data from the Pico to the host PC can be done by having the application
|
||||||
|
write it out to an SD card or a LittleFS filesystem which is then manually dumped, but for ease of use
|
||||||
|
semihosting can be used to allow the Pico (under the control of OpenOCD and GDB) to write the
|
||||||
|
``gmon.out`` file directly on the host PC, ready for use.
|
||||||
|
|
||||||
|
**NOTE** Semihosting only works when connected to an OpenOCD + GDB debug session. Running an application
|
||||||
|
compiled for Semihosting without the debugger will cause a panic and hang the chip.
|
||||||
|
|
||||||
|
As of now, only ARM has support for Semihosting or GPROF.
|
||||||
|
|
||||||
|
|
||||||
|
Enabling Profiling in an Application
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
The ``Tools->Profiling->Enabled`` menu needs to be selected to enable profiling support in GCC. This will
|
||||||
|
add the necessary preamble to every function compiled (**Note** that the ``libpico`` and ``libc`` will not
|
||||||
|
be instrumented because they are pre-built so calls from them will not be fully instrumented. However,
|
||||||
|
PC data will still be grabbed and decoded from them at runtime.)
|
||||||
|
|
||||||
|
The application will automatically start collecting profiling data even before ``setup`` starts in this
|
||||||
|
mode. It will continue collecting data until you stop and write out the profiling data using
|
||||||
|
``rp2040.writeProfiling()`` to dump to the host, a file, serial port, etc.
|
||||||
|
|
||||||
|
For example, an application which does all its processing in ``setup()`` might look like:
|
||||||
|
|
||||||
|
.. code:: cpp
|
||||||
|
|
||||||
|
#include <SemiFS.h>
|
||||||
|
void setup() {
|
||||||
|
SerialSemi.printf("BEGIN\n");
|
||||||
|
do_some_work_that_takes_a_long_time_with_many_function_calls();
|
||||||
|
// Do lots of other work...
|
||||||
|
// Now all done...
|
||||||
|
SerialSemi.printf("Writing GMON.OUT\n");
|
||||||
|
SemiFS.begin();
|
||||||
|
File gmon = SemiFS.open("gmon.out", "w");
|
||||||
|
rp2040.writeProfiling(&gmon);
|
||||||
|
gmon.close();
|
||||||
|
SerialSemi.printf("END\n");
|
||||||
|
}
|
||||||
|
void loop() {}
|
||||||
|
|
||||||
|
|
||||||
|
Collecting and Analyzing Profile Data
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
Running this application under `semihosting <semihosting>`_ GDB and OpenOCD generates a ``gmon.out`` file
|
||||||
|
in the OpenOCD current working directory. This file, combined with the ``ELF`` binary built in the
|
||||||
|
IDE and loaded through GDB, can produce profiler output using:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
|
||||||
|
$ /path/to/arm-none-eabi/bin/arm-none-eabi-gprof /path/to/sketch.ino.elf /path/to/gmon.out
|
||||||
|
|
||||||
|
See the ``rp2040/Profiling.ino`` example for more details.
|
||||||
|
|
@ -65,6 +65,9 @@ getUsedPSRAMHeap KEYWORD2
|
||||||
getTotalPSRAMHeap KEYWORD2
|
getTotalPSRAMHeap KEYWORD2
|
||||||
getTotalPSRAM KEYWORD2
|
getTotalPSRAM KEYWORD2
|
||||||
|
|
||||||
|
getProfileMemoryUsage KEYWORD2
|
||||||
|
writeProfiling KEYWORD2
|
||||||
|
|
||||||
getChipID KEYWORD2
|
getChipID KEYWORD2
|
||||||
|
|
||||||
hwrand32 KEYWORD2
|
hwrand32 KEYWORD2
|
||||||
|
|
|
||||||
104
libraries/rp2040/examples/Profiling/Profiling.ino
Normal file
104
libraries/rp2040/examples/Profiling/Profiling.ino
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
// This example should be run with profiling enabled from the IDE and
|
||||||
|
// under GDB/OpenOCD. It uses semihosting to write a gmon.out file
|
||||||
|
// on the host system with the profiled application results.
|
||||||
|
//
|
||||||
|
// Semihosting **ONLY** works with an OpenOCD and GDB setup. If you build
|
||||||
|
// and run a semihosting app without GDB connected, it **WILL CRASH**
|
||||||
|
//
|
||||||
|
// Start OpenOCD normally, but leave the terminal window visible because
|
||||||
|
// it is OpenOCD, not GDB, which will display the semihosting output.
|
||||||
|
// OpenOCD will also create files in the current working directory, so
|
||||||
|
// be sure it is a place you can find and write to.
|
||||||
|
//
|
||||||
|
// In GDB, connect to OpenOCD and then enable semihosting
|
||||||
|
// (gdb) target extended-remote localhost:3333
|
||||||
|
// (gdb) monitor arm semihosting enable
|
||||||
|
// (gdb) file /path/to/sketch.ino.elf
|
||||||
|
// (gdb) load
|
||||||
|
//
|
||||||
|
// Run the app from GDB and watch OpenOCD, it will display messages when
|
||||||
|
// the app is done and "gmon.out" is on the host system.
|
||||||
|
//
|
||||||
|
// (gdb) run
|
||||||
|
// .. pop to OpenOCD window
|
||||||
|
// [OpenOCD] BEGIN
|
||||||
|
// [OpenOCD] Result = 2417697592
|
||||||
|
// [OpenOCD] Writing GMON.OUT
|
||||||
|
// [OpenOCD] END
|
||||||
|
//
|
||||||
|
// From command line, decode the gmon.out using the ELF and gprof tool
|
||||||
|
//
|
||||||
|
// $ /path/to/arm-none-eabi/bin/arm-none-eabi-gprof /path/to/sketch.ino.elf /path/to/gmon.out | less
|
||||||
|
// Flat profile:
|
||||||
|
//
|
||||||
|
// Each sample counts as 0.0001 seconds.
|
||||||
|
// % cumulative self self total
|
||||||
|
// time seconds seconds calls ms/call ms/call name
|
||||||
|
// 50.56 1.74 1.74 3500020 0.00 0.00 __wrap___getreent
|
||||||
|
// 24.05 2.57 0.83 rand
|
||||||
|
// 8.32 2.86 0.29 5 57.36 57.36 fcn1(unsigned long)
|
||||||
|
// ...
|
||||||
|
// index % time self children called name
|
||||||
|
// <spontaneous>
|
||||||
|
// [1] 74.6 0.83 1.74 rand [1]
|
||||||
|
// 1.74 0.00 3500000/3500020 __wrap___getreent [2]
|
||||||
|
// -----------------------------------------------
|
||||||
|
// 0.00 0.00 1/3500020 realloc [106]
|
||||||
|
// 0.00 0.00 3/3500020 vsnprintf [54]
|
||||||
|
// 0.00 0.00 7/3500020 srand [7]
|
||||||
|
// 0.00 0.00 9/3500020 malloc [105]
|
||||||
|
// 1.74 0.00 3500000/3500020 rand [1]
|
||||||
|
// ...
|
||||||
|
|
||||||
|
#ifndef __PROFILE
|
||||||
|
void setup() {
|
||||||
|
Serial.printf("Enable profiling to run this example.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void loop() {
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifdef __riscv
|
||||||
|
void setup() {
|
||||||
|
// No semihosting for RISCV yet
|
||||||
|
}
|
||||||
|
void loop() {
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <SemiFS.h>
|
||||||
|
|
||||||
|
// Busy-work for the profiler: seeds the C library PRNG with st, then adds
// 500000 successive rand() values onto st and returns the wrapped 32-bit sum.
uint32_t fcn1(uint32_t st) {
  uint32_t total = st;
  srand(st);
  int remaining = 500000;
  while (remaining-- > 0) {
    total += rand();
  }
  return total;
}
|
||||||
|
|
||||||
|
// Second busy-work routine: like fcn1 but the PRNG is seeded with st * st
// before 500000 rand() values are accumulated onto st.
uint32_t fcn2(uint32_t st) {
  uint32_t total = st;
  srand(st * st);
  int remaining = 500000;
  while (remaining-- > 0) {
    total += rand();
  }
  return total;
}
|
||||||
|
|
||||||
|
void setup() {
|
||||||
|
SerialSemi.printf("BEGIN\n");
|
||||||
|
SerialSemi.printf("Result = %lu\n", fcn2(fcn2(fcn1(3)) * fcn1(fcn1(fcn1(fcn1(2))))));
|
||||||
|
SerialSemi.printf("Writing GMON.OUT\n");
|
||||||
|
SemiFS.begin();
|
||||||
|
File gmon = SemiFS.open("gmon.out", "w");
|
||||||
|
rp2040.writeProfiling(&gmon);
|
||||||
|
gmon.close();
|
||||||
|
SerialSemi.printf("END\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void loop() {
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // !__PROFILE
|
||||||
|
|
@ -64,7 +64,7 @@ compiler.c.elf.flags={compiler.warning_flags} {compiler.defines} {compiler.flags
|
||||||
compiler.S.cmd={build.toolchain}-gcc
|
compiler.S.cmd={build.toolchain}-gcc
|
||||||
compiler.S.flags=-c {compiler.warning_flags} {compiler.defines} -g -x assembler-with-cpp -MMD {compiler.includes} {build.toolchainopts} -g
|
compiler.S.flags=-c {compiler.warning_flags} {compiler.defines} -g -x assembler-with-cpp -MMD {compiler.includes} {build.toolchainopts} -g
|
||||||
compiler.cpp.cmd={build.toolchain}-g++
|
compiler.cpp.cmd={build.toolchain}-g++
|
||||||
compiler.cpp.flags=-c {compiler.warning_flags} {compiler.defines} {compiler.flags} -MMD {compiler.includes} {build.flags.rtti} -std=gnu++17 -g -pipe
|
compiler.cpp.flags=-c {compiler.warning_flags} {compiler.defines} {compiler.flags} -MMD {compiler.includes} {build.flags.rtti} {build.flags.profile} -std=gnu++17 -g -pipe
|
||||||
|
|
||||||
compiler.ar.cmd={build.toolchain}-ar
|
compiler.ar.cmd={build.toolchain}-ar
|
||||||
compiler.ar.flags=rcs
|
compiler.ar.flags=rcs
|
||||||
|
|
@ -98,6 +98,7 @@ build.psram_freq=
|
||||||
build.eeprom_start=
|
build.eeprom_start=
|
||||||
build.flags.optimize=-Os
|
build.flags.optimize=-Os
|
||||||
build.flags.rtti=-fno-rtti
|
build.flags.rtti=-fno-rtti
|
||||||
|
build.flags.profile=
|
||||||
build.fs_start=
|
build.fs_start=
|
||||||
build.fs_end=
|
build.fs_end=
|
||||||
build.usbstack_flags=
|
build.usbstack_flags=
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,8 @@ def compile(tmp_dir, sketch, cache, tools_dir, hardware_dir, ide_path, f, args):
|
||||||
fqbn = fqbn.replace("rpipico", "rpipicow")
|
fqbn = fqbn.replace("rpipico", "rpipicow")
|
||||||
if ('/BT' in sketch) or ('/BLE' in sketch) or ('/Bluetooth' in sketch):
|
if ('/BT' in sketch) or ('/BLE' in sketch) or ('/Bluetooth' in sketch):
|
||||||
fqbn = fqbn + ",ipbtstack=ipv4btcble"
|
fqbn = fqbn + ",ipbtstack=ipv4btcble"
|
||||||
|
if '/Profiling' in sketch:
|
||||||
|
fqbn = fqbn + ",profile=Enabled"
|
||||||
cmd += [fqbn]
|
cmd += [fqbn]
|
||||||
cmd += ['-built-in-libraries', ide_path + '/libraries']
|
cmd += ['-built-in-libraries', ide_path + '/libraries']
|
||||||
cmd += ['-ide-version=10607']
|
cmd += ['-ide-version=10607']
|
||||||
|
|
|
||||||
|
|
@ -93,6 +93,12 @@ def BuildOptimize(name):
|
||||||
print("%s.menu.opt.%s=%s (%s)%s" % (name, l[0], l[1], l[2], l[3]))
|
print("%s.menu.opt.%s=%s (%s)%s" % (name, l[0], l[1], l[2], l[3]))
|
||||||
print("%s.menu.opt.%s.build.flags.optimize=%s" % (name, l[0], l[2]))
|
print("%s.menu.opt.%s.build.flags.optimize=%s" % (name, l[0], l[2]))
|
||||||
|
|
||||||
|
def BuildProfile(name):
    """Print the boards.txt "profile" menu entries for board `name`.

    Emits a Disabled choice with empty build.flags.profile and an Enabled
    choice whose build.flags.profile is "-pg -D__PROFILE".
    """
    templates = (
        "%s.menu.profile.Disabled=Disabled",
        "%s.menu.profile.Disabled.build.flags.profile=",
        "%s.menu.profile.Enabled=Enabled",
        "%s.menu.profile.Enabled.build.flags.profile=-pg -D__PROFILE",
    )
    for template in templates:
        print(template % (name))
|
||||||
|
|
||||||
def BuildRTTI(name):
|
def BuildRTTI(name):
|
||||||
print("%s.menu.rtti.Disabled=Disabled" % (name))
|
print("%s.menu.rtti.Disabled=Disabled" % (name))
|
||||||
print("%s.menu.rtti.Disabled.build.flags.rtti=-fno-rtti" % (name))
|
print("%s.menu.rtti.Disabled.build.flags.rtti=-fno-rtti" % (name))
|
||||||
|
|
@ -282,6 +288,7 @@ def BuildGlobalMenuList():
|
||||||
print("menu.freq=CPU Speed")
|
print("menu.freq=CPU Speed")
|
||||||
print("menu.arch=CPU Architecture")
|
print("menu.arch=CPU Architecture")
|
||||||
print("menu.opt=Optimize")
|
print("menu.opt=Optimize")
|
||||||
|
print("menu.profile=Profiling")
|
||||||
print("menu.rtti=RTTI")
|
print("menu.rtti=RTTI")
|
||||||
print("menu.stackprotect=Stack Protector")
|
print("menu.stackprotect=Stack Protector")
|
||||||
print("menu.exceptions=C++ Exceptions")
|
print("menu.exceptions=C++ Exceptions")
|
||||||
|
|
@ -353,6 +360,7 @@ def MakeBoard(name, chip, vendor_name, product_name, vid, pid, pwr, boarddefine,
|
||||||
else:
|
else:
|
||||||
BuildFreq(name, 133)
|
BuildFreq(name, 133)
|
||||||
BuildOptimize(name)
|
BuildOptimize(name)
|
||||||
|
BuildProfile(name)
|
||||||
BuildRTTI(name)
|
BuildRTTI(name)
|
||||||
BuildStackProtect(name)
|
BuildStackProtect(name)
|
||||||
BuildExceptions(name)
|
BuildExceptions(name)
|
||||||
|
|
|
||||||
|
|
@ -120,4 +120,7 @@ static const uint8_t SCK = PIN_SPI0_SCK;
|
||||||
#define CRYPTO_WIRE Wire
|
#define CRYPTO_WIRE Wire
|
||||||
|
|
||||||
#define USB_MAX_POWER (500)
|
#define USB_MAX_POWER (500)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
#include "nina_pins.h"
|
#include "nina_pins.h"
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
#include <Ilabs2040WiFiClass.h>
|
#include <Ilabs2040WiFiClass.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define PINS_COUNT (26u)
|
#define PINS_COUNT (26u)
|
||||||
#define NUM_DIGITAL_PINS (26u)
|
#define NUM_DIGITAL_PINS (26u)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue