From 6549bbcb72436f85fca68bbb29c4eed5772ec16d Mon Sep 17 00:00:00 2001
From: Jeff Epler <jepler@gmail.com>
Date: Sat, 22 Oct 2022 20:20:13 -0500
Subject: [PATCH] Add a tiny elfutils-like library

.. it can find symbols and determine how to load a binary

This might be useful for CircuitPython "coproc" so that
an elf file can be used (instead of a bin file); symbol offsets
within the shared memory area can be determined, and the right
portion of the elf file can be loaded into the coprocessor memory.
---
 .gitignore     |   2 +
 Makefile       |  15 ++++--
 py/minidump.py |  12 +++++
 py/minielf.py  | 141 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 py/minidump.py
 create mode 100644 py/minielf.py

diff --git a/.gitignore b/.gitignore
index 859ba00..8821673 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
+__pycache__
 /link.ld
 /a.out
+/a.out-stripped
diff --git a/Makefile b/Makefile
index 7cf81b5..59157b9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,12 @@
+ifeq ($(origin IDF_PATH),undefined)
+$(error You must "source esp-idf/export.sh" before building)
+endif
+
 COPROC_RESERVE_MEM ?= 8176
 SOC := esp32s3
-CC := riscv32-esp-elf-gcc
+CROSS := riscv32-esp-elf-
+CC := $(CROSS)gcc
+STRIP := $(CROSS)strip
 CFLAGS := -Os -march=rv32imc -mdiv -fdata-sections -ffunction-sections
 CFLAGS += -isystem $(IDF_PATH)/components/ulp/ulp_riscv/include/
 CFLAGS += -isystem $(IDF_PATH)/components/soc/$(SOC)/include
@@ -20,13 +26,16 @@ LDFLAGS += link.ld
 
 
 .PHONY: default
-default: a.out
+default: a.out-stripped
+a.out-stripped: a.out
+	$(STRIP) -g -o $@ $<
+
 a.out: $(SRCS) link.ld
 	$(CC) -flto $(CFLAGS) $^ -o $@ $(LDFLAGS)
 
 .PHONY: clean
 clean:
-	rm -f a.out link.ld
+	rm -f a.out* link.ld
 
 link.ld: ulp.riscv.ld
 	$(CC) -E -P -xc $(CFLAGS) -o $@ $<
diff --git a/py/minidump.py b/py/minidump.py
new file mode 100644
index 0000000..9d70607
--- /dev/null
+++ b/py/minidump.py
@@ -0,0 +1,12 @@
+from minielf import ELFFile, PT_LOAD
+
+with open("a.out-stripped", "rb") as a_out:  # context manager closes the ELF file when done
+    e = ELFFile(a_out)
+    s = e.get_section(1)
+    s = e.get_section_by_name('.symtab')
+    sy = s.get_symbol_by_name('shared_mem')[0]
+    en = sy.entry
+    for h in e.iter_headers():
+        if h.p_type == PT_LOAD:
+            print(f"@{h.p_vaddr:04x}: Load {h.p_filesz} bytes starting at {h.p_offset}")
+    print(f"shared_mem @ 0x{en.st_value:04x} 0x{en.st_size:04x} bytes")
diff --git a/py/minielf.py b/py/minielf.py
new file mode 100644
index 0000000..1db129d
--- /dev/null
+++ b/py/minielf.py
@@ -0,0 +1,141 @@
+import struct
+from collections import namedtuple
+
+
+class StructMixin:
+    @classmethod
+    def calcsize(cls):
+        return struct.calcsize(cls._fmt)
+
+    @classmethod
+    def frombuffer(cls, buf):
+        return cls(*struct.unpack(cls._fmt, buf))
+
+_ElfHeader32 = namedtuple('_ElfHeader32', """
+    e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx""".split())
+class ElfHeader32(_ElfHeader32, StructMixin):
+    _fmt = '<16s2H5L6H'  # Elf32_Ehdr fields are unsigned; signed codes made high addresses/indices negative
+
+_SectionHeader32 = namedtuple('_SectionHeader32', """
+        sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info
+        sh_addralign sh_entsize
+        """.split())
+class SectionHeader32(_SectionHeader32, StructMixin):
+    _fmt = '<10L'  # ten unsigned 32-bit words; signed 'l' turned values >= 0x80000000 negative
+
+class Section:
+    def __init__(self, ef, sh):
+        self._elffile = ef
+        self._header = sh
+
+    def readat(self, offset, sz):
+        return self._elffile._readat(offset + self._header.sh_offset, sz)
+    def constructat(self, offset, cls):
+        return self._elffile._constructat(offset + self._header.sh_offset, cls)
+
+class StringTable(Section):
+    def symbolat(self, offset):
+        result = b''
+        stream = self._elffile.stream
+        stream.seek(self._header.sh_offset + offset)
+        while (c := stream.read(1)) != b'\0' and c != b'':
+            result += c
+        return result
+
+_SymbolTableEntry = namedtuple('_SymbolTableEntry',
+        ['st_name', 'st_value', 'st_size', 'set_info', 'st_other', 'st_shndx'])  # NOTE(review): 'set_info' is presumably a typo for ELF's st_info; name kept so existing callers still work
+
+class SymbolTableEntry(_SymbolTableEntry, StructMixin):
+    _fmt = '<3L2BH'  # unsigned fields: keeps high st_value addresses and reserved st_shndx values (e.g. 0xfff1) non-negative
+
+class Symbol:
+    def __init__(self, name, entry):
+        self.name = name
+        self.entry = entry
+
+class SymbolTable(Section):
+    def iter_symbols(self):
+        for i in range(0, self._header.sh_size, SymbolTableEntry.calcsize()):
+            yield self.constructat(i, SymbolTableEntry)
+
+    def get_symbol_by_name(self, name):
+        if not isinstance(name, bytes): name = name.encode()
+        strs = self._elffile.get_section_by_name('.strtab')
+        for sy in self.iter_symbols():
+            name2 = strs.symbolat(sy.st_name)
+            if name == name2:
+                return [Symbol(name, sy)]
+
+
+section_constructors = {
+        2: SymbolTable,
+        3: StringTable,
+}
+
+_HeaderTableEntry = namedtuple('_HeaderTableEntry',
+        ['p_type', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_flags', 'p_align'])
+
+PT_LOAD = 1  # p_type value of a loadable segment
+
+class HeaderTableEntry(_HeaderTableEntry, StructMixin):
+    _fmt = '<8L'  # eight unsigned 32-bit words; signed 'l' made p_vaddr/p_paddr >= 0x80000000 negative
+
+class ELFFile:
+    def __init__(self, stream):
+        self.stream = stream
+        self._buffer = ()
+        if self._readat(0, 4) != b'\177ELF':
+            raise ValueError("Not an ELF file")
+        if self._readat(4, 3) != b'\1\1\1':
+            raise ValueError("Incompatible ELF file")
+        self._header = self._constructat(0, ElfHeader32)
+
+    def _readat(self, offset, sz):
+        if len(self._buffer) < sz:  # grow the shared scratch buffer only when needed
+            self._buffer = bytearray(sz)
+            self._view = memoryview(self._buffer)
+        mv = self._view[:sz]  # view into the reused buffer: overwritten by the next _readat call
+        self.stream.seek(offset)
+        if self.stream.readinto(mv) != sz: raise ValueError("Truncated ELF file")  # short read would otherwise return stale bytes
+        return mv
+
+    def _decodeat(self, offset, fmt):
+        sz = struct.calcsize(fmt)
+        mv = self._readat(offset, sz)  # previously bound to `mb`, so the unpack below raised NameError
+        return struct.unpack(fmt, mv)  # tuple of values decoded from the stream at `offset` per `fmt`
+
+    def _constructat(self, offset, cls):
+        sz = cls.calcsize()
+        mb = self._readat(offset, sz)
+        return cls.frombuffer(mb)
+
+    def get_section(self, index):
+        if not (0 <= index < self._header.e_shnum):
+            raise IndexError("Invalid section number")
+        offset = self._header.e_shoff + index * self._header.e_shentsize
+        sh = self._constructat(offset, SectionHeader32)
+        constructor = section_constructors.get(sh.sh_type, Section)
+        return constructor(self, sh)
+
+    def iter_sections(self):
+        for i in range(self._header.e_shnum):
+            yield self.get_section(i)
+
+    def get_section_by_name(self, name):
+        if not isinstance(name, bytes): name = name.encode()
+        idx = self.get_section(self._header.e_shstrndx)
+        for sec in self.iter_sections():
+            off = sec._header.sh_name
+            name2 = idx.symbolat(off)
+            if name == name2:
+                return sec
+
+    def get_header(self, index):
+        if not (0 <= index < self._header.e_phnum):
+            raise IndexError("Invalid header number")
+        offset = self._header.e_phoff + index * self._header.e_phentsize
+        return self._constructat(offset, HeaderTableEntry)
+
+    def iter_headers(self):
+        for i in range(self._header.e_phnum):
+            yield self.get_header(i)