arm64: speed up simple memcpy/memset alternatives

We need these simple alternatives during early boot, when the
MMU is not yet enabled. However, they don't have to be as slow as they
possibly can be. These functions are mainly used to clear .bss sections
and to copy .data to its final destination when doing XIP, etc. It is
therefore very likely that the provided pointers are 64-bit aligned.
Let's optimize for that case.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
This commit is contained in:
Nicolas Pitre 2024-05-31 15:43:39 -04:00 committed by Anas Nashif
parent 94ade2578c
commit 64855973c0

View file

@ -27,6 +27,22 @@ __weak void z_arm64_mm_init(bool is_primary_core) { }
*/
void z_early_memset(void *dst, int c, size_t n)
{
if (((uintptr_t)dst & (sizeof(uint64_t) - 1)) == 0) {
/* speed-up if 64-bit aligned which should be the default */
uint64_t *d8 = dst;
uint64_t c8 = (uint8_t)c;
c8 |= c8 << 8;
c8 |= c8 << 16;
c8 |= c8 << 32;
while (n >= 8) {
*d8++ = c8;
n -= 8;
}
dst = d8;
}
uint8_t *d = dst;
while (n--) {
@ -36,6 +52,19 @@ void z_early_memset(void *dst, int c, size_t n)
void z_early_memcpy(void *dst, const void *src, size_t n)
{
if ((((uintptr_t)dst | (uintptr_t)src) & (sizeof(uint64_t) - 1)) == 0) {
/* speed-up if 64-bit aligned which should be the default */
uint64_t *d8 = dst;
const uint64_t *s8 = src;
while (n >= 8) {
*d8++ = *s8++;
n -= 8;
}
dst = d8;
src = s8;
}
uint8_t *d = dst;
const uint8_t *s = src;