Use custom LWIP checksum for ~13% faster checksums (#2172)
Use -O2 only on the LWIP checksum routine, resulting in a speedup of around 13% (checksumming only, not entire LWIP stack) for 72 add'l bytes of flash.
This commit is contained in:
parent
11814823ed
commit
367200a2c8
8 changed files with 123 additions and 2 deletions
121
cores/rp2040/sdkoverride/inet_chksum.cpp
Normal file
121
cores/rp2040/sdkoverride/inet_chksum.cpp
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
// Taken from LWIP's inet_chksum.c just to set the -O2 flag
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright (c) 2001-2004 Swedish Institute of Computer Science.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||||
|
SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||||
|
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||||
|
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
||||||
|
OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
This file is part of the lwIP TCP/IP stack.
|
||||||
|
|
||||||
|
Author: Adam Dunkels <adam@sics.se>
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "lwip/opt.h"
|
||||||
|
|
||||||
|
#include "lwip/inet_chksum.h"
|
||||||
|
#include "lwip/def.h"
|
||||||
|
#include "lwip/ip_addr.h"
|
||||||
|
|
||||||
|
#pragma GCC optimize ("O2")
|
||||||
|
/**
|
||||||
|
An optimized checksum routine. Basically, it uses loop-unrolling on
|
||||||
|
the checksum loop, treating the head and tail bytes specially, whereas
|
||||||
|
the inner loop acts on 8 bytes at a time.
|
||||||
|
|
||||||
|
@arg start of buffer to be checksummed. May be an odd byte address.
|
||||||
|
@len number of bytes in the buffer to be checksummed.
|
||||||
|
@return host order (!) lwip checksum (non-inverted Internet sum)
|
||||||
|
|
||||||
|
by Curt McDowell, Broadcom Corp. December 8th, 2005
|
||||||
|
*/
|
||||||
|
extern "C" u16_t lwip_standard_chksum(const void *dataptr, int len) {
|
||||||
|
const u8_t *pb = (const u8_t *)dataptr;
|
||||||
|
const u16_t *ps;
|
||||||
|
u16_t t = 0;
|
||||||
|
const u32_t *pl;
|
||||||
|
u32_t sum = 0, tmp;
|
||||||
|
/* starts at odd byte address? */
|
||||||
|
int odd = ((mem_ptr_t)pb & 1);
|
||||||
|
|
||||||
|
if (odd && len > 0) {
|
||||||
|
((u8_t *)&t)[1] = *pb++;
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
ps = (const u16_t *)(const void *)pb;
|
||||||
|
|
||||||
|
if (((mem_ptr_t)ps & 3) && len > 1) {
|
||||||
|
sum += *ps++;
|
||||||
|
len -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
pl = (const u32_t *)(const void *)ps;
|
||||||
|
|
||||||
|
while (len > 7) {
|
||||||
|
tmp = sum + *pl++; /* ping */
|
||||||
|
if (tmp < sum) {
|
||||||
|
tmp++; /* add back carry */
|
||||||
|
}
|
||||||
|
|
||||||
|
sum = tmp + *pl++; /* pong */
|
||||||
|
if (sum < tmp) {
|
||||||
|
sum++; /* add back carry */
|
||||||
|
}
|
||||||
|
|
||||||
|
len -= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* make room in upper bits */
|
||||||
|
sum = FOLD_U32T(sum);
|
||||||
|
|
||||||
|
ps = (const u16_t *)pl;
|
||||||
|
|
||||||
|
/* 16-bit aligned word remaining? */
|
||||||
|
while (len > 1) {
|
||||||
|
sum += *ps++;
|
||||||
|
len -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* dangling tail byte remaining? */
|
||||||
|
if (len > 0) { /* include odd byte */
|
||||||
|
((u8_t *)&t)[0] = *(const u8_t *)ps;
|
||||||
|
}
|
||||||
|
|
||||||
|
sum += t; /* add end bytes */
|
||||||
|
|
||||||
|
/* Fold 32-bit sum to 16 bits
|
||||||
|
calling this twice is probably faster than if statements... */
|
||||||
|
sum = FOLD_U32T(sum);
|
||||||
|
sum = FOLD_U32T(sum);
|
||||||
|
|
||||||
|
if (odd) {
|
||||||
|
sum = SWAP_BYTES_IN_WORD(sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (u16_t)sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ extern unsigned long __lwip_rand(void);
|
||||||
#define MEMP_STATS 0
|
#define MEMP_STATS 0
|
||||||
#define LINK_STATS 0
|
#define LINK_STATS 0
|
||||||
// #define ETH_PAD_SIZE 2
|
// #define ETH_PAD_SIZE 2
|
||||||
#define LWIP_CHKSUM_ALGORITHM 3
|
#define LWIP_CHKSUM_ALGORITHM 0
|
||||||
#define LWIP_DHCP 1
|
#define LWIP_DHCP 1
|
||||||
#define LWIP_IPV4 1
|
#define LWIP_IPV4 1
|
||||||
#define LWIP_TCP 1
|
#define LWIP_TCP 1
|
||||||
|
|
|
||||||
BIN
lib/libpico.a
BIN
lib/libpico.a
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -43,7 +43,7 @@ extern unsigned long __lwip_rand(void);
|
||||||
#define MEMP_STATS 0
|
#define MEMP_STATS 0
|
||||||
#define LINK_STATS 0
|
#define LINK_STATS 0
|
||||||
// #define ETH_PAD_SIZE 2
|
// #define ETH_PAD_SIZE 2
|
||||||
#define LWIP_CHKSUM_ALGORITHM 3
|
#define LWIP_CHKSUM_ALGORITHM 0
|
||||||
#define LWIP_DHCP 1
|
#define LWIP_DHCP 1
|
||||||
#define LWIP_IPV4 1
|
#define LWIP_IPV4 1
|
||||||
#define LWIP_TCP 1
|
#define LWIP_TCP 1
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue