From 9d639d6dcc8024bff97141d5ceea49e404355c7b Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Thu, 21 Aug 2014 20:22:10 +0300
Subject: [PATCH] genlz77: Generic LZ77 hash compressor.

---
Edited by hand after export: the match-extension loop in tinf_compress()
is now bounded by the end of the input, short inputs no longer form a
pointer before the buffer, and the header has an include guard. Line
counts below are updated; the blob ids on the "index" lines are those of
the original export and no longer match the contents.

 src/genlz77.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/genlz77.h |   9 ++
 2 files changed, 144 insertions(+)
 create mode 100644 src/genlz77.c
 create mode 100644 src/genlz77.h

diff --git a/src/genlz77.c b/src/genlz77.c
new file mode 100644
index 0000000..66c7ca5
--- /dev/null
+++ b/src/genlz77.c
@@ -0,0 +1,135 @@
+/*
+ * genlz77 - Generic LZ77 compressor
+ *
+ * Copyright (c) 2014 by Paul Sokolovsky
+ *
+ * This software is provided 'as-is', without any express
+ * or implied warranty. In no event will the authors be
+ * held liable for any damages arising from the use of
+ * this software.
+ *
+ * Permission is granted to anyone to use this software
+ * for any purpose, including commercial applications,
+ * and to alter it and redistribute it freely, subject to
+ * the following restrictions:
+ *
+ * 1. The origin of this software must not be
+ *    misrepresented; you must not claim that you
+ *    wrote the original software. If you use this
+ *    software in a product, an acknowledgment in
+ *    the product documentation would be appreciated
+ *    but is not required.
+ *
+ * 2. Altered source versions must be plainly marked
+ *    as such, and must not be misrepresented as
+ *    being the original software.
+ *
+ * 3. This notice may not be removed or altered from
+ *    any source distribution.
+ */
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include "defl_static.h"
+
+#define HASH_BITS 12
+#define HASH_SIZE (1<<HASH_BITS)
+
+/*
+ * NOTE(review): the text between HASH_SIZE and the last lines of HASH()
+ * was lost when this patch was extracted. The three limits and the
+ * packing of v below are reconstructed from how they are used further
+ * down and from the DEFLATE limits (RFC 1951); confirm against upstream.
+ */
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+#define MAX_OFFSET 32768
+
+/* Hash the first three bytes at p into a table index below HASH_SIZE. */
+static inline int HASH(const uint8_t *p)
+{
+    unsigned v = (p[0] << 16) | (p[1] << 8) | p[2];
+    int hash = ((v >> (3*8 - HASH_BITS)) - v) & (HASH_SIZE - 1);
+    return hash;
+}
+
+#ifdef DUMP_LZTXT
+
+/* Counter for approximate compressed length in LZTXT mode. */
+/* Literal is counted as 1, copy as 2 bytes. */
+unsigned approx_compressed_len;
+
+/* Print one literal as hex; bytes outside 0x20..0x7e are shown as '?'. */
+void literal(void *data, uint8_t val)
+{
+    printf("L%02x # %c\n", val, (val >= 0x20 && val <= 0x7e) ? val : '?');
+    approx_compressed_len++;
+}
+
+/* Print one back-reference as text: offset first, then length. */
+void copy(void *data, unsigned offset, unsigned len)
+{
+    printf("C-%u,%u\n", offset, len);
+    approx_compressed_len += 2;
+}
+
+#else
+
+/* Forward one literal byte to zlib_literal(). */
+static inline void literal(void *data, uint8_t val)
+{
+    zlib_literal(data, val);
+}
+
+/* Forward one (offset, len) back-reference to zlib_match(). */
+static inline void copy(void *data, unsigned offset, unsigned len)
+{
+    zlib_match(data, offset, len);
+}
+
+#endif
+
+
+/*
+ * Turn slen bytes at src into a sequence of literal() and copy() calls.
+ *
+ * data - opaque pointer handed unchanged to literal() and copy()
+ * src  - input bytes; they are only read
+ * slen - number of input bytes; may be smaller than MIN_MATCH, or 0
+ *
+ * Each position the scan stops at is hashed and stored in a table that
+ * keeps one (the latest) position per hash value; a hit is verified with
+ * memcmp() before use. The table is a local array of HASH_SIZE pointers.
+ */
+void tinf_compress(void *data, const uint8_t *src, unsigned slen)
+{
+    const uint8_t *hashtable[HASH_SIZE] = {0};
+    const uint8_t *end = src + slen;
+
+    /* Pointer difference instead of "src + slen - MIN_MATCH": that */
+    /* pointer lies before the buffer when slen < MIN_MATCH. */
+    while (end - src > MIN_MATCH) {
+        int h = HASH(src);
+        const uint8_t **bucket = &hashtable[h & (HASH_SIZE - 1)];
+        const uint8_t *subs = *bucket;
+        *bucket = src;
+        if (subs && src > subs && (src - subs) <= MAX_OFFSET && !memcmp(src, subs, MIN_MATCH)) {
+            src += MIN_MATCH;
+            const uint8_t *m = subs + MIN_MATCH;
+            int len = MIN_MATCH;
+            /* Bounds go first: *src must not be read once src == end, */
+            /* and a match must never claim bytes past the input. */
+            while (src < end && len < MAX_MATCH && *src == *m) {
+                src++; m++; len++;
+            }
+            copy(data, src - len - subs, len);
+        } else {
+            literal(data, *src++);
+        }
+    }
+    // Process buffer tail, which is at most MIN_MATCH bytes
+    // (and so it doesn't make sense to look for matches there)
+    while (src < end) {
+        literal(data, *src++);
+    }
+}
diff --git a/src/genlz77.h b/src/genlz77.h
new file mode 100644
index 0000000..0f8e6f5
--- /dev/null
+++ b/src/genlz77.h
@@ -0,0 +1,9 @@
+#ifndef GENLZ77_H
+#define GENLZ77_H
+
+#include <stdint.h>
+
+/* Turn slen bytes at src into literal/match output; data is passed through. */
+void tinf_compress(void *data, const uint8_t *src, unsigned slen);
+
+#endif /* GENLZ77_H */