Compare commits

...

2 commits
main ... win

Author SHA1 Message Date
Russell Keith-Magee
3383197b13
Added comment linking the original source, and a comment about recent changes. 2025-02-20 11:00:06 +08:00
d420bdbc06 Enable building on Windows 2025-02-19 20:44:57 -06:00
3 changed files with 29 additions and 250 deletions

View file

@ -30,7 +30,7 @@ jobs:
strategy:
fail-fast: false
matrix:
name: [ "macOS (x86_64)", "macOS (arm64)", "iOS", "Linux (x86_64)", "Linux (arm64)" ] # Android, Windows (x86_64), Windows (arm64)
name: [ "macOS (x86_64)", "macOS (arm64)", "iOS", "Linux (x86_64)", "Linux (arm64)", "Windows (x86_64)" ] # Android, Windows (x86_64), Windows (arm64)
include:
- name: macOS (x86_64)
platform: macos
@ -56,10 +56,10 @@ jobs:
# platform: android
# runs-on: macos-latest
# archs: auto
# - name: Windows (x86_64)
# platform: windows
# runs-on: windows-latest
# archs: auto
- name: Windows (x86_64)
platform: windows
runs-on: windows-latest
archs: auto
# - name: Windows (arm64)
# platform: windows
# runs-on: windows-latest-arm??

View file

@ -14,7 +14,6 @@
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
/* edit_dist -- returns the minimum edit distance between two strings

View file

@ -1,30 +1,39 @@
/*
this is a checksum routine that is specifically designed for spam.
Copyright Andrew Tridgell <tridge@samba.org> 2002
this is a checksum routine that is specifically designed for spam. Copyright
Andrew Tridgell <tridge@samba.org> 2002
This code is released under the GNU General Public License version 2
or later. Alternatively, you may also use this code under the terms
of the Perl Artistic license.
This code is released under the GNU General Public License version 2 or later.
Alternatively, you may also use this code under the terms of the Perl Artistic
license.
If you wish to distribute this code under the terms of a different
free software license then please ask me. If there is a good reason
then I will probably say yes.
If you wish to distribute this code under the terms of a different free
software license then please ask me. If there is a good reason then I will
probably say yes.
---
Modified by Russell Keith-Magee, 20 Jan 2009:
* removed the condition preventing comparison of small block sizes
(lines 364-366)
Original sources can be found at:
https://www.samba.org/ftp/unpacked/junkcode/spamsum/
Changes from the original:
20 Jan 2009:
* Removed the condition preventing comparison of small block sizes (lines
364-366)
* Modified the help string to be legal cross platform C.
20 Feb 2025:
* Removed the spamsum_match_db, spamsum_stdin, spamsum_file, and mainline
entry points, plus imports required by those methods. These methods were
not exposed by the Python binding, and were problematic for Windows support.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <ctype.h>
/* the output is a string of length 64 in base64 */
@ -448,232 +457,3 @@ u32 spamsum_match(const char *str1, const char *str2)
return score;
}
/*
return the maximum match for a file containing a list of spamsums
*/
u32 spamsum_match_db(const char *fname, const char *sum, u32 threshold)
{
FILE *f;
char line[100];
u32 best = 0;
f = fopen(fname, "r");
if (!f) return 0;
/* on each line of the database we compute the spamsum match
score. We then pick the best score */
while (fgets(line, sizeof(line)-1, f)) {
u32 score;
int len;
len = strlen(line);
if (line[len-1] == '\n') line[len-1] = 0;
score = spamsum_match(sum, line);
if (score > best) {
best = score;
if (best >= threshold) break;
}
}
fclose(f);
return best;
}
/*
return the spamsum on stdin
*/
static char *spamsum_stdin(u32 flags, u32 block_size)
{
uchar buf[10*1024];
uchar *msg;
u32 length = 0;
int n;
char *sum;
msg = malloc(sizeof(buf));
if (!msg) return NULL;
/* load the file, expanding the allocation as needed. */
while (1) {
n = read(0, buf, sizeof(buf));
if (n == -1 && errno == EINTR) continue;
if (n <= 0) break;
msg = realloc(msg, length + n);
if (!msg) return NULL;
memcpy(msg+length, buf, n);
length += n;
}
sum = spamsum(msg, length, flags, block_size);
free(msg);
return sum;
}
/*
return the spamsum on a file
*/
char *spamsum_file(const char *fname, u32 flags, u32 block_size)
{
int fd;
char *sum;
struct stat st;
uchar *msg;
if (strcmp(fname, "-") == 0) {
return spamsum_stdin(flags, block_size);
}
fd = open(fname, O_RDONLY);
if (fd == -1) {
perror(fname);
return NULL;
}
if (fstat(fd, &st) == -1) {
perror("fstat");
return NULL;
}
msg = mmap(NULL, st.st_size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0);
if (msg == (uchar *)-1) {
perror("mmap");
return NULL;
}
close(fd);
sum = spamsum(msg, st.st_size, flags, block_size);
munmap(msg, st.st_size);
return sum;
}
static void show_help(void)
{
printf("\n\
spamsum v1.1 written by Andrew Tridgell <tridge@samba.org>\n\
\n\
spamsum computes a signature string that is particular good for detecting if two emails\n\
are very similar. This can be used to detect SPAM.\n\
\n\
Syntax:\n\
spamsum [options] <files>\n\
or\n\
spamsum [options] -d sigs.txt -c SIG\n\
or\n\
spamsum [options] -d sigs.txt -C file\n\
\n\
When called with a list of filenames spamsum will write out the\n\
signatures of each file on a separate line. You can specify the\n\
filename '-' for standard input.\n\
\n\
When called with the second form, spamsum will print the best score\n\
for the given signature with the signatures in the given database. A\n\
score of 100 means a perfect match, and a score of 0 means a complete\n\
mismatch.\n\
\n\
When checking, spamsum returns 0 (success) when the message *is* spam,\n\
1 for internal errors, and 2 for messages whose signature is not\n\
found.\n\
\n\
The 3rd form is just like the second form, but you pass a file\n\
containing a message instead of a pre-computed signature.\n\
\n\
Options:\n\
-W ignore whitespace\n\
-H skip past mail headers\n\
-B <bsize> force a block size of bsize\n\
-T <threshold> set the threshold above which spamsum will stop\n\
looking (default 90)\n\
");
}
int main(int argc, char *argv[])
{
char *sum;
extern char *optarg;
extern int optind;
int c;
char *dbname = NULL;
u32 score;
int i;
u32 flags = 0;
u32 block_size = 0;
u32 threshold = 90;
while ((c = getopt(argc, argv, "B:WHd:c:C:hT:")) != -1) {
switch (c) {
case 'W':
flags |= FLAG_IGNORE_WHITESPACE;
break;
case 'H':
flags |= FLAG_IGNORE_HEADERS;
break;
case 'd':
dbname = optarg;
break;
case 'B':
block_size = atoi(optarg);
break;
case 'T':
threshold = atoi(optarg);
break;
case 'c':
if (!dbname) {
show_help();
exit(1);
}
score = spamsum_match_db(dbname, optarg,
threshold);
printf("%u\n", score);
exit(score >= threshold ? 0 : 2);
case 'C':
if (!dbname) {
show_help();
exit(1);
}
score = spamsum_match_db(dbname,
spamsum_file(optarg, flags,
block_size),
threshold);
printf("%u\n", score);
exit(score >= threshold ? 0 : 2);
case 'h':
default:
show_help();
exit(0);
}
}
argc -= optind;
argv += optind;
if (argc == 0) {
show_help();
return 0;
}
/* compute the spamsum on a list of files */
for (i=0;i<argc;i++) {
sum = spamsum_file(argv[i], flags, block_size);
printf("%s\n", sum);
free(sum);
}
return 0;
}