Added test suite.

This commit is contained in:
Russell Keith-Magee 2018-05-30 11:28:37 +08:00
parent d4e9a087ef
commit 563d6019ed
No known key found for this signature in database
GPG key ID: 3D2DAB6A37BB5BC3
2 changed files with 79 additions and 38 deletions

View file

@ -1,47 +1,36 @@
# NOTE(review): this span is a rendered diff hunk (see the "@ -1,47 +1,36 @"
# header just above), so lines from the old and the new version of the script
# appear interleaved without +/- markers and without their original
# indentation. It is not runnable as written. The `name = "..."` spellings
# alongside the distutils import look like the removed side and the
# `name="..."` spellings alongside the setuptools import look like the added
# side -- confirm against the repository before relying on that.
from distutils.core import setup, Extension
#!/usr/bin/env python
import io
setup(name = "pyspamsum",
version = "1.0.4",
author = "Russell Keith-Magee",
author_email = "russell@keith-magee.com",
url = 'http://github.com/freakboy3742/pyspamsum/',
license = "New BSD",
classifiers = [
from setuptools import find_packages, setup, Extension
# Read the long description from README.rst (decoded as UTF-8); it is passed
# to setup() below via long_description=long_description.
with io.open('README.rst', encoding='utf8') as readme:
long_description = readme.read()
setup(
name="pyspamsum",
version="1.0.4",
description="A Python wrapper for Andrew Tridgell's spamsum algorithm",
long_description=long_description,
author="Russell Keith-Magee",
author_email="russell@keith-magee.com",
url='http://github.com/freakboy3742/pyspamsum/',
license="New BSD",
classifiers=[
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Topic :: Text Processing',
'Topic :: Utilities',
],
platforms = ["any"],
description = "A Python wrapper for Andrew Tridgell's spamsum algorithm",
long_description = """
spamsum is a fuzzy hash specifically designed for hashing email messages
to detect if they are SPAM. The spamsum utility includes the ability to
generate the spamsum hash and check a new message against a existing set
of hashes to find a match.
pyspamsum is a Python wrapper for the core API of spamsum.
The original spamsum code has been licensed under the terms of the
the Perl Artistic License. It has been slightly modified to remove
a special case for small window size checksums.
The original code is Copyright Andrew Tridgell <tridge@samba.org> 2002.
It forms part of Andrew's junkcode, and is available here:
http://www.samba.org/junkcode/#spamsum
The spamsum code in this project is derived from an updated version that
was published at Linux.conf.au 2004:
http://linux.anu.edu.au/linux.conf.au/2004/papers/junkcode/spamsum
For details on spamsum itself, please see the spamsum README:
http://samba.org/ftp/unpacked/junkcode/spamsum/README
""",
# Declares one C extension module named "spamsum". The list of C source files
# is only partly visible here because the next hunk header interrupts it.
ext_modules = [
ext_modules=[
Extension(
"spamsum", [
"pyspamsum.c",
@ -49,5 +38,6 @@ For details on spamsum itself, please see the spamsum README:
"edit_dist.c",
]
)
]
],
# Points the test_suite option at the 'tests' package (tests/__init__.py).
test_suite='tests',
)

51
tests/__init__.py Normal file
View file

@ -0,0 +1,51 @@
import unittest
import spamsum
class SpamSumTest(unittest.TestCase):
    """Checks the spamsum extension module against hard-coded expected values.

    Three sample texts are used: two near-identical sentences (s1, s2) and
    one short, different sentence (s3).
    """

    def setUp(self):
        """Create the sample texts shared by every test method."""
        self.s3 = "Huh? Gilbert and Who?"
        self.s2 = "I am the very model of a modern Brigadier, I've information animal and vegetable and something else"
        self.s1 = "I am the very model of a modern Major-General, I've information animal and vegetable and mineral"

    def test_edit_distance(self):
        # Each row is (first text, second text, expected edit distance).
        # s1/s2 are checked in both argument orders.
        cases = (
            (self.s1, self.s2, 27),
            (self.s2, self.s1, 27),
            (self.s1, self.s3, 93),
            (self.s2, self.s3, 96),
        )
        for left, right, distance in cases:
            self.assertEqual(spamsum.edit_distance(left, right), distance)

    def test_spamsum(self):
        # Each row is (text, expected spamsum signature).
        cases = (
            (
                self.s1,
                '3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn',
            ),
            (
                self.s2,
                '3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt',
            ),
            (
                self.s3,
                '3:uZ3B:uZx',
            ),
        )
        for text, signature in cases:
            self.assertEqual(spamsum.spamsum(text), signature)

    def test_match(self):
        # Each row is (first text, second text, expected match score).
        # Signatures are recomputed for every comparison rather than cached.
        cases = (
            (self.s1, self.s1, 100),
            (self.s1, self.s2, 72),
            (self.s2, self.s1, 72),
            (self.s1, self.s3, 0),
            (self.s2, self.s3, 0),
        )
        for left, right, score in cases:
            self.assertEqual(
                spamsum.match(spamsum.spamsum(left), spamsum.spamsum(right)),
                score
            )