PEP621 project config refresh.

This commit is contained in:
Russell Keith-Magee 2025-02-18 14:06:59 +08:00
parent c0467a5c51
commit 08cebef85e
No known key found for this signature in database
GPG key ID: 3D2DAB6A37BB5BC3
12 changed files with 195 additions and 139 deletions

3
.gitignore vendored
View file

@ -7,6 +7,7 @@ dist
build
_build
distribute-*
.ruff_cache/
.tox/
.vscode/
venv/
venv/

29
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,29 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-toml
- id: check-yaml
- id: check-case-conflict
- id: check-docstring-first
- id: end-of-file-fixer
- id: trailing-whitespace
# Docformatter 1.7.5 isn't compatible with Pre-commit 4.0
# - repo: https://github.com/PyCQA/docformatter
# rev: v1.7.5
# hooks:
# - id: docformatter
# args: [--in-place, --black]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
hooks:
# Run the linter.
- id: ruff
# Run the formatter.
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
# remove toml extra once Python 3.10 is no longer supported
additional_dependencies: ['.[toml]']

View file

@ -1,3 +0,0 @@
include LICENSE
include tox.ini
recursive-include tests *.py

86
pyproject.toml Normal file
View file

@ -0,0 +1,86 @@
[build-system]
requires = [
"setuptools==75.8.0",
"setuptools_scm==8.1.0",
]
build-backend = "setuptools.build_meta"
[project]
dynamic = ["version"]
name = "pyspamsum"
description = "A Python wrapper for Andrew Tridgell's spamsum algorithm"
readme = "README.rst"
requires-python = ">= 3.9"
authors = [
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
]
maintainers = [
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
]
keywords = [
"spamsum",
]
license.text = "New BSD"
classifiers=[
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Topic :: Text Processing",
"Topic :: Utilities",
]
[project.optional-dependencies]
# Extras used by developers *of* pyspamsum are pinned to specific versions to
# ensure environment consistency.
dev = [
"pre-commit == 4.1.0",
"pytest == 8.3.4",
"ruff == 0.9.6",
"setuptools_scm == 8.1.0",
"tox == 4.24.1",
]
[project.urls]
Homepage = "https://github.com/freakboy3742/pyspamsum/"
Tracker = "https://github.com/freakboy3742/pyspamsum/issues"
Source = "https://github.com/freakboy3742/pyspamsum/"
[tool.pytest.ini_options]
testpaths = ["tests"]
filterwarnings = [
"error",
]
[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
]
[tool.ruff.lint.isort]
known-first-party = ["spamsum"]
[tool.setuptools]
ext-modules = [
{name="spamsum", sources=["src/pyspamsum.c", "src/spamsum.c", "src/edit_dist.c"]},
]
[tool.setuptools_scm]
# To enable SCM versioning, we need an empty tool configuration for setuptools_scm

View file

@ -1,11 +0,0 @@
[flake8]
# https://flake8.readthedocs.org/en/latest/
exclude=\
*/.eggs/*,\
*/build/*,\
.tox/*,\
local/*,\
venv*
max-complexity = 25
max-line-length = 119

View file

@ -1,43 +0,0 @@
#!/usr/bin/env python
# Packaging script: declares the "pyspamsum" distribution metadata and the
# C extension module it ships, via a single setuptools.setup() call.
import io
from setuptools import setup, Extension
# Read the README as UTF-8 text; it is passed to setup() below as the long
# description, with its content type declared as reStructuredText.
with io.open('README.rst', encoding='utf8') as readme:
long_description = readme.read()
setup(
name="pyspamsum",
# The version is a hard-coded string literal in this script.
version="1.0.5",
description="A Python wrapper for Andrew Tridgell's spamsum algorithm",
long_description=long_description,
long_description_content_type='text/x-rst',
author="Russell Keith-Magee",
author_email="russell@keith-magee.com",
url='http://github.com/freakboy3742/pyspamsum/',
license="New BSD",
classifiers=[
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Topic :: Text Processing',
'Topic :: Utilities',
],
# One extension module, importable as "spamsum", built from three C sources
# referenced by paths relative to this script.
ext_modules=[
Extension(
"spamsum", [
"pyspamsum.c",
"spamsum.c",
"edit_dist.c",
]
)
],
test_suite='tests',
)

View file

@ -159,7 +159,7 @@ register int from_len, to_len;
infinity))
Since this only looks at most two rows and three columns back, we need
only store the values for the two preceeding rows. In this
only store the values for the two preceding rows. In this
implementation, we do not explicitly store the zero column, so only 2 *
from_len + 2 words are needed. However, in the implementation of the
swap_cost check, the current matrix value is used as a buffer; we
@ -192,8 +192,8 @@ register int from_len, to_len;
strings are nonempty. We also don't need to consider swap costs in row
1.
COMMENT: the indicies row and col below point into the STRING, so
the corresponding MATRIX indicies are row+1 and col+1.
COMMENT: the indices row and col below point into the STRING, so
the corresponding MATRIX indices are row+1 and col+1.
*/
buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
@ -266,4 +266,3 @@ register int from_len, to_len;
free((char *) buffer);
return row;
} /* edit_distn */

View file

@ -156,4 +156,3 @@ initspamsum(void)
return module;
#endif
}

View file

@ -3,7 +3,7 @@
Copyright Andrew Tridgell <tridge@samba.org> 2002
This code is released under the GNU General Public License version 2
or later. Alteratively, you may also use this code under the terms
or later. Alternatively, you may also use this code under the terms
of the Perl Artistic license.
If you wish to distribute this code under the terms of a different
@ -231,7 +231,7 @@ again:
we only accept a match if we have at least one common substring in
the signature of length ROLLING_WINDOW. This dramatically drops the
false positive rate for low score thresholds while having
negligable affect on the rate of spam detection.
negligible effect on the rate of spam detection.
return 1 if the two strings do have a common substring, 0 otherwise
*/
@ -242,7 +242,7 @@ static int has_common_substring(const char *s1, const char *s2)
u32 hashes[SPAMSUM_LENGTH];
/* there are many possible algorithms for common substring
detection. In this case I am re-using the rolling hash code
detection. In this case I am reusing the rolling hash code
to act as a filter for possible substring matches */
roll_reset();
@ -676,4 +676,4 @@ int main(int argc, char *argv[])
}
return 0;
}
}

View file

@ -1,52 +0,0 @@
import unittest
import spamsum
# unittest-based checks of the spamsum module's edit_distance(), spamsum()
# and match() functions against fixed expected values.
class SpamSumTest(unittest.TestCase):
# Create the three sample strings shared by every test method: two long,
# similar sentences (s1, s2) and one short, different one (s3).
def setUp(self):
self.s1 = "I am the very model of a modern Major-General, I've information animal and vegetable and mineral"
self.s2 = "I am the very model of a modern Brigadier, I've information animal and vegetable and something else"
self.s3 = "Huh? Gilbert and Who?"
# edit_distance() is checked in both argument orders for s1/s2 (27 each way)
# and against the short string s3.
def test_edit_distance(self):
self.assertEqual(spamsum.edit_distance(self.s1, self.s2), 27)
self.assertEqual(spamsum.edit_distance(self.s2, self.s1), 27)
self.assertEqual(spamsum.edit_distance(self.s1, self.s3), 93)
self.assertEqual(spamsum.edit_distance(self.s2, self.s3), 96)
# spamsum() must return exactly the recorded signature string for each sample.
def test_spamsum(self):
self.assertEqual(
spamsum.spamsum(self.s1),
'3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn'
)
self.assertEqual(
spamsum.spamsum(self.s2),
'3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt'
)
self.assertEqual(
spamsum.spamsum(self.s3),
'3:uZ3B:uZx'
)
# match() takes two signatures produced by spamsum(). Expected scores:
# 100 for a signature against itself, 72 for s1 vs s2 in either order,
# and 0 when either long sample is compared with s3.
def test_match(self):
self.assertEqual(
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s1)),
100
)
self.assertEqual(
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s2)),
72
)
self.assertEqual(
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s1)),
72
)
self.assertEqual(
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s3)),
0
)
self.assertEqual(
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s3)),
0
)

61
tests/test_spamsum.py Normal file
View file

@ -0,0 +1,61 @@
import pytest
import spamsum
# Sample texts shared by the parametrized tests in this module.

# Baseline sentence.
GILBERT = (
"I am the very model of a modern Major-General, "
"I've information animal and vegetable and mineral"
)
# Same sentence as GILBERT with a different rank ("Brigadier") and a
# different ending ("something else").
NOT_GILBERT = (
"I am the very model of a modern Brigadier, "
"I've information animal and vegetable and something else"
)
# Short text; the match cases in this module expect a score of 0 when it is
# compared with either sentence above.
IGNORANCE = "Huh? Gilbert and Who?"
@pytest.mark.parametrize(
    "s1, s2, distance",
    [
        (GILBERT, NOT_GILBERT, 27),
        (NOT_GILBERT, GILBERT, 27),
        (GILBERT, IGNORANCE, 93),
        (NOT_GILBERT, IGNORANCE, 96),
    ],
)
def test_edit_distance(s1, s2, distance):
    """The edit distance between two sample texts equals the recorded value."""
    actual = spamsum.edit_distance(s1, s2)
    assert actual == distance
@pytest.mark.parametrize(
    "value, expected",
    [
        (GILBERT, "3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn"),
        (
            NOT_GILBERT,
            "3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt",
        ),
        (IGNORANCE, "3:uZ3B:uZx"),
    ],
)
def test_spamsum(value, expected):
    """Each sample text produces exactly its recorded spamsum signature."""
    signature = spamsum.spamsum(value)
    assert signature == expected
@pytest.mark.parametrize(
    "s1, s2, match",
    [
        (GILBERT, GILBERT, 100),
        (GILBERT, NOT_GILBERT, 72),
        (NOT_GILBERT, GILBERT, 72),
        (GILBERT, IGNORANCE, 0),
        (NOT_GILBERT, IGNORANCE, 0),
    ],
)
def test_match(s1, s2, match):
    """Comparing the signatures of two sample texts yields the recorded score."""
    # Signatures are computed left to right, the same order as the arguments.
    first_signature = spamsum.spamsum(s1)
    second_signature = spamsum.spamsum(s2)
    score = spamsum.match(first_signature, second_signature)
    assert score == match

30
tox.ini
View file

@ -1,25 +1,15 @@
[tox]
envlist = flake8,package-py{36,37,38,39},py{36,37,38,39}
envlist = pre-commit,py{39,310,311,312,313,314}
skip_missing_interpreters = true
[testenv]
commands =
python setup.py test
[testenv:pre-commit]
package = wheel
wheel_build_env = .pkg
extras = dev
commands = pre-commit run --all-files --show-diff-on-failure --color=always
[testenv:flake8]
skip_install = True
deps =
flake8
commands = flake8 {posargs}
[testenv:package-py{36,37,38,39}]
skip_install = True
deps =
check_manifest
wheel
twine
[testenv:py{,39,310,311,312,313,314}]
depends = pre-commit
extras = dev
commands =
check-manifest -v
python setup.py sdist bdist_wheel
python -m twine check dist/*
python -m pytest {posargs:-vv --color yes}