PEP621 project config refresh.
This commit is contained in:
parent
c0467a5c51
commit
08cebef85e
12 changed files with 195 additions and 139 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -7,6 +7,7 @@ dist
|
|||
build
|
||||
_build
|
||||
distribute-*
|
||||
.ruff_cache/
|
||||
.tox/
|
||||
.vscode/
|
||||
venv/
|
||||
29
.pre-commit-config.yaml
Normal file
29
.pre-commit-config.yaml
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-toml
|
||||
- id: check-yaml
|
||||
- id: check-case-conflict
|
||||
- id: check-docstring-first
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
# Docformatter 1.7.5 isn't compatible with Pre-commit 4.0
|
||||
# - repo: https://github.com/PyCQA/docformatter
|
||||
# rev: v1.7.5
|
||||
# hooks:
|
||||
# - id: docformatter
|
||||
# args: [--in-place, --black]
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.6
|
||||
hooks:
|
||||
# Run the linter.
|
||||
- id: ruff
|
||||
# Run the formatter.
|
||||
- id: ruff-format
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.4.1
|
||||
hooks:
|
||||
- id: codespell
|
||||
# remove toml extra once Python 3.10 is no longer supported
|
||||
additional_dependencies: ['.[toml]']
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
include LICENSE
|
||||
include tox.ini
|
||||
recursive-include tests *.py
|
||||
86
pyproject.toml
Normal file
86
pyproject.toml
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
[build-system]
|
||||
requires = [
|
||||
"setuptools==75.8.0",
|
||||
"setuptools_scm==8.1.0",
|
||||
]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
dynamic = ["version"]
|
||||
name = "pyspamsum"
|
||||
description = "A Python wrapper for Andrew Tridgell's spamsum algorithm"
|
||||
readme = "README.rst"
|
||||
requires-python = ">= 3.9"
|
||||
authors = [
|
||||
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
|
||||
]
|
||||
maintainers = [
|
||||
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
|
||||
]
|
||||
keywords = [
|
||||
"spamsum",
|
||||
]
|
||||
license.text = "New BSD"
|
||||
classifiers=[
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"License :: OSI Approved :: BSD License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"Topic :: Text Processing",
|
||||
"Topic :: Utilities",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
# Extras used by developers *of* pyspamsum are pinned to specific versions to
|
||||
# ensure environment consistency.
|
||||
dev = [
|
||||
"pre-commit == 4.1.0",
|
||||
"pytest == 8.3.4",
|
||||
"ruff == 0.9.6",
|
||||
"setuptools_scm == 8.1.0",
|
||||
"tox == 4.24.1",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/freakboy3742/pyspamsum/"
|
||||
Tracker = "https://github.com/freakboy3742/pyspamsum/issues"
|
||||
Source = "https://github.com/freakboy3742/pyspamsum/"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
filterwarnings = [
|
||||
"error",
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
# pycodestyle
|
||||
"E",
|
||||
# Pyflakes
|
||||
"F",
|
||||
# pyupgrade
|
||||
"UP",
|
||||
# flake8-bugbear
|
||||
"B",
|
||||
# flake8-simplify
|
||||
"SIM",
|
||||
# isort
|
||||
"I",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = ["spamsum"]
|
||||
|
||||
[tool.setuptools]
|
||||
ext-modules = [
|
||||
{name="spamsum", sources=["src/pyspamsum.c", "src/spamsum.c", "src/edit_dist.c"]},
|
||||
]
|
||||
|
||||
[tool.setuptools_scm]
|
||||
# To enable SCM versioning, we need an empty tool configuration for setuptools_scm
|
||||
11
setup.cfg
11
setup.cfg
|
|
@ -1,11 +0,0 @@
|
|||
|
||||
[flake8]
|
||||
# https://flake8.readthedocs.org/en/latest/
|
||||
exclude=\
|
||||
*/.eggs/*,\
|
||||
*/build/*,\
|
||||
.tox/*,\
|
||||
local/*,\
|
||||
venv*
|
||||
max-complexity = 25
|
||||
max-line-length = 119
|
||||
43
setup.py
43
setup.py
|
|
@ -1,43 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
import io
|
||||
|
||||
from setuptools import setup, Extension
|
||||
|
||||
|
||||
with io.open('README.rst', encoding='utf8') as readme:
|
||||
long_description = readme.read()
|
||||
|
||||
|
||||
setup(
|
||||
name="pyspamsum",
|
||||
version="1.0.5",
|
||||
description="A Python wrapper for Andrew Tridgell's spamsum algorithm",
|
||||
long_description=long_description,
|
||||
long_description_content_type='text/x-rst',
|
||||
author="Russell Keith-Magee",
|
||||
author_email="russell@keith-magee.com",
|
||||
url='http://github.com/freakboy3742/pyspamsum/',
|
||||
license="New BSD",
|
||||
classifiers=[
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'License :: OSI Approved :: BSD License',
|
||||
'Operating System :: OS Independent',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Topic :: Text Processing',
|
||||
'Topic :: Utilities',
|
||||
],
|
||||
ext_modules=[
|
||||
Extension(
|
||||
"spamsum", [
|
||||
"pyspamsum.c",
|
||||
"spamsum.c",
|
||||
"edit_dist.c",
|
||||
]
|
||||
)
|
||||
],
|
||||
test_suite='tests',
|
||||
)
|
||||
|
|
@ -159,7 +159,7 @@ register int from_len, to_len;
|
|||
infinity))
|
||||
|
||||
Since this only looks at most two rows and three columns back, we need
|
||||
only store the values for the two preceeding rows. In this
|
||||
only store the values for the two preceding rows. In this
|
||||
implementation, we do not explicitly store the zero column, so only 2 *
|
||||
from_len + 2 words are needed. However, in the implementation of the
|
||||
swap_cost check, the current matrix value is used as a buffer; we
|
||||
|
|
@ -192,8 +192,8 @@ register int from_len, to_len;
|
|||
strings are nonempty. We also don't need to consider swap costs in row
|
||||
1.
|
||||
|
||||
COMMENT: the indicies row and col below point into the STRING, so
|
||||
the corresponding MATRIX indicies are row+1 and col+1.
|
||||
COMMENT: the indices row and col below point into the STRING, so
|
||||
the corresponding MATRIX indices are row+1 and col+1.
|
||||
*/
|
||||
|
||||
buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
|
||||
|
|
@ -266,4 +266,3 @@ register int from_len, to_len;
|
|||
free((char *) buffer);
|
||||
return row;
|
||||
} /* edit_distn */
|
||||
|
||||
|
|
@ -156,4 +156,3 @@ initspamsum(void)
|
|||
return module;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
Copyright Andrew Tridgell <tridge@samba.org> 2002
|
||||
|
||||
This code is released under the GNU General Public License version 2
|
||||
or later. Alteratively, you may also use this code under the terms
|
||||
or later. Alternatively, you may also use this code under the terms
|
||||
of the Perl Artistic license.
|
||||
|
||||
If you wish to distribute this code under the terms of a different
|
||||
|
|
@ -231,7 +231,7 @@ again:
|
|||
we only accept a match if we have at least one common substring in
|
||||
the signature of length ROLLING_WINDOW. This dramatically drops the
|
||||
false positive rate for low score thresholds while having
|
||||
negligable affect on the rate of spam detection.
|
||||
negligible effect on the rate of spam detection.
|
||||
|
||||
return 1 if the two strings do have a common substring, 0 otherwise
|
||||
*/
|
||||
|
|
@ -242,7 +242,7 @@ static int has_common_substring(const char *s1, const char *s2)
|
|||
u32 hashes[SPAMSUM_LENGTH];
|
||||
|
||||
/* there are many possible algorithms for common substring
|
||||
detection. In this case I am re-using the rolling hash code
|
||||
detection. In this case I am reusing the rolling hash code
|
||||
to act as a filter for possible substring matches */
|
||||
|
||||
roll_reset();
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
import unittest
|
||||
|
||||
import spamsum
|
||||
|
||||
|
||||
class SpamSumTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.s1 = "I am the very model of a modern Major-General, I've information animal and vegetable and mineral"
|
||||
self.s2 = "I am the very model of a modern Brigadier, I've information animal and vegetable and something else"
|
||||
self.s3 = "Huh? Gilbert and Who?"
|
||||
|
||||
def test_edit_distance(self):
|
||||
self.assertEqual(spamsum.edit_distance(self.s1, self.s2), 27)
|
||||
self.assertEqual(spamsum.edit_distance(self.s2, self.s1), 27)
|
||||
self.assertEqual(spamsum.edit_distance(self.s1, self.s3), 93)
|
||||
self.assertEqual(spamsum.edit_distance(self.s2, self.s3), 96)
|
||||
|
||||
def test_spamsum(self):
|
||||
self.assertEqual(
|
||||
spamsum.spamsum(self.s1),
|
||||
'3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn'
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.spamsum(self.s2),
|
||||
'3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt'
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.spamsum(self.s3),
|
||||
'3:uZ3B:uZx'
|
||||
)
|
||||
|
||||
def test_match(self):
|
||||
self.assertEqual(
|
||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s1)),
|
||||
100
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s2)),
|
||||
72
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s1)),
|
||||
72
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s3)),
|
||||
0
|
||||
)
|
||||
self.assertEqual(
|
||||
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s3)),
|
||||
0
|
||||
)
|
||||
61
tests/test_spamsum.py
Normal file
61
tests/test_spamsum.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import pytest
|
||||
|
||||
import spamsum
|
||||
|
||||
GILBERT = (
|
||||
"I am the very model of a modern Major-General, "
|
||||
"I've information animal and vegetable and mineral"
|
||||
)
|
||||
NOT_GILBERT = (
|
||||
"I am the very model of a modern Brigadier, "
|
||||
"I've information animal and vegetable and something else"
|
||||
)
|
||||
IGNORANCE = "Huh? Gilbert and Who?"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"s1, s2, distance",
|
||||
[
|
||||
(GILBERT, NOT_GILBERT, 27),
|
||||
(NOT_GILBERT, GILBERT, 27),
|
||||
(GILBERT, IGNORANCE, 93),
|
||||
(NOT_GILBERT, IGNORANCE, 96),
|
||||
],
|
||||
)
|
||||
def test_edit_distance(s1, s2, distance):
|
||||
assert spamsum.edit_distance(s1, s2) == distance
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value, expected",
|
||||
[
|
||||
(
|
||||
GILBERT,
|
||||
"3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn",
|
||||
),
|
||||
(
|
||||
NOT_GILBERT,
|
||||
"3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt",
|
||||
),
|
||||
(
|
||||
IGNORANCE,
|
||||
"3:uZ3B:uZx",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_spamsum(value, expected):
|
||||
assert spamsum.spamsum(value) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"s1, s2, match",
|
||||
[
|
||||
(GILBERT, GILBERT, 100),
|
||||
(GILBERT, NOT_GILBERT, 72),
|
||||
(NOT_GILBERT, GILBERT, 72),
|
||||
(GILBERT, IGNORANCE, 0),
|
||||
(NOT_GILBERT, IGNORANCE, 0),
|
||||
],
|
||||
)
|
||||
def test_match(s1, s2, match):
|
||||
assert spamsum.match(spamsum.spamsum(s1), spamsum.spamsum(s2)) == match
|
||||
30
tox.ini
30
tox.ini
|
|
@ -1,25 +1,15 @@
|
|||
|
||||
[tox]
|
||||
envlist = flake8,package-py{36,37,38,39},py{36,37,38,39}
|
||||
envlist = pre-commit,py{39,310,311,312,313,314}
|
||||
skip_missing_interpreters = true
|
||||
|
||||
[testenv]
|
||||
commands =
|
||||
python setup.py test
|
||||
[testenv:pre-commit]
|
||||
package = wheel
|
||||
wheel_build_env = .pkg
|
||||
extras = dev
|
||||
commands = pre-commit run --all-files --show-diff-on-failure --color=always
|
||||
|
||||
[testenv:flake8]
|
||||
skip_install = True
|
||||
deps =
|
||||
flake8
|
||||
commands = flake8 {posargs}
|
||||
|
||||
[testenv:package-py{36,37,38,39}]
|
||||
skip_install = True
|
||||
deps =
|
||||
check_manifest
|
||||
wheel
|
||||
twine
|
||||
[testenv:py{,39,310,311,312,313,314}]
|
||||
depends = pre-commit
|
||||
extras = dev
|
||||
commands =
|
||||
check-manifest -v
|
||||
python setup.py sdist bdist_wheel
|
||||
python -m twine check dist/*
|
||||
python -m pytest {posargs:-vv --color yes}
|
||||
|
|
|
|||
Loading…
Reference in a new issue