PEP621 project config refresh.
This commit is contained in:
parent
c0467a5c51
commit
08cebef85e
12 changed files with 195 additions and 139 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -7,6 +7,7 @@ dist
|
||||||
build
|
build
|
||||||
_build
|
_build
|
||||||
distribute-*
|
distribute-*
|
||||||
|
.ruff_cache/
|
||||||
.tox/
|
.tox/
|
||||||
.vscode/
|
.vscode/
|
||||||
venv/
|
venv/
|
||||||
|
|
|
||||||
29
.pre-commit-config.yaml
Normal file
29
.pre-commit-config.yaml
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v5.0.0
|
||||||
|
hooks:
|
||||||
|
- id: check-toml
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-case-conflict
|
||||||
|
- id: check-docstring-first
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: trailing-whitespace
|
||||||
|
# Docformatter 1.7.5 isn't compatible with Pre-commit 4.0
|
||||||
|
# - repo: https://github.com/PyCQA/docformatter
|
||||||
|
# rev: v1.7.5
|
||||||
|
# hooks:
|
||||||
|
# - id: docformatter
|
||||||
|
# args: [--in-place, --black]
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.9.6
|
||||||
|
hooks:
|
||||||
|
# Run the linter.
|
||||||
|
- id: ruff
|
||||||
|
# Run the formatter.
|
||||||
|
- id: ruff-format
|
||||||
|
- repo: https://github.com/codespell-project/codespell
|
||||||
|
rev: v2.4.1
|
||||||
|
hooks:
|
||||||
|
- id: codespell
|
||||||
|
# remove toml extra once Python 3.10 is no longer supported
|
||||||
|
additional_dependencies: ['.[toml]']
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
include LICENSE
|
|
||||||
include tox.ini
|
|
||||||
recursive-include tests *.py
|
|
||||||
86
pyproject.toml
Normal file
86
pyproject.toml
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
[build-system]
|
||||||
|
requires = [
|
||||||
|
"setuptools==75.8.0",
|
||||||
|
"setuptools_scm==8.1.0",
|
||||||
|
]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
dynamic = ["version"]
|
||||||
|
name = "pyspamsum"
|
||||||
|
description = "A Python wrapper for Andrew Tridgell's spamsum algorithm"
|
||||||
|
readme = "README.rst"
|
||||||
|
requires-python = ">= 3.9"
|
||||||
|
authors = [
|
||||||
|
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
|
||||||
|
]
|
||||||
|
maintainers = [
|
||||||
|
{name="Russell Keith-Magee", email="russell@keith-magee.com"}
|
||||||
|
]
|
||||||
|
keywords = [
|
||||||
|
"spamsum",
|
||||||
|
]
|
||||||
|
license.text = "New BSD"
|
||||||
|
classifiers=[
|
||||||
|
"Development Status :: 5 - Production/Stable",
|
||||||
|
"License :: OSI Approved :: BSD License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
|
"Programming Language :: Python :: 3.14",
|
||||||
|
"Topic :: Text Processing",
|
||||||
|
"Topic :: Utilities",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
# Extras used by developers *of* pyspamsum are pinned to specific versions to
|
||||||
|
# ensure environment consistency.
|
||||||
|
dev = [
|
||||||
|
"pre-commit == 4.1.0",
|
||||||
|
"pytest == 8.3.4",
|
||||||
|
"ruff == 0.9.6",
|
||||||
|
"setuptools_scm == 8.1.0",
|
||||||
|
"tox == 4.24.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://github.com/freakboy3742/pyspamsum/"
|
||||||
|
Tracker = "https://github.com/freakboy3742/pyspamsum/issues"
|
||||||
|
Source = "https://github.com/freakboy3742/pyspamsum/"
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
filterwarnings = [
|
||||||
|
"error",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = [
|
||||||
|
# pycodestyle
|
||||||
|
"E",
|
||||||
|
# Pyflakes
|
||||||
|
"F",
|
||||||
|
# pyupgrade
|
||||||
|
"UP",
|
||||||
|
# flake8-bugbear
|
||||||
|
"B",
|
||||||
|
# flake8-simplify
|
||||||
|
"SIM",
|
||||||
|
# isort
|
||||||
|
"I",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint.isort]
|
||||||
|
known-first-party = ["spamsum"]
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
ext-modules = [
|
||||||
|
{name="spamsum", sources=["src/pyspamsum.c", "src/spamsum.c", "src/edit_dist.c"]},
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.setuptools_scm]
|
||||||
|
# To enable SCM versioning, we need an empty tool configuration for setuptools_scm
|
||||||
11
setup.cfg
11
setup.cfg
|
|
@ -1,11 +0,0 @@
|
||||||
|
|
||||||
[flake8]
|
|
||||||
# https://flake8.readthedocs.org/en/latest/
|
|
||||||
exclude=\
|
|
||||||
*/.eggs/*,\
|
|
||||||
*/build/*,\
|
|
||||||
.tox/*,\
|
|
||||||
local/*,\
|
|
||||||
venv*
|
|
||||||
max-complexity = 25
|
|
||||||
max-line-length = 119
|
|
||||||
43
setup.py
43
setup.py
|
|
@ -1,43 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
import io
|
|
||||||
|
|
||||||
from setuptools import setup, Extension
|
|
||||||
|
|
||||||
|
|
||||||
with io.open('README.rst', encoding='utf8') as readme:
|
|
||||||
long_description = readme.read()
|
|
||||||
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name="pyspamsum",
|
|
||||||
version="1.0.5",
|
|
||||||
description="A Python wrapper for Andrew Tridgell's spamsum algorithm",
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type='text/x-rst',
|
|
||||||
author="Russell Keith-Magee",
|
|
||||||
author_email="russell@keith-magee.com",
|
|
||||||
url='http://github.com/freakboy3742/pyspamsum/',
|
|
||||||
license="New BSD",
|
|
||||||
classifiers=[
|
|
||||||
'Development Status :: 5 - Production/Stable',
|
|
||||||
'License :: OSI Approved :: BSD License',
|
|
||||||
'Operating System :: OS Independent',
|
|
||||||
'Programming Language :: Python :: 3',
|
|
||||||
'Programming Language :: Python :: 3.6',
|
|
||||||
'Programming Language :: Python :: 3.7',
|
|
||||||
'Programming Language :: Python :: 3.8',
|
|
||||||
'Programming Language :: Python :: 3.9',
|
|
||||||
'Topic :: Text Processing',
|
|
||||||
'Topic :: Utilities',
|
|
||||||
],
|
|
||||||
ext_modules=[
|
|
||||||
Extension(
|
|
||||||
"spamsum", [
|
|
||||||
"pyspamsum.c",
|
|
||||||
"spamsum.c",
|
|
||||||
"edit_dist.c",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
],
|
|
||||||
test_suite='tests',
|
|
||||||
)
|
|
||||||
|
|
@ -159,7 +159,7 @@ register int from_len, to_len;
|
||||||
infinity))
|
infinity))
|
||||||
|
|
||||||
Since this only looks at most two rows and three columns back, we need
|
Since this only looks at most two rows and three columns back, we need
|
||||||
only store the values for the two preceeding rows. In this
|
only store the values for the two preceding rows. In this
|
||||||
implementation, we do not explicitly store the zero column, so only 2 *
|
implementation, we do not explicitly store the zero column, so only 2 *
|
||||||
from_len + 2 words are needed. However, in the implementation of the
|
from_len + 2 words are needed. However, in the implementation of the
|
||||||
swap_cost check, the current matrix value is used as a buffer; we
|
swap_cost check, the current matrix value is used as a buffer; we
|
||||||
|
|
@ -192,8 +192,8 @@ register int from_len, to_len;
|
||||||
strings are nonempty. We also don't need to consider swap costs in row
|
strings are nonempty. We also don't need to consider swap costs in row
|
||||||
1.
|
1.
|
||||||
|
|
||||||
COMMENT: the indicies row and col below point into the STRING, so
|
COMMENT: the indices row and col below point into the STRING, so
|
||||||
the corresponding MATRIX indicies are row+1 and col+1.
|
the corresponding MATRIX indices are row+1 and col+1.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
|
buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
|
||||||
|
|
@ -266,4 +266,3 @@ register int from_len, to_len;
|
||||||
free((char *) buffer);
|
free((char *) buffer);
|
||||||
return row;
|
return row;
|
||||||
} /* edit_distn */
|
} /* edit_distn */
|
||||||
|
|
||||||
|
|
@ -156,4 +156,3 @@ initspamsum(void)
|
||||||
return module;
|
return module;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
Copyright Andrew Tridgell <tridge@samba.org> 2002
|
Copyright Andrew Tridgell <tridge@samba.org> 2002
|
||||||
|
|
||||||
This code is released under the GNU General Public License version 2
|
This code is released under the GNU General Public License version 2
|
||||||
or later. Alteratively, you may also use this code under the terms
|
or later. Alternatively, you may also use this code under the terms
|
||||||
of the Perl Artistic license.
|
of the Perl Artistic license.
|
||||||
|
|
||||||
If you wish to distribute this code under the terms of a different
|
If you wish to distribute this code under the terms of a different
|
||||||
|
|
@ -231,7 +231,7 @@ again:
|
||||||
we only accept a match if we have at least one common substring in
|
we only accept a match if we have at least one common substring in
|
||||||
the signature of length ROLLING_WINDOW. This dramatically drops the
|
the signature of length ROLLING_WINDOW. This dramatically drops the
|
||||||
false positive rate for low score thresholds while having
|
false positive rate for low score thresholds while having
|
||||||
negligable affect on the rate of spam detection.
|
negligible effect on the rate of spam detection.
|
||||||
|
|
||||||
return 1 if the two strings do have a common substring, 0 otherwise
|
return 1 if the two strings do have a common substring, 0 otherwise
|
||||||
*/
|
*/
|
||||||
|
|
@ -242,7 +242,7 @@ static int has_common_substring(const char *s1, const char *s2)
|
||||||
u32 hashes[SPAMSUM_LENGTH];
|
u32 hashes[SPAMSUM_LENGTH];
|
||||||
|
|
||||||
/* there are many possible algorithms for common substring
|
/* there are many possible algorithms for common substring
|
||||||
detection. In this case I am re-using the rolling hash code
|
detection. In this case I am reusing the rolling hash code
|
||||||
to act as a filter for possible substring matches */
|
to act as a filter for possible substring matches */
|
||||||
|
|
||||||
roll_reset();
|
roll_reset();
|
||||||
|
|
@ -676,4 +676,4 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -1,52 +0,0 @@
|
||||||
import unittest
|
|
||||||
|
|
||||||
import spamsum
|
|
||||||
|
|
||||||
|
|
||||||
class SpamSumTest(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.s1 = "I am the very model of a modern Major-General, I've information animal and vegetable and mineral"
|
|
||||||
self.s2 = "I am the very model of a modern Brigadier, I've information animal and vegetable and something else"
|
|
||||||
self.s3 = "Huh? Gilbert and Who?"
|
|
||||||
|
|
||||||
def test_edit_distance(self):
|
|
||||||
self.assertEqual(spamsum.edit_distance(self.s1, self.s2), 27)
|
|
||||||
self.assertEqual(spamsum.edit_distance(self.s2, self.s1), 27)
|
|
||||||
self.assertEqual(spamsum.edit_distance(self.s1, self.s3), 93)
|
|
||||||
self.assertEqual(spamsum.edit_distance(self.s2, self.s3), 96)
|
|
||||||
|
|
||||||
def test_spamsum(self):
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.spamsum(self.s1),
|
|
||||||
'3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn'
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.spamsum(self.s2),
|
|
||||||
'3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt'
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.spamsum(self.s3),
|
|
||||||
'3:uZ3B:uZx'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_match(self):
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s1)),
|
|
||||||
100
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s2)),
|
|
||||||
72
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s1)),
|
|
||||||
72
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s3)),
|
|
||||||
0
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s3)),
|
|
||||||
0
|
|
||||||
)
|
|
||||||
61
tests/test_spamsum.py
Normal file
61
tests/test_spamsum.py
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import spamsum
|
||||||
|
|
||||||
|
GILBERT = (
|
||||||
|
"I am the very model of a modern Major-General, "
|
||||||
|
"I've information animal and vegetable and mineral"
|
||||||
|
)
|
||||||
|
NOT_GILBERT = (
|
||||||
|
"I am the very model of a modern Brigadier, "
|
||||||
|
"I've information animal and vegetable and something else"
|
||||||
|
)
|
||||||
|
IGNORANCE = "Huh? Gilbert and Who?"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"s1, s2, distance",
|
||||||
|
[
|
||||||
|
(GILBERT, NOT_GILBERT, 27),
|
||||||
|
(NOT_GILBERT, GILBERT, 27),
|
||||||
|
(GILBERT, IGNORANCE, 93),
|
||||||
|
(NOT_GILBERT, IGNORANCE, 96),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_edit_distance(s1, s2, distance):
|
||||||
|
assert spamsum.edit_distance(s1, s2) == distance
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"value, expected",
|
||||||
|
[
|
||||||
|
(
|
||||||
|
GILBERT,
|
||||||
|
"3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
NOT_GILBERT,
|
||||||
|
"3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
IGNORANCE,
|
||||||
|
"3:uZ3B:uZx",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_spamsum(value, expected):
|
||||||
|
assert spamsum.spamsum(value) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"s1, s2, match",
|
||||||
|
[
|
||||||
|
(GILBERT, GILBERT, 100),
|
||||||
|
(GILBERT, NOT_GILBERT, 72),
|
||||||
|
(NOT_GILBERT, GILBERT, 72),
|
||||||
|
(GILBERT, IGNORANCE, 0),
|
||||||
|
(NOT_GILBERT, IGNORANCE, 0),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_match(s1, s2, match):
|
||||||
|
assert spamsum.match(spamsum.spamsum(s1), spamsum.spamsum(s2)) == match
|
||||||
30
tox.ini
30
tox.ini
|
|
@ -1,25 +1,15 @@
|
||||||
|
|
||||||
[tox]
|
[tox]
|
||||||
envlist = flake8,package-py{36,37,38,39},py{36,37,38,39}
|
envlist = pre-commit,py{39,310,311,312,313,314}
|
||||||
skip_missing_interpreters = true
|
skip_missing_interpreters = true
|
||||||
|
|
||||||
[testenv]
|
[testenv:pre-commit]
|
||||||
commands =
|
package = wheel
|
||||||
python setup.py test
|
wheel_build_env = .pkg
|
||||||
|
extras = dev
|
||||||
|
commands = pre-commit run --all-files --show-diff-on-failure --color=always
|
||||||
|
|
||||||
[testenv:flake8]
|
[testenv:py{,39,310,311,312,313,314}]
|
||||||
skip_install = True
|
depends = pre-commit
|
||||||
deps =
|
extras = dev
|
||||||
flake8
|
|
||||||
commands = flake8 {posargs}
|
|
||||||
|
|
||||||
[testenv:package-py{36,37,38,39}]
|
|
||||||
skip_install = True
|
|
||||||
deps =
|
|
||||||
check_manifest
|
|
||||||
wheel
|
|
||||||
twine
|
|
||||||
commands =
|
commands =
|
||||||
check-manifest -v
|
python -m pytest {posargs:-vv --color yes}
|
||||||
python setup.py sdist bdist_wheel
|
|
||||||
python -m twine check dist/*
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue