Refresh project configuration to PEP 621 (pyproject.toml).

2025-02-18 14:06:59 +08:00 · 2025-02-18 14:06:59 +08:00 · 08cebef85e
commit 08cebef85e
parent c0467a5c51
12 changed files with 195 additions and 139 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,6 +7,7 @@ dist
 build
 _build
 distribute-*
+.ruff_cache/
 .tox/
 .vscode/
 venv/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,29 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-toml
+      - id: check-yaml
+      - id: check-case-conflict
+      - id: check-docstring-first
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+  # Docformatter 1.7.5 isn't compatible with Pre-commit 4.0
+  # - repo: https://github.com/PyCQA/docformatter
+  #   rev: v1.7.5
+  #   hooks:
+  #     - id: docformatter
+  #       args: [--in-place, --black]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.6
+    hooks:
+      # Run the linter.
+      - id: ruff
+      # Run the formatter.
+      - id: ruff-format
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.1
+    hooks:
+    - id: codespell
+      # Remove the toml extra once Python 3.10 is no longer supported (tomllib is in the stdlib from 3.11)
+      additional_dependencies: ['.[toml]']
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,3 +0,0 @@
-include LICENSE
-include tox.ini
-recursive-include tests *.py
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,86 @@
+[build-system]
+requires = [
+    "setuptools==75.8.0",
+    "setuptools_scm==8.1.0",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+dynamic = ["version"]
+name = "pyspamsum"
+description = "A Python wrapper for Andrew Tridgell's spamsum algorithm"
+readme = "README.rst"
+requires-python = ">= 3.9"
+authors = [
+    {name="Russell Keith-Magee", email="russell@keith-magee.com"}
+]
+maintainers = [
+    {name="Russell Keith-Magee", email="russell@keith-magee.com"}
+]
+keywords = [
+    "spamsum",
+]
+license.text = "New BSD"
+classifiers=[
+    "Development Status :: 5 - Production/Stable",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+    "Topic :: Text Processing",
+    "Topic :: Utilities",
+]
+
+[project.optional-dependencies]
+# Extras used by developers *of* pyspamsum are pinned to specific versions to
+# ensure environment consistency.
+dev = [
+    "pre-commit == 4.1.0",
+    "pytest == 8.3.4",
+    "ruff == 0.9.6",
+    "setuptools_scm == 8.1.0",
+    "tox == 4.24.1",
+]
+
+[project.urls]
+Homepage = "https://github.com/freakboy3742/pyspamsum/"
+Tracker = "https://github.com/freakboy3742/pyspamsum/issues"
+Source = "https://github.com/freakboy3742/pyspamsum/"
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+filterwarnings = [
+    "error",
+]
+
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["spamsum"]
+
+[tool.setuptools]
+ext-modules = [
+    {name="spamsum", sources=["src/pyspamsum.c", "src/spamsum.c", "src/edit_dist.c"]},
+]
+
+[tool.setuptools_scm]
+# To enable SCM versioning, we need an empty tool configuration for setuptools_scm
--- a/setup.cfg
+++ b/setup.cfg
@ -1,11 +0,0 @@
-
-[flake8]
-# https://flake8.readthedocs.org/en/latest/
-exclude=\
-    */.eggs/*,\
-    */build/*,\
-    .tox/*,\
-    local/*,\
-    venv*
-max-complexity = 25
-max-line-length = 119
--- a/setup.py
+++ b/setup.py
@ -1,43 +0,0 @@
-#!/usr/bin/env python
-import io
-
-from setuptools import setup, Extension
-
-
-with io.open('README.rst', encoding='utf8') as readme:
-    long_description = readme.read()
-
-
-setup(
-    name="pyspamsum",
-    version="1.0.5",
-    description="A Python wrapper for Andrew Tridgell's spamsum algorithm",
-    long_description=long_description,
-    long_description_content_type='text/x-rst',
-    author="Russell Keith-Magee",
-    author_email="russell@keith-magee.com",
-    url='http://github.com/freakboy3742/pyspamsum/',
-    license="New BSD",
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'Topic :: Text Processing',
-        'Topic :: Utilities',
-    ],
-    ext_modules=[
-        Extension(
-            "spamsum", [
-                "pyspamsum.c",
-                "spamsum.c",
-                "edit_dist.c",
-            ]
-        )
-    ],
-    test_suite='tests',
-)
--- a/src/edit_dist.c
+++ b/src/edit_dist.c
@ -159,7 +159,7 @@ register int from_len, to_len;
 					  infinity))

   Since this only looks at most two rows and three columns back, we need
-   only store the values for the two preceeding rows.  In this
+   only store the values for the two preceding rows.  In this
   implementation, we do not explicitly store the zero column, so only 2 *
   from_len + 2   words are needed.  However, in the implementation of the
   swap_cost   check, the current matrix value is used as a buffer; we
@ -192,8 +192,8 @@ register int from_len, to_len;
   strings are nonempty.  We also don't need to consider swap costs in row
   1.

-   COMMENT:  the indicies   row and col   below point into the STRING, so
-   the corresponding MATRIX indicies are   row+1 and col+1.
+   COMMENT: the indices row and col below point into the STRING, so
+   the corresponding MATRIX indices are row+1 and col+1.
 */

    buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
@ -266,4 +266,3 @@ register int from_len, to_len;
 	free((char *) buffer);
    return row;
 } /* edit_distn */
-
--- a/src/pyspamsum.c
+++ b/src/pyspamsum.c
@ -156,4 +156,3 @@ initspamsum(void)
    return module;
 #endif
 }
-
--- a/src/spamsum.c
+++ b/src/spamsum.c
@ -3,7 +3,7 @@
  Copyright Andrew Tridgell <tridge@samba.org> 2002

  This code is released under the GNU General Public License version 2
-  or later.  Alteratively, you may also use this code under the terms
+  or later.  Alternatively, you may also use this code under the terms
  of the Perl Artistic license.

  If you wish to distribute this code under the terms of a different
@ -231,7 +231,7 @@ again:
   we only accept a match if we have at least one common substring in
   the signature of length ROLLING_WINDOW. This dramatically drops the
   false positive rate for low score thresholds while having
-   negligable affect on the rate of spam detection.
+   negligible effect on the rate of spam detection.

   return 1 if the two strings do have a common substring, 0 otherwise
 */
@ -242,7 +242,7 @@ static int has_common_substring(const char *s1, const char *s2)
 	u32 hashes[SPAMSUM_LENGTH];

 	/* there are many possible algorithms for common substring
-	   detection. In this case I am re-using the rolling hash code
+	   detection. In this case I am reusing the rolling hash code
 	   to act as a filter for possible substring matches */

 	roll_reset();
--- a/tests/__init__.py
+++ b/tests/__init__.py
@ -1,52 +0,0 @@
-import unittest
-
-import spamsum
-
-
-class SpamSumTest(unittest.TestCase):
-    def setUp(self):
-        self.s1 = "I am the very model of a modern Major-General, I've information animal and vegetable and mineral"
-        self.s2 = "I am the very model of a modern Brigadier, I've information animal and vegetable and something else"
-        self.s3 = "Huh? Gilbert and Who?"
-
-    def test_edit_distance(self):
-        self.assertEqual(spamsum.edit_distance(self.s1, self.s2), 27)
-        self.assertEqual(spamsum.edit_distance(self.s2, self.s1), 27)
-        self.assertEqual(spamsum.edit_distance(self.s1, self.s3), 93)
-        self.assertEqual(spamsum.edit_distance(self.s2, self.s3), 96)
-
-    def test_spamsum(self):
-        self.assertEqual(
-            spamsum.spamsum(self.s1),
-            '3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn'
-        )
-        self.assertEqual(
-            spamsum.spamsum(self.s2),
-            '3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt'
-        )
-        self.assertEqual(
-            spamsum.spamsum(self.s3),
-            '3:uZ3B:uZx'
-        )
-
-    def test_match(self):
-        self.assertEqual(
-            spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s1)),
-            100
-        )
-        self.assertEqual(
-            spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s2)),
-            72
-        )
-        self.assertEqual(
-            spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s1)),
-            72
-        )
-        self.assertEqual(
-            spamsum.match(spamsum.spamsum(self.s1), spamsum.spamsum(self.s3)),
-            0
-        )
-        self.assertEqual(
-            spamsum.match(spamsum.spamsum(self.s2), spamsum.spamsum(self.s3)),
-            0
-        )
--- a/tests/test_spamsum.py
+++ b/tests/test_spamsum.py
@ -0,0 +1,61 @@
+import pytest
+
+import spamsum
+
+GILBERT = (
+    "I am the very model of a modern Major-General, "
+    "I've information animal and vegetable and mineral"
+)
+NOT_GILBERT = (
+    "I am the very model of a modern Brigadier, "
+    "I've information animal and vegetable and something else"
+)
+IGNORANCE = "Huh? Gilbert and Who?"
+
+
+@pytest.mark.parametrize(
+    "s1, s2, distance",
+    [
+        (GILBERT, NOT_GILBERT, 27),
+        (NOT_GILBERT, GILBERT, 27),
+        (GILBERT, IGNORANCE, 93),
+        (NOT_GILBERT, IGNORANCE, 96),
+    ],
+)
+def test_edit_distance(s1, s2, distance):
+    assert spamsum.edit_distance(s1, s2) == distance
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        (
+            GILBERT,
+            "3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn",
+        ),
+        (
+            NOT_GILBERT,
+            "3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt",
+        ),
+        (
+            IGNORANCE,
+            "3:uZ3B:uZx",
+        ),
+    ],
+)
+def test_spamsum(value, expected):
+    assert spamsum.spamsum(value) == expected
+
+
+@pytest.mark.parametrize(
+    "s1, s2, match",
+    [
+        (GILBERT, GILBERT, 100),
+        (GILBERT, NOT_GILBERT, 72),
+        (NOT_GILBERT, GILBERT, 72),
+        (GILBERT, IGNORANCE, 0),
+        (NOT_GILBERT, IGNORANCE, 0),
+    ],
+)
+def test_match(s1, s2, match):
+    assert spamsum.match(spamsum.spamsum(s1), spamsum.spamsum(s2)) == match
--- a/tox.ini
+++ b/tox.ini
@ -1,25 +1,15 @@
-
 [tox]
-envlist = flake8,package-py{36,37,38,39},py{36,37,38,39}
+envlist = pre-commit,py{39,310,311,312,313,314}
 skip_missing_interpreters = true

-[testenv]
-commands =
-    python setup.py test
+[testenv:pre-commit]
+package = wheel
+wheel_build_env = .pkg
+extras = dev
+commands = pre-commit run --all-files --show-diff-on-failure --color=always

-[testenv:flake8]
-skip_install = True
-deps =
-    flake8
-commands = flake8 {posargs}
-
-[testenv:package-py{36,37,38,39}]
-skip_install = True
-deps =
-    check_manifest
-    wheel
-    twine
+[testenv:py{,39,310,311,312,313,314}]
+depends = pre-commit
+extras = dev
 commands =
-    check-manifest -v
-    python setup.py sdist bdist_wheel
-    python -m twine check dist/*
+    python -m pytest {posargs:-vv --color yes}