#!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Dan Halbert for Adafruit Industries LLC # # SPDX-License-Identifier: MIT import click from cryptography.hazmat.primitives.serialization import Encoding from cryptography.x509.oid import NameOID import cryptography.x509 import requests import re @click.command() @click.option( "--sources", multiple=True, help=".pem filenames or URLs to filter", default=("https://curl.se/ca/cacert.pem", "extra.pem"), show_default=True, ) @click.option( "--out-full", default="../data/roots-full.pem", help="full unfiltered combined .pem file", type=click.File("w"), show_default=True, ) @click.option( "--out-filtered", default="../data/roots-filtered.pem", help="filtered combined .pem file", type=click.File("w"), show_default=True, ) @click.option( "--include", help="file of cert O and CN names to select; one regexp per line; substring match; case-insensitive; # comments OK", default="include.txt", type=click.File("r"), show_default=True, ) @click.option( "--exclude", help="file of cert O and CN names to exclude (after --include); one regexp per line; substring match; case-insensitive; # comments OK", default="exclude.txt", type=click.File("r"), show_default=True, ) def run(sources, out_full, out_filtered, include, exclude): concatenated_pem = b"" for source in sources: if source.startswith("http"): concatenated_pem += requests.get(source).content else: with open(source, "rb") as input: concatenated_pem += input.read() # Read a list of regexps to substr-match against Issuer O and CN names. def read_patterns(f): patterns = [] for line in f.readlines(): line = line.strip() if line.startswith("#"): continue patterns.append(re.compile(line, flags=re.IGNORECASE)) return patterns include_patterns = read_patterns(include) exclude_patterns = read_patterns(exclude) # Read in all the certs at once. input_certs = cryptography.x509.load_pem_x509_certificates(concatenated_pem) # For each cert, see if its O or CN name matches against the list of include and exclude patterns. for cert in input_certs: issuer = cert.issuer org_name_attributes = issuer.get_attributes_for_oid( NameOID.ORGANIZATION_NAME ) org_name = org_name_attributes[0].value if org_name_attributes else "" common_name_attributes = issuer.get_attributes_for_oid(NameOID.COMMON_NAME) common_name = ( common_name_attributes[0].value if common_name_attributes else "" ) if not any((org_name, common_name)): raise ValueError(f"no O or CN available for {issuer}") include_cert = False for pattern in include_patterns: if pattern.search(org_name) or pattern.search(common_name): include_cert = True break if include_cert: for pattern in exclude_patterns: print(pattern, org_name, common_name) if pattern.search(org_name) or pattern.search(common_name): print("EXCLUDED", cert) include_cert = False break # Filtered output includes only certificates that pass through the filters. if include_cert: # Add a comment with the O and CN names. out_filtered.write(f"# O={org_name}, CN={common_name}\n") out_filtered.write(cert.public_bytes(Encoding.PEM).decode("ascii")) # Unfiltered output includes all certificates. print(org_name, common_name) out_full.write(f"# O={org_name}, CN={common_name}\n") out_full.write(cert.public_bytes(Encoding.PEM).decode("ascii")) if __name__ == "__main__": run()