zephyr/scripts/build
Lingao Meng 302422ad9d everywhere: replace double words
import os
import re

# Frequently used English words whose immediate repetition ("of of") is
# treated as a typo by filter_repeated_words(). Matching is case-sensitive,
# so only the capitalised pronoun 'I' is listed in upper case.
# Entries are grouped by initial letter.
common_words = {
    'about', 'after', 'all', 'also', 'an', 'and', 'any', 'are', 'as', 'at',
    'be', 'because', 'but', 'by',
    'can', 'come', 'could',
    'day', 'do',
    'even',
    'first', 'for',
    'get', 'give', 'go',
    'has', 'have', 'he', 'her', 'him', 'his', 'how',
    'I', 'in', 'into', 'it', 'its',
    'just',
    'know',
    'like', 'look',
    'make', 'man', 'many', 'me', 'more', 'my',
    'new', 'no', 'not', 'now',
    'of', 'one', 'only', 'or', 'other', 'our', 'out', 'over',
    'people',
    'say', 'see', 'she', 'so', 'some',
    'take', 'tell', 'than', 'their', 'them', 'then', 'there', 'these',
    'they', 'think', 'this', 'time', 'two',
    'up', 'use',
    'very',
    'want', 'was', 'way', 'we', 'well', 'what', 'when', 'which', 'who',
    'will', 'with', 'would',
    'year', 'you', 'your',
}

# Final dot-separated name components that mark a file as eligible for
# rewriting by process_directory(). Comparison is case-sensitive.
valid_extensions = {
    # C sources and linker scripts
    'c', 'h', 'ld',
    # Devicetree
    'dts', 'dtsi', 'overlay',
    # Build and configuration
    'cmake', 'conf', 'cfg', 'Kconfig', 'defconfig', 'soc',
    'yaml', 'yml',
    # Scripts
    'sh', 'py',
    # Documentation and plain text
    'rst', 'txt',
}

def filter_repeated_words(text, words=None):
    """Remove immediately repeated words from *text*.

    Two kinds of repetition are handled, in this order:

    1. Same line: a word followed by one or more copies of itself,
       separated only by horizontal whitespace, collapses to a single
       occurrence ("of of of" -> "of").
    2. Across a line break: when a line ends with a word and the next text
       (after whitespace and any ``*`` / ``/`` comment decoration) starts
       with the same word, the second occurrence is dropped. The line
       break and the decoration between the two words are kept as they
       are.

    Matching is case-sensitive and whole-word only.

    Args:
        text: The text to clean up.
        words: Iterable of words to de-duplicate. Defaults to the
            module-level ``common_words`` set.

    Returns:
        The text with repeated words removed. Line endings and all other
        characters are left untouched.
    """
    if words is None:
        words = common_words
    # An empty alternation would match the empty string everywhere.
    if not words:
        return text

    # Sort so the compiled pattern does not depend on set iteration order.
    alternation = '|'.join(
        re.escape(word)
        for word in sorted(words, key=lambda word: (-len(word), word))
    )
    word_group = r'\b(' + alternation + r')\b'

    # [^\S\n] is "whitespace except newline", so this pattern never crosses
    # a line boundary. The repeated group collapses runs of any length in
    # a single pass.
    within_line_pattern = re.compile(word_group + r'(?:[^\S\n]+\1\b)+')
    text = within_line_pattern.sub(lambda match: match.group(1), text)

    # The gap must contain a newline; after it, whitespace and the comment
    # decoration characters '*' and '/' may precede the repeated word.
    across_lines_pattern = re.compile(
        word_group + r'([^\S\n]*\n[\s*/]*)\1\b')
    text = across_lines_pattern.sub(
        lambda match: match.group(1) + match.group(2), text)

    return text

def process_file(file_path):
    """Remove repeated words from the UTF-8 text file at *file_path*, in place.

    The file is read, passed through filter_repeated_words(), and written
    back only if the text actually changed.

    Args:
        file_path: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # newline='' turns off newline translation, so CRLF and LF line endings
    # are read and written back exactly as they appear in the file.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        text = file.read()

    new_text = filter_repeated_words(text)

    # Leave untouched files alone instead of rewriting identical content.
    if new_text == text:
        return

    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        file.write(new_text)

def process_directory(directory_path):
    """Run process_file() on every eligible file below *directory_path*.

    Hidden directories (names starting with '.') are not descended into and
    hidden files are skipped. A file is eligible when the text after its
    last '.' is in ``valid_extensions``; a name with no dot is compared as
    a whole, so a file named exactly ``Kconfig`` is eligible too.

    Args:
        directory_path: Root of the directory tree to walk.
    """
    for root, dirs, files in os.walk(directory_path):
        # Prune in place so os.walk() does not descend into hidden
        # directories.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file_name in files:
            # Filter out hidden files
            if file_name.startswith('.'):
                continue
            file_extension = file_name.split('.')[-1]
            # Only process files with one of the listed extensions
            if file_extension in valid_extensions:
                file_path = os.path.join(root, file_name)
                print(f"Processed file: {file_path}")
                process_file(file_path)

# Default tree to clean up when no path is given on the command line.
directory_to_process = "/home/mi/works/github/zephyrproject/zephyr"

# Only rewrite files when run as a script, so importing this module has no
# side effects on disk.
if __name__ == "__main__":
    import sys

    # An optional first argument overrides the default directory.
    if len(sys.argv) > 1:
        directory_to_process = sys.argv[1]
    process_directory(directory_to_process)

Signed-off-by: Lingao Meng <menglingao@xiaomi.com>
2024-06-25 06:05:35 -04:00
..
check_init_priorities.py everywhere: replace double words 2024-06-22 05:40:22 -04:00
check_init_priorities_test.py scripts: check_init_priorities: rework the error messages 2023-11-03 11:45:23 +01:00
dir_is_writeable.py
elf_parser.py scripts/build/elf_parser.py: make dependency graph output deterministic 2023-12-04 14:24:32 +01:00
file2hex.py scripts: build: file2hex: Add optional offset and length parameters 2023-09-15 13:23:30 +02:00
gen_app_partitions.py
gen_cfb_font_header.py scripts: Update CFB font generator 2023-09-13 14:40:00 -07:00
gen_device_deps.py
gen_image_info.py
gen_isr_tables.py script: gen_isr_tables: remove unnecessary warning log 2024-02-26 11:55:59 +00:00
gen_isr_tables_parser_carrays.py scripts: build: gen_isr_tables: Implement local ISR generation 2024-02-02 19:49:36 +01:00
gen_isr_tables_parser_local.py scripts: build: gen_isr_tables: Implement local ISR generation 2024-02-02 19:49:36 +01:00
gen_kobject_list.py scripts: build: gen_kobject_list.py check _driver_api suffix 2024-05-03 14:44:41 +01:00
gen_kobject_placeholders.py
gen_offset_header.py
gen_relocate_app.py code_relocation: Add NOKEEP option 2024-01-15 13:20:17 +01:00
gen_strerror_table.py
gen_strsignal_table.py posix: signal: implement strsignal 2023-07-18 15:35:42 -04:00
gen_symtab.py debug: symtab: fix linking issue due to variable redefinition 2024-05-30 16:52:01 -05:00
gen_syscalls.py build: namespace the generated headers with zephyr/ 2024-05-28 22:03:55 +02:00
llext_inject_slids.py llext: add support for SLID-based linking 2024-06-03 15:29:34 -04:00
llext_prepare_exptab.py llext: add support for SLID-based linking 2024-06-03 15:29:34 -04:00
llext_slidlib.py llext: add support for SLID-based linking 2024-06-03 15:29:34 -04:00
mergehex.py
parse_syscalls.py scripts/build: make struct_tags.json deterministic 2023-12-11 09:51:22 +01:00
process_gperf.py syscall: rename z_object_find -> k_object_find 2023-11-03 11:46:52 +01:00
subfolder_list.py everywhere: replace double words 2024-06-25 06:05:35 -04:00
uf2conv.py scripts: Update uf2conv.py from upstream git. 2024-05-08 09:31:07 -04:00
uf2families.json scripts: Update uf2conv.py from upstream git. 2024-05-08 09:31:07 -04:00
user_wordsize.py