import os
import re
# Frequent English words whose immediate repetition ("the the") is treated
# as a typo. Matching is case-sensitive, which is why 'I' is capitalised.
common_words = {
    'about', 'after', 'all', 'also', 'an', 'and', 'any', 'are', 'as', 'at',
    'be', 'because', 'but', 'by',
    'can', 'come', 'could',
    'day', 'do',
    'even',
    'first', 'for',
    'get', 'give', 'go',
    'has', 'have', 'he', 'her', 'him', 'his', 'how',
    'I', 'in', 'into', 'it', 'its',
    'just',
    'know',
    'like', 'look',
    'make', 'man', 'many', 'me', 'more', 'my',
    'new', 'no', 'not', 'now',
    'of', 'one', 'only', 'or', 'other', 'our', 'out', 'over',
    'people',
    'say', 'see', 'she', 'so', 'some',
    'take', 'tell', 'than', 'their', 'them', 'then', 'there', 'these',
    'they', 'think', 'this', 'time', 'two',
    'up', 'use',
    'very',
    'want', 'was', 'way', 'we', 'well', 'what', 'when', 'which', 'who',
    'will', 'with', 'would',
    'year', 'you', 'your',
}
# File-name suffixes (text after the last '.') that are eligible for
# processing. A name without any '.' is compared as a whole, so a file
# literally called "Kconfig" is matched by the 'Kconfig' entry.
valid_extensions = {
    'c',
    'h',
    'yaml',
    'cmake',
    'conf',
    'txt',
    'overlay',
    'rst',
    'dtsi',
    'Kconfig',
    'dts',
    'defconfig',
    'yml',
    'ld',
    'sh',
    'py',
    'soc',
    'cfg',
}
def filter_repeated_words(text, words=None):
    """Remove accidental repetitions of common words from *text*.

    Two kinds of repetition are collapsed to a single occurrence:

    * the same word repeated on one line, separated only by spaces/tabs
      ("the the the end" -> "the end");
    * the same word ending one line and starting the next, where the next
      line may begin with indentation and a comment leader made of ``*``
      and/or ``/`` characters ("... the\\n * the value" ->
      "... the\\n * value"). The line break and the comment leader are kept.

    Matching is case-sensitive and limited to whole words.

    Args:
        text: The text to clean up.
        words: Iterable of words to look for. Defaults to the module-level
            ``common_words`` set.

    Returns:
        The cleaned text; *text* itself when there is nothing to do.
    """
    if words is None:
        words = common_words
    if not text or not words:
        return text

    # Sorted (longest first) so the compiled pattern does not depend on set
    # iteration order.
    alternation = '|'.join(
        map(re.escape, sorted(words, key=lambda w: (-len(w), w))))

    # Same-line repetition: only horizontal whitespace may separate the
    # copies, so operators ("one * one") and line breaks never match here.
    # The trailing `+` collapses runs of three or more in a single pass.
    within_line_pattern = re.compile(
        r'\b(' + alternation + r')\b(?:[ \t]+\1\b)+')
    text = within_line_pattern.sub(r'\1', text)

    # Cross-line repetition: a real line break is required between the two
    # copies. Group 2 captures the break plus any indentation and comment
    # leader so they can be put back unchanged. The final optional group
    # swallows the blanks after the removed word when more text follows,
    # avoiding a double space.
    across_lines_pattern = re.compile(
        r'\b(' + alternation + r')\b'
        r'([ \t]*\r?\n[ \t]*(?:[*/]+[ \t]*)?)'
        r'\1\b(?:[ \t]+(?=\S))?')
    return across_lines_pattern.sub(r'\1\2', text)
def process_file(file_path):
    """Remove repeated common words from one file, rewriting it in place.

    The file is only written when filtering actually changed its content,
    so untouched files keep their modification time.

    Args:
        file_path: Path of the UTF-8 text file to clean up.

    Returns:
        True if the file was rewritten, False if it was left untouched
        (nothing to fix, or the file is not valid UTF-8).
    """
    try:
        # newline='' disables newline translation so existing line endings
        # (e.g. CRLF) are written back exactly as they were read.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            text = file.read()
    except UnicodeDecodeError:
        # One undecodable file must not abort a whole directory walk.
        print(f"Skipped non-UTF-8 file: {file_path}")
        return False

    new_text = filter_repeated_words(text)
    if new_text == text:
        return False

    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        file.write(new_text)
    return True
def process_directory(directory_path):
    """Run process_file() on every eligible file below *directory_path*.

    Hidden directories (name starting with '.') are not descended into and
    hidden files are skipped. A file is eligible when the text after the
    last '.' in its name is in ``valid_extensions``; a name without any '.'
    is compared as a whole.

    Args:
        directory_path: Root directory of the tree to process.
    """
    for root, dirs, files in os.walk(directory_path):
        # Prune in place: os.walk only descends into what is left in `dirs`.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file in files:
            # Filter out hidden files
            if file.startswith('.'):
                continue
            file_extension = file.rpartition('.')[2]
            # Only process files with one of the listed suffixes
            if file_extension not in valid_extensions:
                continue
            file_path = os.path.join(root, file)
            process_file(file_path)
            # Reported after the call so the message is only printed once
            # processing has actually completed.
            print(f"Processed file: {file_path}")
# Root of the source tree whose files are cleaned up.
# NOTE(review): hard-coded, machine-specific path, and the call below runs
# on import because there is no `if __name__ == "__main__":` guard.
directory_to_process = "/home/mi/works/github/zephyrproject/zephyr"
# Walks the tree and rewrites matching files in place.
process_directory(directory_to_process)
Signed-off-by: Lingao Meng <menglingao@xiaomi.com>
119 lines
3.6 KiB
Python
119 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""Write subfolder list to a file
|
|
|
|
This script will walk the specified directory and write the file specified with
the list of all sub-directories found. If the output file already exists, the
file will only be updated in case sub-directories have been added or removed
since the previous invocation.
|
|
|
|
"""
|
|
|
|
import os
|
|
import argparse
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse command line arguments and options

    Args:
        argv: Optional list of argument strings to parse. When None,
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The argparse namespace with the attributes ``directory``,
        ``create_links``, ``out_file`` and ``trigger_file``; the optional
        ones are None when not given.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False)

    # Long help texts use adjacent string literals rather than a backslash
    # continuation inside the literal, which would embed the next line's
    # indentation in the string.
    parser.add_argument('-d', '--directory', required=True,
                        help='Directory to walk for sub-directory discovery')
    parser.add_argument('-c', '--create-links', required=False,
                        help='Create links for each directory found in '
                             'directory given')
    parser.add_argument('-o', '--out-file', required=True,
                        help='File to write containing a list of all '
                             'directories found')
    parser.add_argument('-t', '--trigger-file', required=False,
                        help='Trigger file to be touched to re-run CMake')

    return parser.parse_args(argv)
|
|
|
|
|
|
def _ensure_symlink(target, link):
    """Make sure *link* exists, creating it as a symlink to *target*."""
    # os.path.exists() follows symlinks and is False for a dangling link,
    # in which case os.symlink() would fail with FileExistsError. Replace
    # such stale links instead.
    if os.path.islink(link) and not os.path.exists(link):
        os.remove(link)
    if not os.path.lexists(link):
        os.symlink(target, link)


def get_subfolder_list(directory, create_links=None):
    """Return subfolder list of a directory

    Args:
        directory: Directory to walk. It is always the first list entry.
        create_links: Optional directory in which one symlink per found
            directory is created (the directory is created if missing).
            When given, the returned list holds the symlink paths instead
            of the real directory paths.

    Returns:
        List of paths: *directory* (or its link) followed by every
        sub-directory below it, visited top-down with each level sorted by
        name.
    """
    dirlist = []
    linking = create_links is not None

    if linking:
        os.makedirs(create_links, exist_ok=True)
        # normpath drops a trailing separator, which would otherwise make
        # basename() return '' and leave the links without a base name.
        symbase = os.path.basename(os.path.normpath(directory))
        symlink = create_links + os.path.sep + symbase
        _ensure_symlink(directory, symlink)
        dirlist.append(symlink)
    else:
        dirlist.append(directory)

    for root, dirs, _ in os.walk(directory, topdown=True):
        # Sorting in place also fixes the order in which os.walk descends,
        # keeping the output stable between runs.
        dirs.sort()
        for subdir in dirs:
            targetdirectory = os.path.join(root, subdir)
            if linking:
                # Flatten the relative path into a single link name:
                # <base>_<sub>_<subsub>
                reldir = os.path.relpath(targetdirectory, directory)
                linkname = symbase + '_' + reldir.replace(os.path.sep, '_')
                symlink = create_links + os.path.sep + linkname
                _ensure_symlink(targetdirectory, symlink)
                dirlist.append(symlink)
            else:
                dirlist.append(targetdirectory)

    return dirlist
|
|
|
|
|
|
def gen_out_file(out_file, dirs):
    """Generate file with the list of directories

    File won't be updated if it already exists and has the same content

    Args:
        out_file: Path of the file to write.
        dirs: Iterable of directory path strings, written one per line.
    """
    dirs_nl = "\n".join(dirs) + "\n"

    # Try to read directly instead of checking for existence first, so
    # there is no window between the check and the open.
    try:
        with open(out_file, 'r', encoding="utf-8") as out_file_fo:
            if out_file_fo.read() == dirs_nl:
                return
    except FileNotFoundError:
        # No previous output: fall through and create it.
        pass

    with open(out_file, 'w', encoding="utf-8") as out_file_fo:
        # dirs_nl is one string; write() emits it in a single call, whereas
        # writelines() would iterate over it character by character.
        out_file_fo.write(dirs_nl)
|
|
|
|
|
|
def touch(trigger):
    """Touch the trigger file

    If no trigger file is provided then do a return.

    Args:
        trigger: Path of the trigger file, or None to do nothing. An
            existing file gets its timestamps set to the current time; a
            missing file is created empty.
    """
    if trigger is None:
        return

    # Update the timestamps first and only create the file when that fails,
    # so there is no window between an existence check and the update.
    try:
        os.utime(trigger, None)
    except FileNotFoundError:
        with open(trigger, 'w'):
            pass
|
|
|
|
|
|
def main():
    """Parse command line arguments and take respective actions

    Collects the sub-directory list for the requested directory, writes it
    to the output file and finally touches the trigger file.
    """
    args = parse_args()

    dirs = get_subfolder_list(args.directory, args.create_links)
    gen_out_file(args.out_file, dirs)

    # Always touch trigger file to ensure json files are updated
    touch(args.trigger_file)
|
|
|
|
|
|
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|