| 12345678910111213141516171819202122232425262728293031 |
"""Scan JS/TS source trees for lines containing CJK text and dump them to JSON.

Run as a script, this walks every directory in ``directories``, skips any
sub-directory named in ``exclude_dirs``, reads files whose name ends with one
of ``extensions`` as UTF-8, and writes every line matching ``cjk_pattern`` to
``cjk_extract.json`` as ``{file_path: [{"line": int, "text": str}, ...]}``.
"""
import json
import os
import re
from typing import Dict, Iterable, List, Optional, Pattern

directories = ['d:/workspace/AuraK/web', 'd:/workspace/AuraK/server/src']
exclude_dirs = ['node_modules', '.git', 'dist', '.next']
extensions = ['.ts', '.tsx', '.js', '.jsx']
# U+4E00-U+9FFF (CJK Unified Ideographs), U+3040-U+309F (Hiragana),
# U+30A0-U+30FF (Katakana).
cjk_pattern = re.compile(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]+')


def find_cjk_in_file(file_path: str,
                     pattern: Pattern[str] = cjk_pattern) -> List[dict]:
    """Return the lines of *file_path* that match *pattern*.

    Args:
        file_path: Path of a text file, decoded as UTF-8.
        pattern: Compiled regex searched in each line.

    Returns:
        One ``{"line": <1-based line number>, "text": <stripped line>}`` dict
        per matching line, in file order.

    Raises:
        OSError: The file cannot be opened or read.
        UnicodeDecodeError: The file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Stream the file instead of materializing it with readlines(); the
        # list is only returned once the whole file decoded successfully.
        return [
            {"line": line_no, "text": line.strip()}
            for line_no, line in enumerate(f, start=1)
            if pattern.search(line)
        ]


def collect_cjk_lines(roots: Iterable[str],
                      exclude: Optional[Iterable[str]] = None,
                      exts: Optional[Iterable[str]] = None,
                      pattern: Pattern[str] = cjk_pattern
                      ) -> Dict[str, List[dict]]:
    """Walk *roots* and gather every CJK-containing line per source file.

    Args:
        roots: Directories to walk recursively.
        exclude: Directory names to prune from the walk; defaults to the
            module-level ``exclude_dirs``.
        exts: File-name suffixes to scan; defaults to the module-level
            ``extensions``.
        pattern: Compiled regex searched in each line.

    Returns:
        Mapping of file path to the list produced by ``find_cjk_in_file``.
        Files without any match are omitted. Files that cannot be read or
        decoded are reported on stdout and skipped.
    """
    skip = set(exclude_dirs if exclude is None else exclude)
    suffixes = tuple(extensions if exts is None else exts)
    found: Dict[str, List[dict]] = {}

    for top in roots:
        for root, dirs, files in os.walk(top):
            # Assign in place so os.walk does not descend into pruned
            # directories; sort so the traversal order is reproducible.
            dirs[:] = sorted(name for name in dirs if name not in skip)
            for name in sorted(files):
                if not name.endswith(suffixes):
                    continue
                file_path = os.path.join(root, name)
                try:
                    hits = find_cjk_in_file(file_path, pattern)
                except (OSError, UnicodeDecodeError) as e:
                    # Best effort: one unreadable file must not abort the scan.
                    print(f"Error reading {file_path}: {e}")
                    continue
                if hits:
                    found.setdefault(file_path, []).extend(hits)
    return found


def main(output_path: str = 'cjk_extract.json') -> None:
    """Scan ``directories`` and write the result to *output_path* as JSON."""
    cjk_lines = collect_cjk_lines(directories)
    with open(output_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the CJK text human-readable in the output.
        json.dump(cjk_lines, f, ensure_ascii=False, indent=2)
    print(f"Extracted to {output_path}")


if __name__ == "__main__":
    main()
|