"""Collect every source line that contains CJK text into ``cjk_extract.json``.

The script walks the trees listed in ``directories``, skips any directory
whose name is in ``exclude_dirs``, reads every file whose name ends with one
of ``extensions``, and records each line matched by ``cjk_pattern``.  The
result is a JSON object mapping file path -> list of
``{"line": <1-based line number>, "text": <stripped line>}`` entries.
"""
import os
import re
import json

# Source trees to scan.
directories = ['d:/workspace/AuraK/web', 'd:/workspace/AuraK/server/src']
# Directory names pruned from the walk wherever they appear.
exclude_dirs = ['node_modules', '.git', 'dist', '.next']
# Only files whose names end with one of these suffixes are read.
extensions = ['.ts', '.tsx', '.js', '.jsx']
# CJK Unified Ideographs, Hiragana and Katakana.
cjk_pattern = re.compile(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]+')


def find_cjk_lines(file_path, pattern=cjk_pattern):
    """Return the lines of *file_path* that match *pattern*.

    Args:
        file_path: Path of a UTF-8 encoded text file.
        pattern: Compiled regular expression searched for in each line;
            defaults to ``cjk_pattern``.

    Returns:
        A list of ``{"line": n, "text": s}`` dicts in file order, where ``n``
        is the 1-based line number and ``s`` is the line with surrounding
        whitespace stripped.  Empty when nothing matches.

    Raises:
        OSError: The file cannot be opened or read.
        UnicodeDecodeError: The file is not valid UTF-8.
    """
    # 'utf-8-sig' drops a leading byte-order mark; with plain 'utf-8' the
    # BOM would survive as '\ufeff' in the text of line 1, because
    # str.strip() does not treat it as whitespace.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        # The list is built completely before returning, so a decode error
        # part-way through a file yields no partial result for that file.
        return [
            {"line": number, "text": line.strip()}
            for number, line in enumerate(f, start=1)
            if pattern.search(line)
        ]


def collect_cjk_lines(roots=None, skip_dirs=None, suffixes=None, pattern=None):
    """Scan directory trees and gather the matching lines of each file.

    Args:
        roots: Iterable of directories to walk; defaults to ``directories``.
        skip_dirs: Directory names to prune; defaults to ``exclude_dirs``.
        suffixes: File-name suffixes to include; defaults to ``extensions``.
        pattern: Compiled regular expression; defaults to ``cjk_pattern``.

    Returns:
        A dict mapping each file path that has at least one matching line to
        the list produced by ``find_cjk_lines``.  Files that cannot be read
        or decoded are reported on stdout and skipped.
    """
    roots = directories if roots is None else roots
    skipped = set(exclude_dirs if skip_dirs is None else skip_dirs)
    wanted = tuple(extensions if suffixes is None else suffixes)
    pattern = cjk_pattern if pattern is None else pattern

    found = {}
    for top in roots:
        for root, dirs, files in os.walk(top):
            # In-place assignment is required: os.walk only honours pruning
            # done on the very list object it handed out.
            dirs[:] = [name for name in dirs if name not in skipped]
            for file_name in files:
                if not file_name.endswith(wanted):
                    continue
                file_path = os.path.join(root, file_name)
                try:
                    hits = find_cjk_lines(file_path, pattern)
                except (OSError, UnicodeError) as e:
                    # Best effort: one unreadable file must not abort the scan.
                    print(f"Error reading {file_path}: {e}")
                    continue
                if hits:
                    # extend (not assign) so a file reached through two
                    # overlapping roots accumulates its entries.
                    found.setdefault(file_path, []).extend(hits)
    return found


# This file is a one-shot script: the scan runs as soon as it is executed.
cjk_lines = collect_cjk_lines()

with open('cjk_extract.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the CJK text human-readable in the output.
    json.dump(cjk_lines, f, ensure_ascii=False, indent=2)

print("Extracted to cjk_extract.json")