import sys
import re

# Keys that every translation block must contain. A key that is absent from a
# block is appended with its own name as a placeholder value.
_REQUIRED_KEYS = (
    "kbSettingsSaved", "failedToSaveSettings", "actionFailed",
    "userAddedToOrganization", "featureUpdated", "roleTenantAdmin",
    "roleRegularUser", "creatingRegularUser", "editUserRole", "targetRole",
    "editCategory", "totalTenants", "systemUsers", "systemHealth",
    "operational", "orgManagement", "globalTenantControl", "newTenant",
    "domainOptional", "saveChanges", "modelConfiguration", "defaultLLMModel",
    "selectLLM", "selectEmbedding", "rerankModel", "none",
    "indexingChunkingConfig", "chatHyperparameters", "temperature", "precise",
    "creative", "maxResponseTokens", "retrievalSearchSettings", "topK",
    "similarityThreshold", "enableHybridSearch", "hybridSearchDesc",
    "hybridWeight", "pureText", "pureVector", "enableQueryExpansion",
    "queryExpansionDesc", "enableHyDE", "hydeDesc", "enableReranking",
    "rerankingDesc", "broad", "strict", "maxInput", "dimensions",
    "defaultBadge", "dims", "ctx", "baseApi", "configured", "groupUpdated",
    "groupDeleted", "groupCreated", "navCatalog", "allDocuments",
    "categories", "uncategorizedFiles", "category", "statusReadyDesc",
    "statusIndexingDesc", "selectCategory", "noneUncategorized", "previous",
    "next", "createCategory", "categoryDesc", "categoryName",
    "createCategoryBtn", "newGroup", "noKnowledgeGroups", "createGroupDesc",
    "noDescriptionProvided", "browseManageFiles", "filterGroupFiles",
)

# A block header such as " zh: {". The capturing group makes re.split keep
# the header text in the result list.
_BLOCK_HEADER_RE = re.compile(r'(\s+\w+: \{)')
# An indented "key:" at the start of a line.
_KEY_RE = re.compile(r'^\s+([a-zA-Z0-9_-]+):')
# An indented closing brace, with or without a trailing comma.
_BLOCK_CLOSE_RE = re.compile(r'^\s+},?$')
# Closer appended when a block contains no recognisable closing line.
_FALLBACK_CLOSER = " },"


def _clean_block(block_content: str) -> str:
    """Return *block_content* without duplicate keys and with missing keys added.

    Only the line on which a duplicate key starts is dropped, so a duplicate
    whose value spans several lines is not handled.
    """
    keys_seen = set()
    kept_lines = []
    for line in block_content.split('\n'):
        match = _KEY_RE.search(line)
        if match:
            key = match.group(1)
            if key in keys_seen:
                continue  # Keep the first occurrence, drop later duplicates.
            keys_seen.add(key)
        kept_lines.append(line)

    additions = [
        f' {key}: "{key}",' for key in _REQUIRED_KEYS if key not in keys_seen
    ]

    # Locate the block's closing brace by scanning from the end. Anything that
    # follows it (for example the file's final "};" and trailing newline) must
    # stay after the inserted keys.
    close_index = next(
        (
            index
            for index in range(len(kept_lines) - 1, -1, -1)
            if _BLOCK_CLOSE_RE.search(kept_lines[index])
        ),
        None,
    )
    if close_index is None:
        # No closing line found (e.g. an empty block): append the keys and
        # supply a closer, instead of indexing past the start of the list.
        return '\n'.join(kept_lines + additions + [_FALLBACK_CLOSER])

    kept_lines[close_index:close_index] = additions
    return '\n'.join(kept_lines)


def clean_translations(file_path: str) -> None:
    """De-duplicate and complete the translation blocks of *file_path* in place.

    The file is split on headers of the form ``<whitespace><name>: {`` (for
    example `` zh: {``, `` en: {``, `` ja: {``). Within each block, repeated
    keys are reduced to their first occurrence and every key in
    ``_REQUIRED_KEYS`` that is absent is inserted before the block's closing
    brace, using the key name as its value. Text before the first header is
    written back unchanged.

    NOTE(review): any nested ``name: {`` object also matches the header
    pattern and is therefore treated as a block of its own.

    Args:
        file_path: Path of the UTF-8 text file to rewrite.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # With one capturing group, re.split returns
    # [header, block_header_1, block_content_1, block_header_2, ...],
    # so the length is always odd and headers sit at the odd indices.
    blocks = _BLOCK_HEADER_RE.split(content)
    header = blocks[0]

    processed_blocks = [
        blocks[i] + _clean_block(blocks[i + 1])
        for i in range(1, len(blocks), 2)
    ]

    new_content = header + ''.join(processed_blocks)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(new_content)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <translations-file>")
    clean_translations(sys.argv[1])