| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- import sys
- import re
# Splits the file on block openers such as "\n  zh: {". The capturing group
# makes re.split keep each opener in the result list, so the list alternates
# [preamble, opener, body, opener, body, ...].
_BLOCK_HEADER_RE = re.compile(r'(\s+\w+: \{)')
# An indented "key:" at the start of a line; group 1 is the key name.
_KEY_RE = re.compile(r'^\s+([a-zA-Z0-9_-]+):')
# An indented closing-brace line: "  }" or "  },".
_BLOCK_CLOSE_RE = re.compile(r'^\s+},?$')

# Keys every block must contain. A key that is absent from a block is added
# with its own name as a placeholder value.
_MISSING_KEYS = (
    "kbSettingsSaved", "failedToSaveSettings", "actionFailed", "userAddedToOrganization",
    "featureUpdated", "roleTenantAdmin", "roleRegularUser", "creatingRegularUser",
    "editUserRole", "targetRole", "editCategory", "totalTenants", "systemUsers",
    "systemHealth", "operational", "orgManagement", "globalTenantControl",
    "newTenant", "domainOptional", "saveChanges", "modelConfiguration",
    "defaultLLMModel", "selectLLM", "selectEmbedding", "rerankModel", "none",
    "indexingChunkingConfig", "chatHyperparameters", "temperature", "precise",
    "creative", "maxResponseTokens", "retrievalSearchSettings", "topK",
    "similarityThreshold", "enableHybridSearch", "hybridSearchDesc", "hybridWeight",
    "pureText", "pureVector", "enableQueryExpansion", "queryExpansionDesc",
    "enableHyDE", "hydeDesc", "enableReranking", "rerankingDesc", "broad",
    "strict", "maxInput", "dimensions", "defaultBadge", "dims", "ctx",
    "baseApi", "configured", "groupUpdated", "groupDeleted", "groupCreated",
    "navCatalog", "allDocuments", "categories", "uncategorizedFiles", "category",
    "statusReadyDesc", "statusIndexingDesc", "selectCategory", "noneUncategorized",
    "previous", "next", "createCategory", "categoryDesc", "categoryName",
    "createCategoryBtn", "newGroup", "noKnowledgeGroups", "createGroupDesc",
    "noDescriptionProvided", "browseManageFiles", "filterGroupFiles",
)


def _dedupe_key_lines(lines):
    """Return (kept_lines, keys_seen), dropping lines that repeat a key.

    Only the line that carries the repeated "key:" is dropped; continuation
    lines of a multi-line value are not recognised and are kept as they are.
    """
    keys_seen = set()
    kept = []
    for line in lines:
        match = _KEY_RE.search(line)
        if match:
            key = match.group(1)
            if key in keys_seen:
                continue  # later duplicate: the first occurrence wins
            keys_seen.add(key)
        kept.append(line)
    return kept, keys_seen


def _last_closing_index(lines):
    """Return the index of the last indented "}" / "}," line, or None."""
    for idx in range(len(lines) - 1, -1, -1):
        if _BLOCK_CLOSE_RE.search(lines[idx]):
            return idx
    return None


def clean_translations(file_path):
    """Deduplicate keys and add missing placeholder keys, rewriting in place.

    The file is split on every "<whitespace><word>: {" opener. Within each
    resulting block, a line whose key already appeared earlier in that block
    is removed, and every key from the required list that the block lacks is
    added as ``key: "key",``.

    New keys are inserted immediately before the block's last indented closing
    brace line, so anything after that brace (for example the outer "};" and
    the trailing newline of the final block) stays where it was. If a block has
    no such closing line, the keys are appended and a " }," line is added after
    them.

    Args:
        file_path: Path of the UTF-8 text file to read and overwrite.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    blocks = _BLOCK_HEADER_RE.split(content)
    # blocks[0] is the text before the first opener; after it the list
    # alternates opener, body, opener, body, ...
    header = blocks[0]
    processed_blocks = []

    for block_header, block_content in zip(blocks[1::2], blocks[2::2]):
        kept, keys_seen = _dedupe_key_lines(block_content.split('\n'))
        additions = [
            f' {key}: "{key}",' for key in _MISSING_KEYS if key not in keys_seen
        ]

        close_idx = _last_closing_index(kept)
        if close_idx is None:
            # No recognisable closing line: append the keys and close the
            # block with a synthesized brace line.
            kept.extend(additions)
            kept.append(" },")
        else:
            # Insert in place so trailing lines keep their position.
            kept[close_idx:close_idx] = additions

        processed_blocks.append(block_header + '\n'.join(kept))

    new_content = header + ''.join(processed_blocks)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(new_content)
if __name__ == "__main__":
    # The first command-line argument is the file to rewrite in place; any
    # further arguments are ignored. Exit with a usage message instead of an
    # IndexError traceback when the argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <translations-file>")
    clean_translations(sys.argv[1])
|