# Environment configuration (dotenv format).
# Keep one KEY=value assignment per line and put comments on their own lines:
# not every dotenv parser strips trailing "# ..." comments from values.

PORT=3001

# IMPORTANT: Must change the following in production environment

# Database path
DATABASE_PATH=./data/metadata.db

# Service host configuration (use actual server address in production)
# Change for production
ELASTICSEARCH_HOST=http://127.0.0.1:9200
# Change for production
TIKA_HOST=http://127.0.0.1:9998
# Change for production
LIBREOFFICE_URL=http://127.0.0.1:8100

# Placeholder only: replace with a long, random secret before deploying
JWT_SECRET=your-super-secure-jwt-secret-key-change-it-in-production

# File Upload Configuration
UPLOAD_FILE_PATH=./uploads
# 104857600 = 100 * 1024 * 1024 (presumably bytes, i.e. 100 MiB - confirm against the upload handler)
MAX_FILE_SIZE=104857600
TEMP_DIR=./temp

# Vector Dimensions Configuration
# Must match the output dimensions of your embedding model
# Common values: 3072 (text-embedding-3-large), 1536 (text-embedding-3-small), 2048 (custom)
DEFAULT_VECTOR_DIMENSIONS=2048

# Chunk Size Limits Configuration
# Maximum chunk size (tokens) - env var limit (highest priority)
# Set according to your embedding model
# OpenAI text-embedding-3-large: 8191
# OpenAI text-embedding-3-small: 8191
# Google Gemini embedding-001: 2048
MAX_CHUNK_SIZE=8191

# Maximum chunk overlap (tokens) - env var limit
# Recommended: 10-20% of chunk size
MAX_OVERLAP_SIZE=200

# Language Configuration
# Default language for the system (zh, en, ja)
# If not set, defaults to 'en'
DEFAULT_LANGUAGE=en

# Memory Management Configuration
# Memory usage limit (MB). Triggers wait or forced GC when exceeded
MAX_MEMORY_USAGE_MB=1024

# Batch processing size (chunk count)
CHUNK_BATCH_SIZE=100

# Forced GC threshold (MB)
GC_THRESHOLD_MB=800

# Frontend settings
# ALLOWED_HOSTS - Comma-separated list of hosts allowed for API connection
# Set in frontend .env file as REACT_APP_ALLOWED_HOSTS