| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- PORT=3001
- # IMPORTANT: The settings below must be changed in a production environment
- # Database path
- DATABASE_PATH=./data/metadata.db
- # Service host configuration (use actual server address in production)
- # Change all three hosts below for production. Comments are kept on their own
- # line because some env-file loaders treat trailing text as part of the value.
- ELASTICSEARCH_HOST=http://127.0.0.1:9200
- TIKA_HOST=http://127.0.0.1:9998
- LIBREOFFICE_URL=http://127.0.0.1:8100
- JWT_SECRET=your-super-secure-jwt-secret-key-change-it-in-production
- # File Upload Configuration
- UPLOAD_FILE_PATH=./uploads
- MAX_FILE_SIZE=104857600
- TEMP_DIR=./temp
- # Vector Dimensions Configuration
- # Must match the output dimensions of your embedding model
- # Common values: 3072 (text-embedding-3-large), 1536 (text-embedding-3-small), 2048 (custom)
- DEFAULT_VECTOR_DIMENSIONS=2048
- # Chunk Size Limits Configuration
- # Maximum chunk size (tokens) - env var limit (highest priority)
- # Set according to your embedding model
- # OpenAI text-embedding-3-large: 8191
- # OpenAI text-embedding-3-small: 8191
- # Google Gemini embedding-001: 2048
- MAX_CHUNK_SIZE=8191
- # Maximum chunk overlap (tokens) - env var limit
- # Recommended: 10-20% of chunk size
- MAX_OVERLAP_SIZE=200
- # Language Configuration
- # Default language for the system (zh, en, ja)
- # If not set, defaults to 'en'
- DEFAULT_LANGUAGE=en
- # Memory Management Configuration
- # Memory usage limit (MB). Triggers wait or forced GC when exceeded
- MAX_MEMORY_USAGE_MB=1024
- # Batch processing size (chunk count)
- CHUNK_BATCH_SIZE=100
- # Forced GC threshold (MB)
- GC_THRESHOLD_MB=800
- # Frontend settings
- # ALLOWED_HOSTS - Comma-separated list of hosts allowed for API connection
- # Set in frontend .env file as REACT_APP_ALLOWED_HOSTS
|