.env.sample 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. PORT=3001
  2. # IMPORTANT: Must change the following in production environment
  3. # Database path
  4. DATABASE_PATH=./data/metadata.db
  5. # Service host configuration (change all three to the actual server addresses in production)
  6. ELASTICSEARCH_HOST=http://127.0.0.1:9200
  7. TIKA_HOST=http://127.0.0.1:9998
  8. LIBREOFFICE_URL=http://127.0.0.1:8100
  9. JWT_SECRET=your-super-secure-jwt-secret-key-change-it-in-production
  10. # File Upload Configuration
  11. UPLOAD_FILE_PATH=./uploads
  12. MAX_FILE_SIZE=104857600
  13. TEMP_DIR=./temp
  14. # Vector Dimensions Configuration
  15. # Must match the output dimensions of your embedding model
  16. # Common values: 3072 (text-embedding-3-large), 1536 (text-embedding-3-small), 2048 (custom)
  17. DEFAULT_VECTOR_DIMENSIONS=2048
  18. # Chunk Size Limits Configuration
  19. # Maximum chunk size (tokens) - env var limit (highest priority)
  20. # Set according to your embedding model
  21. # OpenAI text-embedding-3-large: 8191
  22. # OpenAI text-embedding-3-small: 8191
  23. # Google Gemini embedding-001: 2048
  24. MAX_CHUNK_SIZE=8191
  25. # Maximum chunk overlap (tokens) - env var limit
  26. # Recommended: 10-20% of chunk size
  27. MAX_OVERLAP_SIZE=200
  28. # Language and Memory Management Configuration
  29. # Default language for the system (zh, en, ja)
  30. # If not set, defaults to 'en'
  31. DEFAULT_LANGUAGE=en
  32. # Memory usage limit (MB). Triggers wait or forced GC when exceeded
  33. MAX_MEMORY_USAGE_MB=1024
  34. # Batch processing size (chunk count)
  35. CHUNK_BATCH_SIZE=100
  36. # Forced GC threshold (MB)
  37. GC_THRESHOLD_MB=800
  38. # Frontend settings
  39. # ALLOWED_HOSTS - Comma-separated list of hosts allowed for API connection
  40. # Set in frontend .env file as REACT_APP_ALLOWED_HOSTS