# anhuiqiang/KnowledgeBase - environment configuration
PORT=3001
# IMPORTANT: The values below must be changed for production environments.
# Database path
DATABASE_PATH=./data/metadata.db

# Service host configuration (use the actual server addresses in production).
# Notes are kept on their own lines: not every env-file parser strips a
# trailing "# ..." comment, so an inline note can end up inside the URL value.
# Change for production
ELASTICSEARCH_HOST=http://127.0.0.1:9200
# Change for production
TIKA_HOST=http://127.0.0.1:9998
# Change for production
LIBREOFFICE_URL=http://127.0.0.1:8100

# Placeholder only - replace with a long, random secret before deploying.
JWT_SECRET=your-super-secure-jwt-secret-key-change-it-in-production

# File Upload Configuration
# Upload location, given as a path relative to the working directory
UPLOAD_FILE_PATH=./uploads
# 104857600 = 100 * 1024 * 1024; presumably a byte count (100 MiB) - confirm
MAX_FILE_SIZE=104857600
# Temporary directory, given as a relative path (presumably scratch space - confirm)
TEMP_DIR=./temp

# Vector Dimensions Configuration
# Must match the output dimensions of your embedding model
# Common values: 3072 (text-embedding-3-large), 1536 (text-embedding-3-small), 2048 (custom)
DEFAULT_VECTOR_DIMENSIONS=2048

# Chunk Size Limits Configuration
# Maximum chunk size (tokens) - env var limit (highest priority)
# Set according to your embedding model's maximum input length, e.g.:
# OpenAI text-embedding-3-large: 8191
# OpenAI text-embedding-3-small: 8191
# Google Gemini embedding-001: 2048
MAX_CHUNK_SIZE=8191

# Maximum chunk overlap (tokens) - env var limit
# Recommended: 10-20% of chunk size
# NOTE(review): 200 is about 2.4% of MAX_CHUNK_SIZE=8191, so the 10-20%
# guidance presumably refers to the chunk size actually in use rather than
# to the cap above - confirm.
MAX_OVERLAP_SIZE=200

# Language Configuration
# Default language for the system (zh, en, ja)
# If not set, defaults to 'en'
DEFAULT_LANGUAGE=en

# Memory Management Configuration
# Memory usage limit (MB). Triggers wait or forced GC when exceeded
MAX_MEMORY_USAGE_MB=1024
# Batch processing size (chunk count)
CHUNK_BATCH_SIZE=100
# Forced GC threshold (MB)
GC_THRESHOLD_MB=800

# Frontend settings
# ALLOWED_HOSTS - Comma-separated list of hosts allowed for API connection
# Set in frontend .env file as REACT_APP_ALLOWED_HOSTS