| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413 |
- import { Injectable, Logger, BadRequestException } from '@nestjs/common';
- import { ConfigService } from '@nestjs/config';
- import { ModelConfigService } from '../model-config/model-config.service';
- import { TenantService } from '../tenant/tenant.service';
- // import { UserSettingService } from '../user-setting/user-setting.service';
- /**
- * Chunk config service
- * Responsible for validating and managing chunk parameters to ensure they conform to model limits and environment variable settings
- *
- * Priority of limits:
- * 1. Environment variables (MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE)
- * 2. Model settings in database (maxInputTokens, maxBatchSize)
- * 3. Default values
- */
- import {
- DEFAULT_CHUNK_SIZE,
- MIN_CHUNK_SIZE,
- DEFAULT_CHUNK_OVERLAP,
- MIN_CHUNK_OVERLAP,
- DEFAULT_MAX_OVERLAP_RATIO,
- DEFAULT_MAX_BATCH_SIZE,
- DEFAULT_VECTOR_DIMENSIONS,
- } from '../common/constants';
- import { I18nService } from '../i18n/i18n.service';
/**
 * Chunk config service.
 *
 * Validates and normalizes chunking parameters (chunk size, overlap, batch
 * size, vector dimensions) against the limits that apply to an embedding model.
 *
 * Priority of limits:
 *   1. Environment variables (MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE)
 *   2. Model settings stored in the database (maxInputTokens, maxBatchSize)
 *   3. Default values from ../common/constants
 */
@Injectable()
export class ChunkConfigService {
  private readonly logger = new Logger(ChunkConfigService.name);

  // Default settings; the concrete values are defined in ../common/constants.
  private readonly DEFAULTS = {
    chunkSize: DEFAULT_CHUNK_SIZE,
    chunkOverlap: DEFAULT_CHUNK_OVERLAP,
    minChunkSize: MIN_CHUNK_SIZE,
    minChunkOverlap: MIN_CHUNK_OVERLAP,
    maxOverlapRatio: DEFAULT_MAX_OVERLAP_RATIO, // Max overlap as a fraction of chunk size
    maxBatchSize: DEFAULT_MAX_BATCH_SIZE, // Default batch limit
    expectedDimensions: DEFAULT_VECTOR_DIMENSIONS, // Default vector dimensions
  };

  // Upper limits set by environment variables (used first)
  private readonly envMaxChunkSize: number;
  private readonly envMaxOverlapSize: number;

  constructor(
    private configService: ConfigService,
    private modelConfigService: ModelConfigService,
    private i18nService: I18nService,
    private tenantService: TenantService,
  ) {
    // Load global limit settings from environment variables. Invalid values
    // fall back to the defaults so that the limits can never become NaN
    // (a NaN limit would make every later comparison false and silently
    // disable validation).
    this.envMaxChunkSize = this.readIntSetting('MAX_CHUNK_SIZE', 8191, 1);
    this.envMaxOverlapSize = this.readIntSetting('MAX_OVERLAP_SIZE', 2000, 0);

    this.logger.log(
      `Environment variable limits: MAX_CHUNK_SIZE=${this.envMaxChunkSize}, MAX_OVERLAP_SIZE=${this.envMaxOverlapSize}`,
    );
  }

  /**
   * Read an integer setting from the config service.
   *
   * @param key Name of the configuration entry.
   * @param fallback Value used when the entry is missing or invalid.
   * @param min Smallest acceptable value (inclusive).
   * @returns The parsed base-10 integer, or `fallback` when the raw value
   *   does not parse to a number or is below `min`.
   */
  private readIntSetting(key: string, fallback: number, min: number): number {
    const raw = this.configService.get<string>(key, String(fallback));
    const parsed = Number.parseInt(String(raw), 10);

    if (Number.isNaN(parsed) || parsed < min) {
      this.logger.warn(
        `Ignoring invalid ${key}="${raw}"; using ${fallback} instead`,
      );
      return fallback;
    }
    return parsed;
  }

  /**
   * Fetch the model config once and derive its limits.
   *
   * Shared by getModelLimits and getFrontendLimits so that the model record
   * is not looked up twice for a single request.
   *
   * @throws BadRequestException when the model does not exist or is not an
   *   embedding model.
   */
  private async loadEmbeddingModel(
    modelId: string,
    userId: string,
    tenantId?: string,
  ) {
    const modelConfig = await this.modelConfigService.findOne(
      modelId,
      userId,
      tenantId || '',
    );

    if (!modelConfig || modelConfig.type !== 'embedding') {
      throw new BadRequestException(
        this.i18nService.formatMessage('embeddingModelNotFound', {
          id: modelId,
        }),
      );
    }

    // Get limits from database fields and fill the gaps with defaults.
    // `||` is intentional: a stored 0 is treated the same as "not set".
    const maxInputTokens = modelConfig.maxInputTokens || this.envMaxChunkSize;
    const maxBatchSize = modelConfig.maxBatchSize || this.DEFAULTS.maxBatchSize;
    const expectedDimensions =
      modelConfig.dimensions ||
      this.readIntSetting(
        'DEFAULT_VECTOR_DIMENSIONS',
        this.DEFAULTS.expectedDimensions,
        1,
      );
    const providerName = modelConfig.providerName || 'unknown';
    const isVectorModel = modelConfig.isVectorModel || false;

    this.logger.log(
      this.i18nService.formatMessage('configLoaded', {
        name: modelConfig.name,
        id: modelConfig.modelId,
      }) +
        '\n' +
        ` - Provider: ${providerName}\n` +
        ` - Token limit: ${maxInputTokens}\n` +
        ` - Batch limit: ${maxBatchSize}\n` +
        ` - Vector dimensions: ${expectedDimensions}\n` +
        ` - Is vector model: ${isVectorModel}`,
    );

    return {
      modelName: modelConfig.name,
      limits: {
        maxInputTokens,
        maxBatchSize,
        expectedDimensions,
        providerName,
        isVectorModel,
      },
    };
  }

  /**
   * Get model limit settings (read from database, gaps filled with defaults).
   *
   * @param modelId Identifier passed to ModelConfigService.findOne.
   * @param userId User on whose behalf the model is looked up.
   * @param tenantId Optional tenant; an empty string is passed when omitted.
   * @returns The effective limits of the model.
   * @throws BadRequestException when the model does not exist or is not an
   *   embedding model.
   */
  async getModelLimits(
    modelId: string,
    userId: string,
    tenantId?: string,
  ): Promise<{
    maxInputTokens: number;
    maxBatchSize: number;
    expectedDimensions: number;
    providerName: string;
    isVectorModel: boolean;
  }> {
    const { limits } = await this.loadEmbeddingModel(modelId, userId, tenantId);
    return limits;
  }

  /**
   * Validate and fix chunk config.
   * Priority: Environment variable limits > Model limits > User settings
   *
   * Out-of-range values are clamped rather than rejected; every adjustment
   * adds a localized message to `warnings`.
   *
   * @param chunkSize Requested chunk size in tokens.
   * @param chunkOverlap Requested overlap in tokens.
   * @returns The adjusted values, the warnings, and the effective upper limits.
   * @throws BadRequestException when the model is not a known embedding model.
   */
  async validateChunkConfig(
    chunkSize: number,
    chunkOverlap: number,
    modelId: string,
    userId: string,
    tenantId?: string,
  ): Promise<{
    chunkSize: number;
    chunkOverlap: number;
    warnings: string[];
    effectiveMaxChunkSize: number;
    effectiveMaxOverlapSize: number;
  }> {
    const warnings: string[] = [];
    const limits = await this.getModelLimits(modelId, userId, tenantId);

    // Work on local copies instead of reassigning the parameters.
    let size = chunkSize;
    let overlap = chunkOverlap;

    // 1. Calculate final limits (choose smaller of env var and model limit)
    const effectiveMaxChunkSize = Math.min(
      this.envMaxChunkSize,
      limits.maxInputTokens,
    );
    const effectiveMaxOverlapSize = Math.min(
      this.envMaxOverlapSize,
      Math.floor(effectiveMaxChunkSize * this.DEFAULTS.maxOverlapRatio),
    );

    // 2. Validate chunk size upper limit
    if (size > effectiveMaxChunkSize) {
      // Tell the user which of the two limits was the binding one.
      const reason =
        this.envMaxChunkSize < limits.maxInputTokens
          ? `${this.i18nService.getMessage('environmentLimit')} ${this.envMaxChunkSize}`
          : `${this.i18nService.getMessage('modelLimit')} ${limits.maxInputTokens}`;
      warnings.push(
        this.i18nService.formatMessage('chunkOverflow', {
          size,
          max: effectiveMaxChunkSize,
          reason,
        }),
      );
      size = effectiveMaxChunkSize;
    }

    // 3. Validate chunk size lower limit
    // NOTE(review): if the effective maximum is below the minimum chunk size,
    // this step wins and the result exceeds the maximum; confirm that is the
    // intended precedence.
    if (size < this.DEFAULTS.minChunkSize) {
      warnings.push(
        this.i18nService.formatMessage('chunkUnderflow', {
          size,
          min: this.DEFAULTS.minChunkSize,
        }),
      );
      size = this.DEFAULTS.minChunkSize;
    }

    // 4. Validate overlap size upper limit (env var first)
    if (overlap > effectiveMaxOverlapSize) {
      warnings.push(
        this.i18nService.formatMessage('overlapOverflow', {
          size: overlap,
          max: effectiveMaxOverlapSize,
        }),
      );
      overlap = effectiveMaxOverlapSize;
    }

    // 5. Validate overlap against the maximum ratio of the (adjusted) chunk size
    const maxOverlapByRatio = Math.floor(size * this.DEFAULTS.maxOverlapRatio);
    if (overlap > maxOverlapByRatio) {
      warnings.push(
        this.i18nService.formatMessage('overlapRatioExceeded', {
          size: overlap,
          max: maxOverlapByRatio,
        }),
      );
      overlap = maxOverlapByRatio;
    }

    if (overlap < this.DEFAULTS.minChunkOverlap) {
      warnings.push(
        this.i18nService.formatMessage('overlapUnderflow', {
          size: overlap,
          min: this.DEFAULTS.minChunkOverlap,
        }),
      );
      overlap = this.DEFAULTS.minChunkOverlap;
    }

    // 6. Safety check for batch processing: warn (without clamping) when the
    // chunk size is above 80% of the effective maximum.
    const safetyMargin = 0.8;
    const safeChunkSize = Math.floor(effectiveMaxChunkSize * safetyMargin);
    if (size > safeChunkSize) {
      warnings.push(
        this.i18nService.formatMessage('batchOverflowWarning', {
          safeSize: safeChunkSize,
          size,
          percent: Math.round(safetyMargin * 100),
        }),
      );
    }

    // 7. Check if estimated chunk count is reasonable
    const estimatedChunkCount = this.estimateChunkCount(
      1000000, // Reference text of 1,000,000 characters
      size,
    );
    if (estimatedChunkCount > 50000) {
      warnings.push(
        this.i18nService.formatMessage('estimatedChunkCountExcessive', {
          count: estimatedChunkCount,
        }),
      );
    }

    return {
      chunkSize: size,
      chunkOverlap: overlap,
      warnings,
      effectiveMaxChunkSize,
      effectiveMaxOverlapSize,
    };
  }

  /**
   * Get recommended batch size.
   *
   * The result is the smallest of the requested size, the model's batch limit
   * and a hard safety cap of 200, raised to a floor of 10 where the model
   * allows it.
   *
   * @param currentBatchSize Batch size the caller would like to use.
   * @returns A batch size that never exceeds the model's batch limit
   *   (and is at least 1).
   */
  async getRecommendedBatchSize(
    modelId: string,
    userId: string,
    tenantId?: string,
    currentBatchSize: number = 100,
  ): Promise<number> {
    const limits = await this.getModelLimits(modelId, userId, tenantId);

    // Choose smaller of configured value and model limit
    const recommended = Math.min(
      currentBatchSize,
      limits.maxBatchSize,
      200, // Safety upper limit
    );

    if (recommended < currentBatchSize) {
      this.logger.warn(
        this.i18nService.formatMessage('batchSizeAdjusted', {
          old: currentBatchSize,
          new: recommended,
          limit: limits.maxBatchSize,
        }),
      );
    }

    // Minimum 10, but the floor itself must respect the model limit:
    // a model that accepts fewer than 10 inputs per batch must not be
    // handed a batch of 10.
    const floor = Math.max(1, Math.min(10, limits.maxBatchSize));
    return Math.max(floor, recommended);
  }

  /**
   * Estimate chunk count.
   *
   * @param textLength Length of the text in characters.
   * @param chunkSize Chunk size in tokens.
   * @returns Number of chunks, using the heuristic of 4 characters per token
   *   and ignoring overlap.
   */
  estimateChunkCount(textLength: number, chunkSize: number): number {
    const chunkSizeInChars = chunkSize * 4; // 1 token ≈ 4 chars
    return Math.ceil(textLength / chunkSizeInChars);
  }

  /**
   * Validate vector dimensions.
   *
   * @param actualDimensions Dimensions observed by the caller.
   * @returns `true` when they equal the model's expected dimensions;
   *   otherwise logs a warning and returns `false`.
   */
  async validateDimensions(
    modelId: string,
    userId: string,
    actualDimensions: number,
    tenantId?: string,
  ): Promise<boolean> {
    const limits = await this.getModelLimits(modelId, userId, tenantId);

    if (actualDimensions === limits.expectedDimensions) {
      return true;
    }

    this.logger.warn(
      this.i18nService.formatMessage('dimensionMismatch', {
        id: modelId,
        expected: limits.expectedDimensions,
        actual: actualDimensions,
      }),
    );
    return false;
  }

  /**
   * Get config summary (for logging).
   *
   * @returns A single comma-separated line describing the given chunk
   *   settings together with the model's limits.
   */
  async getConfigSummary(
    chunkSize: number,
    chunkOverlap: number,
    modelId: string,
    userId: string,
    tenantId?: string,
  ): Promise<string> {
    const limits = await this.getModelLimits(modelId, userId, tenantId);

    return [
      `Model: ${modelId}`,
      `Chunk size: ${chunkSize} tokens (limit: ${limits.maxInputTokens})`,
      `Overlap size: ${chunkOverlap} tokens`,
      `Batch size: ${limits.maxBatchSize}`,
      `Vector dimensions: ${limits.expectedDimensions}`,
    ].join(', ');
  }

  /**
   * Get config limits for frontend.
   * Used for frontend slider max value settings.
   *
   * Defaults come from the tenant settings when a tenant is given and the
   * settings define them; otherwise the built-in defaults are used. The
   * returned defaults are clamped to the effective maxima.
   *
   * @throws BadRequestException when the model is not a known embedding model.
   */
  async getFrontendLimits(
    modelId: string,
    userId: string,
    tenantId?: string,
  ): Promise<{
    maxChunkSize: number;
    maxOverlapSize: number;
    minOverlapSize: number;
    defaultChunkSize: number;
    defaultOverlapSize: number;
    modelInfo: {
      name: string;
      maxInputTokens: number;
      maxBatchSize: number;
      expectedDimensions: number;
    };
  }> {
    // One lookup yields both the limits and the model name.
    const { limits, modelName } = await this.loadEmbeddingModel(
      modelId,
      userId,
      tenantId,
    );

    // Calculate final limits (choose smaller of env var and model limit)
    const maxChunkSize = Math.min(this.envMaxChunkSize, limits.maxInputTokens);
    const maxOverlapSize = Math.min(
      this.envMaxOverlapSize,
      Math.floor(maxChunkSize * this.DEFAULTS.maxOverlapRatio),
    );

    // Get defaults from tenant settings, falling back to built-in defaults
    let defaultChunkSize = this.DEFAULTS.chunkSize;
    let defaultOverlapSize = this.DEFAULTS.chunkOverlap;
    if (tenantId) {
      const tenantSettings = await this.tenantService.getSettings(tenantId);
      if (tenantSettings?.chunkSize) {
        defaultChunkSize = tenantSettings.chunkSize;
      }
      if (tenantSettings?.chunkOverlap) {
        defaultOverlapSize = tenantSettings.chunkOverlap;
      }
    }

    return {
      maxChunkSize,
      maxOverlapSize,
      minOverlapSize: this.DEFAULTS.minChunkOverlap,
      defaultChunkSize: Math.min(defaultChunkSize, maxChunkSize),
      defaultOverlapSize: Math.max(
        this.DEFAULTS.minChunkOverlap,
        Math.min(defaultOverlapSize, maxOverlapSize),
      ),
      modelInfo: {
        name: modelName || 'Unknown',
        maxInputTokens: limits.maxInputTokens,
        maxBatchSize: limits.maxBatchSize,
        expectedDimensions: limits.expectedDimensions,
      },
    };
  }
}
|