chunk-config.service.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. import { Injectable, Logger, BadRequestException } from '@nestjs/common';
  2. import { ConfigService } from '@nestjs/config';
  3. import { ModelConfigService } from '../model-config/model-config.service';
  4. import { TenantService } from '../tenant/tenant.service';
  5. // import { UserSettingService } from '../user-setting/user-setting.service';
  6. /**
  7. * Chunk config service
  8. * Responsible for validating and managing chunk parameters to ensure they conform to model limits and environment variable settings
  9. *
  10. * Priority of limits:
  11. * 1. Environment variables (MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE)
  12. * 2. Model settings in database (maxInputTokens, maxBatchSize)
  13. * 3. Default values
  14. */
  15. import {
  16. DEFAULT_CHUNK_SIZE,
  17. MIN_CHUNK_SIZE,
  18. DEFAULT_CHUNK_OVERLAP,
  19. MIN_CHUNK_OVERLAP,
  20. DEFAULT_MAX_OVERLAP_RATIO,
  21. DEFAULT_MAX_BATCH_SIZE,
  22. DEFAULT_VECTOR_DIMENSIONS,
  23. } from '../common/constants';
  24. import { I18nService } from '../i18n/i18n.service';
  25. @Injectable()
  26. export class ChunkConfigService {
  27. private readonly logger = new Logger(ChunkConfigService.name);
  28. // Default settings
  29. private readonly DEFAULTS = {
  30. chunkSize: DEFAULT_CHUNK_SIZE,
  31. chunkOverlap: DEFAULT_CHUNK_OVERLAP,
  32. minChunkSize: MIN_CHUNK_SIZE,
  33. minChunkOverlap: MIN_CHUNK_OVERLAP,
  34. maxOverlapRatio: DEFAULT_MAX_OVERLAP_RATIO, // Overlap up to 50% of chunk size
  35. maxBatchSize: DEFAULT_MAX_BATCH_SIZE, // Default batch limit
  36. expectedDimensions: DEFAULT_VECTOR_DIMENSIONS, // Default vector dimensions
  37. };
  38. // Upper limits set by environment variables (used first)
  39. private readonly envMaxChunkSize: number;
  40. private readonly envMaxOverlapSize: number;
  41. constructor(
  42. private configService: ConfigService,
  43. private modelConfigService: ModelConfigService,
  44. private i18nService: I18nService,
  45. private tenantService: TenantService,
  46. ) {
  47. // Load global limit settings from environment variables
  48. this.envMaxChunkSize = parseInt(
  49. this.configService.get<string>('MAX_CHUNK_SIZE', '8191'),
  50. );
  51. this.envMaxOverlapSize = parseInt(
  52. this.configService.get<string>('MAX_OVERLAP_SIZE', '2000'),
  53. );
  54. this.logger.log(
  55. `Environment variable limits: MAX_CHUNK_SIZE=${this.envMaxChunkSize}, MAX_OVERLAP_SIZE=${this.envMaxOverlapSize}`,
  56. );
  57. }
  58. /**
  59. * Get model limit settings (read from database)
  60. */
  61. async getModelLimits(
  62. modelId: string,
  63. userId: string,
  64. tenantId?: string,
  65. ): Promise<{
  66. maxInputTokens: number;
  67. maxBatchSize: number;
  68. expectedDimensions: number;
  69. providerName: string;
  70. isVectorModel: boolean;
  71. }> {
  72. const modelConfig = await this.modelConfigService.findOne(
  73. modelId,
  74. userId,
  75. tenantId || '',
  76. );
  77. if (!modelConfig || modelConfig.type !== 'embedding') {
  78. throw new BadRequestException(
  79. this.i18nService.formatMessage('embeddingModelNotFound', {
  80. id: modelId,
  81. }),
  82. );
  83. }
  84. // Get limits from database fields and fill with defaults
  85. const maxInputTokens = modelConfig.maxInputTokens || this.envMaxChunkSize;
  86. const maxBatchSize = modelConfig.maxBatchSize || this.DEFAULTS.maxBatchSize;
  87. const expectedDimensions =
  88. modelConfig.dimensions ||
  89. parseInt(
  90. this.configService.get(
  91. 'DEFAULT_VECTOR_DIMENSIONS',
  92. String(this.DEFAULTS.expectedDimensions),
  93. ),
  94. );
  95. const providerName = modelConfig.providerName || 'unknown';
  96. const isVectorModel = modelConfig.isVectorModel || false;
  97. this.logger.log(
  98. this.i18nService.formatMessage('configLoaded', {
  99. name: modelConfig.name,
  100. id: modelConfig.modelId,
  101. }) +
  102. '\n' +
  103. ` - Provider: ${providerName}\n` +
  104. ` - Token limit: ${maxInputTokens}\n` +
  105. ` - Batch limit: ${maxBatchSize}\n` +
  106. ` - Vector dimensions: ${expectedDimensions}\n` +
  107. ` - Is vector model: ${isVectorModel}`,
  108. );
  109. return {
  110. maxInputTokens,
  111. maxBatchSize,
  112. expectedDimensions,
  113. providerName,
  114. isVectorModel,
  115. };
  116. }
  117. /**
  118. * Validate and fix chunk config
  119. * Priority: Environment variable limits > Model limits > User settings
  120. */
  121. async validateChunkConfig(
  122. chunkSize: number,
  123. chunkOverlap: number,
  124. modelId: string,
  125. userId: string,
  126. tenantId?: string,
  127. ): Promise<{
  128. chunkSize: number;
  129. chunkOverlap: number;
  130. warnings: string[];
  131. effectiveMaxChunkSize: number;
  132. effectiveMaxOverlapSize: number;
  133. }> {
  134. const warnings: string[] = [];
  135. const limits = await this.getModelLimits(modelId, userId, tenantId);
  136. // 1. Calculate final limits (choose smaller of env var and model limit)
  137. const effectiveMaxChunkSize = Math.min(
  138. this.envMaxChunkSize,
  139. limits.maxInputTokens,
  140. );
  141. const effectiveMaxOverlapSize = Math.min(
  142. this.envMaxOverlapSize,
  143. Math.floor(effectiveMaxChunkSize * this.DEFAULTS.maxOverlapRatio),
  144. );
  145. // 2. Validate chunk size upper limit
  146. if (chunkSize > effectiveMaxChunkSize) {
  147. const reason =
  148. this.envMaxChunkSize < limits.maxInputTokens
  149. ? `${this.i18nService.getMessage('environmentLimit')} ${this.envMaxChunkSize}`
  150. : `${this.i18nService.getMessage('modelLimit')} ${limits.maxInputTokens}`;
  151. warnings.push(
  152. this.i18nService.formatMessage('chunkOverflow', {
  153. size: chunkSize,
  154. max: effectiveMaxChunkSize,
  155. reason,
  156. }),
  157. );
  158. chunkSize = effectiveMaxChunkSize;
  159. }
  160. // 3. Validate chunk size lower limit
  161. if (chunkSize < this.DEFAULTS.minChunkSize) {
  162. warnings.push(
  163. this.i18nService.formatMessage('chunkUnderflow', {
  164. size: chunkSize,
  165. min: this.DEFAULTS.minChunkSize,
  166. }),
  167. );
  168. chunkSize = this.DEFAULTS.minChunkSize;
  169. }
  170. // 4. Validate overlap size upper limit (env var first)
  171. if (chunkOverlap > effectiveMaxOverlapSize) {
  172. warnings.push(
  173. this.i18nService.formatMessage('overlapOverflow', {
  174. size: chunkOverlap,
  175. max: effectiveMaxOverlapSize,
  176. }),
  177. );
  178. chunkOverlap = effectiveMaxOverlapSize;
  179. }
  180. // 5. Validate overlap doesn't exceed 50% of chunk size
  181. const maxOverlapByRatio = Math.floor(
  182. chunkSize * this.DEFAULTS.maxOverlapRatio,
  183. );
  184. if (chunkOverlap > maxOverlapByRatio) {
  185. warnings.push(
  186. this.i18nService.formatMessage('overlapRatioExceeded', {
  187. size: chunkOverlap,
  188. max: maxOverlapByRatio,
  189. }),
  190. );
  191. chunkOverlap = maxOverlapByRatio;
  192. }
  193. if (chunkOverlap < this.DEFAULTS.minChunkOverlap) {
  194. warnings.push(
  195. this.i18nService.formatMessage('overlapUnderflow', {
  196. size: chunkOverlap,
  197. min: this.DEFAULTS.minChunkOverlap,
  198. }),
  199. );
  200. chunkOverlap = this.DEFAULTS.minChunkOverlap;
  201. }
  202. // 6. Add safety check for batch processing
  203. // During batch processing, ensure total length of multiple texts doesn't exceed model limits
  204. const safetyMargin = 0.8; // 80% safety margin to leave space for batch processing
  205. const safeChunkSize = Math.floor(effectiveMaxChunkSize * safetyMargin);
  206. if (chunkSize > safeChunkSize) {
  207. warnings.push(
  208. this.i18nService.formatMessage('batchOverflowWarning', {
  209. safeSize: safeChunkSize,
  210. size: chunkSize,
  211. percent: Math.round(safetyMargin * 100),
  212. }),
  213. );
  214. }
  215. // 7. Check if estimated chunk count is reasonable
  216. const estimatedChunkCount = this.estimateChunkCount(
  217. 1000000, // Assume 1MB text
  218. chunkSize,
  219. );
  220. if (estimatedChunkCount > 50000) {
  221. warnings.push(
  222. this.i18nService.formatMessage('estimatedChunkCountExcessive', {
  223. count: estimatedChunkCount,
  224. }),
  225. );
  226. }
  227. return {
  228. chunkSize,
  229. chunkOverlap,
  230. warnings,
  231. effectiveMaxChunkSize,
  232. effectiveMaxOverlapSize,
  233. };
  234. }
  235. /**
  236. * Get recommended batch size
  237. */
  238. async getRecommendedBatchSize(
  239. modelId: string,
  240. userId: string,
  241. tenantId?: string,
  242. currentBatchSize: number = 100,
  243. ): Promise<number> {
  244. const limits = await this.getModelLimits(modelId, userId, tenantId);
  245. // Choose smaller of configured value and model limit
  246. const recommended = Math.min(
  247. currentBatchSize,
  248. limits.maxBatchSize,
  249. 200, // Safety upper limit
  250. );
  251. if (recommended < currentBatchSize) {
  252. this.logger.warn(
  253. this.i18nService.formatMessage('batchSizeAdjusted', {
  254. old: currentBatchSize,
  255. new: recommended,
  256. limit: limits.maxBatchSize,
  257. }),
  258. );
  259. }
  260. return Math.max(10, recommended); // Minimum 10
  261. }
  262. /**
  263. * Estimate chunk count
  264. */
  265. estimateChunkCount(textLength: number, chunkSize: number): number {
  266. const chunkSizeInChars = chunkSize * 4; // 1 token ≈ 4 chars
  267. return Math.ceil(textLength / chunkSizeInChars);
  268. }
  269. /**
  270. * Validate vector dimensions
  271. */
  272. async validateDimensions(
  273. modelId: string,
  274. userId: string,
  275. actualDimensions: number,
  276. tenantId?: string,
  277. ): Promise<boolean> {
  278. const limits = await this.getModelLimits(modelId, userId, tenantId);
  279. if (actualDimensions !== limits.expectedDimensions) {
  280. this.logger.warn(
  281. this.i18nService.formatMessage('dimensionMismatch', {
  282. id: modelId,
  283. expected: limits.expectedDimensions,
  284. actual: actualDimensions,
  285. }),
  286. );
  287. return false;
  288. }
  289. return true;
  290. }
  291. /**
  292. * Get config summary (for logging)
  293. */
  294. async getConfigSummary(
  295. chunkSize: number,
  296. chunkOverlap: number,
  297. modelId: string,
  298. userId: string,
  299. tenantId?: string,
  300. ): Promise<string> {
  301. const limits = await this.getModelLimits(modelId, userId, tenantId);
  302. return [
  303. `Model: ${modelId}`,
  304. `Chunk size: ${chunkSize} tokens (limit: ${limits.maxInputTokens})`,
  305. `Overlap size: ${chunkOverlap} tokens`,
  306. `Batch size: ${limits.maxBatchSize}`,
  307. `Vector dimensions: ${limits.expectedDimensions}`,
  308. ].join(', ');
  309. }
  310. /**
  311. * Get config limits for frontend
  312. * Used for frontend slider max value settings
  313. */
  314. async getFrontendLimits(
  315. modelId: string,
  316. userId: string,
  317. tenantId?: string,
  318. ): Promise<{
  319. maxChunkSize: number;
  320. maxOverlapSize: number;
  321. minOverlapSize: number;
  322. defaultChunkSize: number;
  323. defaultOverlapSize: number;
  324. modelInfo: {
  325. name: string;
  326. maxInputTokens: number;
  327. maxBatchSize: number;
  328. expectedDimensions: number;
  329. };
  330. }> {
  331. const limits = await this.getModelLimits(modelId, userId, tenantId);
  332. // Calculate final limits (choose smaller of env var and model limit)
  333. const maxChunkSize = Math.min(this.envMaxChunkSize, limits.maxInputTokens);
  334. const maxOverlapSize = Math.min(
  335. this.envMaxOverlapSize,
  336. Math.floor(maxChunkSize * this.DEFAULTS.maxOverlapRatio),
  337. );
  338. // Get model config name
  339. const modelConfig = await this.modelConfigService.findOne(
  340. modelId,
  341. userId,
  342. tenantId || '',
  343. );
  344. const modelName = modelConfig?.name || 'Unknown';
  345. // Get defaults from tenant or user settings
  346. let defaultChunkSize = this.DEFAULTS.chunkSize;
  347. let defaultOverlapSize = this.DEFAULTS.chunkOverlap;
  348. if (tenantId) {
  349. const tenantSettings = await this.tenantService.getSettings(tenantId);
  350. if (tenantSettings?.chunkSize)
  351. defaultChunkSize = tenantSettings.chunkSize;
  352. if (tenantSettings?.chunkOverlap)
  353. defaultOverlapSize = tenantSettings.chunkOverlap;
  354. }
  355. return {
  356. maxChunkSize,
  357. maxOverlapSize,
  358. minOverlapSize: this.DEFAULTS.minChunkOverlap,
  359. defaultChunkSize: Math.min(defaultChunkSize, maxChunkSize),
  360. defaultOverlapSize: Math.max(
  361. this.DEFAULTS.minChunkOverlap,
  362. Math.min(defaultOverlapSize, maxOverlapSize),
  363. ),
  364. modelInfo: {
  365. name: modelName,
  366. maxInputTokens: limits.maxInputTokens,
  367. maxBatchSize: limits.maxBatchSize,
  368. expectedDimensions: limits.expectedDimensions,
  369. },
  370. };
  371. }
  372. }