import { Injectable, Logger } from '@nestjs/common';
import { I18nService } from '../i18n/i18n.service';
import { ConfigService } from '@nestjs/config';
import { ChatOpenAI } from '@langchain/openai';
import { HumanMessage } from '@langchain/core/messages';
import * as fs from 'fs/promises';
import {
  VisionAnalysisResult,
  VisionModelConfig,
  BatchAnalysisResult,
  ImageDescription,
} from './vision.interface';

/** Assumed price per successfully analyzed image (USD); used only for the batch cost estimate. */
const ESTIMATED_COST_PER_IMAGE_USD = 0.01;

/** Fallback MIME type when the file extension is missing or not recognised. */
const DEFAULT_MIME_TYPE = 'image/jpeg';

/**
 * Extension -> MIME type. A Map (rather than an object literal) so that an
 * extension such as "constructor" cannot resolve to an inherited prototype member.
 */
const MIME_TYPES_BY_EXTENSION = new Map<string, string>([
  ['jpg', 'image/jpeg'],
  ['jpeg', 'image/jpeg'],
  ['png', 'image/png'],
  ['gif', 'image/gif'],
  ['bmp', 'image/bmp'],
  ['webp', 'image/webp'],
]);

/** Node.js system error codes that indicate a transient network failure worth retrying. */
const TRANSIENT_NETWORK_CODES = new Set<string>([
  'ECONNRESET',
  'ECONNREFUSED',
  'ETIMEDOUT',
  'EAI_AGAIN',
  'EPIPE',
]);

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessageOf(error: unknown): string {
  if (typeof error === 'object' && error !== null && 'message' in error) {
    const { message } = error as { message?: unknown };
    if (typeof message === 'string') {
      return message;
    }
  }
  return String(error);
}

/**
 * Flattens a chat-model message content value to plain text.
 * Content may be a string or an array of parts; only string parts and parts
 * carrying a string `text` field contribute to the output.
 */
function messageContentToText(content: unknown): string {
  if (typeof content === 'string') {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part: unknown): string => {
        if (typeof part === 'string') {
          return part;
        }
        if (typeof part === 'object' && part !== null && 'text' in part) {
          const { text } = part as { text?: unknown };
          return typeof text === 'string' ? text : '';
        }
        return '';
      })
      .join('');
  }
  return content == null ? '' : String(content);
}

@Injectable()
export class VisionService {
  private readonly logger = new Logger(VisionService.name);

  constructor(
    private configService: ConfigService,
    private i18nService: I18nService,
  ) {}

  /**
   * Analyze a single image (document page), retrying transient failures.
   *
   * Up to 3 attempts are made. Between attempts the method waits 3-5 seconds
   * (3s base plus up to 2s of random jitter).
   *
   * @param imagePath   Path of the image file to analyze.
   * @param modelConfig API key, model id and base URL of the vision model.
   * @param pageIndex   Optional page number, used for logging and echoed in the result.
   * @returns The analysis result for the image.
   * @throws Error with the localized `visionAnalysisFailed` message when the last
   *         attempt fails or the failure is not retryable.
   */
  async analyzeImage(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    const maxRetries = 3;
    const baseDelay = 3000; // 3 second base delay

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        return await this.performAnalysis(imagePath, modelConfig, pageIndex);
      } catch (error: unknown) {
        const message = errorMessageOf(error);

        if (attempt === maxRetries || !this.isRetryableError(error)) {
          throw new Error(
            this.i18nService.formatMessage('visionAnalysisFailed', { message }),
          );
        }

        const delay = baseDelay + Math.random() * 2000; // 3-5 second random delay
        // `??` (not `||`) so that a legitimate page index of 0 is still printed.
        this.logger.warn(
          `⚠️ Failed to analyze page ${pageIndex ?? '?'} (${attempt}/${maxRetries}), retrying in ${delay.toFixed(0)}ms: ${message}`,
        );
        await this.sleep(delay);
      }
    }

    // Unreachable: the loop always returns or throws. Present to satisfy the type checker.
    throw new Error(this.i18nService.getMessage('retryMechanismError'));
  }

  /**
   * Perform one analysis attempt: read the image, send it to the vision model
   * together with the localized system prompt, and parse the reply.
   *
   * Errors are logged and re-thrown unchanged so the caller can decide whether to retry.
   */
  private async performAnalysis(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    try {
      // Load image and embed it as a base64 data URL
      const imageBuffer = await fs.readFile(imagePath);
      const base64Image = imageBuffer.toString('base64');
      const mimeType = this.getMimeType(imagePath);

      // Create vision model instance
      const model = new ChatOpenAI({
        apiKey: modelConfig.apiKey,
        model: modelConfig.modelId,
        configuration: {
          baseURL: modelConfig.baseUrl,
        },
        temperature: 0.1, // Reduce randomness, increase consistency
      });

      // Build document analysis prompt
      const systemPrompt = this.i18nService.getMessage('visionSystemPrompt');
      const message = new HumanMessage({
        content: [
          {
            type: 'text',
            text: systemPrompt,
          },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${base64Image}`,
            },
          },
        ],
      });

      // Call model
      this.logger.log(
        this.i18nService.formatMessage('visionModelCall', {
          model: modelConfig.modelId,
          page: pageIndex ?? 'single',
        }),
      );
      const response = await model.invoke([message]);

      // The content is not guaranteed to be a plain string; flatten it before parsing.
      const result = this.parseModelOutput(
        messageContentToText(response.content),
        imagePath,
        pageIndex,
      );

      this.logger.log(
        this.i18nService.formatMessage('visionAnalysisSuccess', {
          path: imagePath,
          page: pageIndex !== undefined ? ` (page ${pageIndex})` : '',
          textLen: result.text.length,
          imgCount: result.images.length,
          layout: result.layout,
          confidence: (result.confidence * 100).toFixed(1),
        }),
      );

      return result;
    } catch (error: unknown) {
      this.logger.error(
        this.i18nService.formatMessage('visionAnalysisFailed', {
          message: errorMessageOf(error),
        }),
      );
      this.logger.error(
        `Vision analysis error details: ${error instanceof Error ? error.stack : String(error)}`,
      );
      throw error; // Re-throw error for retry mechanism
    }
  }

  /**
   * Convert the model's textual reply into a VisionAnalysisResult.
   *
   * Markdown code fences are stripped, then the text is parsed as JSON. If the
   * text is not valid JSON, or the JSON value is not an object, the cleaned text
   * is returned as-is with confidence 0.5. Fields of a wrong type fall back to
   * their defaults ('' / [] / 'unknown' / 0.8).
   */
  private parseModelOutput(
    rawContent: string,
    imagePath: string,
    pageIndex?: number,
  ): VisionAnalysisResult {
    // Clean up markdown code block tags
    const cleaned = rawContent.replace(/```json/g, '').replace(/```/g, '').trim();

    try {
      const parsed: unknown = JSON.parse(cleaned);
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        // e.g. a bare JSON string or number: keep the text instead of dropping it.
        throw new TypeError('Vision response JSON is not an object');
      }

      const fields = parsed as Record<string, unknown>;
      const { text, images, layout, confidence } = fields;

      return {
        text: typeof text === 'string' ? text : '',
        images: Array.isArray(images) ? (images as VisionAnalysisResult['images']) : [],
        layout: (typeof layout === 'string' && layout !== ''
          ? layout
          : 'unknown') as VisionAnalysisResult['layout'],
        confidence:
          typeof confidence === 'number' && Number.isFinite(confidence) ? confidence : 0.8,
        pageIndex,
      };
    } catch {
      // If parsing fails, treat entire content as text
      this.logger.warn(`Failed to parse JSON response for ${imagePath}, using raw text`);
      return {
        text: cleaned,
        images: [],
        layout: 'unknown',
        confidence: 0.5,
        pageIndex,
      };
    }
  }

  /**
   * Determine if an error is worth retrying.
   *
   * Retryable: HTTP 429 / rate-limit messages, 5xx status codes, messages that
   * mention timeout/network/connection, and transient socket error codes.
   */
  private isRetryableError(error: unknown): boolean {
    const errorMessage = errorMessageOf(error).toLowerCase();
    const { status, code } = (typeof error === 'object' && error !== null ? error : {}) as {
      status?: unknown;
      code?: unknown;
    };

    const rawCode = status || code;
    let numericCode = Number.NaN;
    if (typeof rawCode === 'number') {
      numericCode = rawCode;
    } else if (typeof rawCode === 'string' && rawCode.trim() !== '') {
      numericCode = Number(rawCode); // NaN for symbolic codes such as 'ENOENT'
    }

    // 429 rate limit error
    if (
      numericCode === 429 ||
      errorMessage.includes('rate limit') ||
      errorMessage.includes('too many requests')
    ) {
      return true;
    }

    // 5xx server error
    if (numericCode >= 500 && numericCode < 600) {
      return true;
    }

    // Socket-level failures whose message may not contain any of the keywords below
    if (typeof code === 'string' && TRANSIENT_NETWORK_CODES.has(code)) {
      return true;
    }

    // Network related error
    return (
      errorMessage.includes('timeout') ||
      errorMessage.includes('network') ||
      errorMessage.includes('connection')
    );
  }

  /**
   * Resolve after the given number of milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise<void>((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Analyze multiple images one after another.
   *
   * A page that fails the quality check or whose analysis throws is counted as
   * failed and skipped; the batch continues with the next page.
   *
   * @param imagePaths  Image files to analyze, in page order.
   * @param modelConfig Vision model configuration passed to each analysis.
   * @param options.startIndex       Page number assigned to the first image (default 1).
   * @param options.skipQualityCheck When true, the file-size quality check is not run.
   * @param options.onProgress       Called before each page is processed (without a
   *                                 result) and again after a successful analysis
   *                                 (with the page result).
   * @returns Per-page results plus success/failure counts and an estimated cost.
   */
  async batchAnalyze(
    imagePaths: string[],
    modelConfig: VisionModelConfig,
    options: {
      startIndex?: number;
      skipQualityCheck?: boolean;
      onProgress?: (current: number, total: number, pageResult?: VisionAnalysisResult) => void;
    } = {},
  ): Promise<BatchAnalysisResult> {
    const { startIndex = 1, skipQualityCheck = false, onProgress } = options;
    const total = imagePaths.length;

    const results: VisionAnalysisResult[] = [];
    let successCount = 0;
    let failedCount = 0;

    this.logger.log(this.i18nService.formatMessage('batchAnalysisStarted', { count: total }));
    this.logger.log(`🔧 Model config: ${modelConfig.modelId} (${modelConfig.baseUrl || 'OpenAI'})`);

    for (const [i, imagePath] of imagePaths.entries()) {
      const pageIndex = startIndex + i;
      const progress = Math.round(((i + 1) / total) * 100);

      this.logger.log(`🖼️ Analyzing page ${pageIndex} (${i + 1}/${total}, ${progress}%)`);

      // Call progress callback
      if (onProgress) {
        onProgress(i + 1, total);
      }

      // Quality check (the page is skipped and counted as failed if it does not pass)
      if (!skipQualityCheck) {
        const quality = await this.checkImageQuality(imagePath);
        if (!quality.isGood) {
          this.logger.warn(`⚠️ Skipped page ${pageIndex} (poor quality): ${quality.reason}`);
          failedCount++;
          continue;
        }
        this.logger.log(
          `✅ Page ${pageIndex} quality check passed (score: ${(quality.score ?? 0).toFixed(2)})`,
        );
      }

      let result: VisionAnalysisResult;
      try {
        this.logger.log(`🔍 Analyzing page ${pageIndex} with Vision model...`);
        const startTime = Date.now();
        result = await this.analyzeImage(imagePath, modelConfig, pageIndex);
        const duration = ((Date.now() - startTime) / 1000).toFixed(1);

        results.push(result);
        successCount++;

        this.logger.log(
          `✅ Page ${pageIndex} analysis completed (time: ${duration}s, ` +
            `text: ${result.text.length} chars, ` +
            `images: ${result.images.length}, ` +
            `confidence: ${(result.confidence * 100).toFixed(1)}%)`,
        );
      } catch (error: unknown) {
        this.logger.error(
          this.i18nService.formatMessage('pageAnalysisFailed', { page: pageIndex }) +
            `: ${errorMessageOf(error)}`,
        );
        failedCount++;
        continue;
      }

      // Call progress callback with result. Kept outside the analysis try/catch so a
      // throwing callback cannot mark an already-counted success as a failure as well.
      if (onProgress) {
        try {
          onProgress(i + 1, total, result);
        } catch (error: unknown) {
          this.logger.warn(
            `Progress callback failed for page ${pageIndex}: ${errorMessageOf(error)}`,
          );
        }
      }
    }

    // Calculate estimated cost (assumed flat price per successfully analyzed image)
    const estimatedCost = successCount * ESTIMATED_COST_PER_IMAGE_USD;

    this.logger.log(
      `🎉 Vision batch analysis completed! ` +
        `✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, ` +
        `💰 Estimated cost: $${estimatedCost.toFixed(2)}`,
    );

    return {
      results,
      totalPages: total,
      successCount,
      failedCount,
      estimatedCost,
    };
  }

  /**
   * Check image quality using file size only.
   *
   * Files under 5KB or over 10MB are rejected. Accepted files get a score that
   * starts at 0.5 and grows with size (capped at 1.0). If the file cannot be
   * stat'ed, the image is reported as not good with a localized reason.
   */
  async checkImageQuality(
    imagePath: string,
  ): Promise<{ isGood: boolean; reason?: string; score?: number }> {
    try {
      const stats = await fs.stat(imagePath);
      const sizeKB = stats.size / 1024;

      // Check file size (5KB+)
      if (sizeKB < 5) {
        return { isGood: false, reason: `File too small (${sizeKB.toFixed(2)}KB)`, score: 0 };
      }

      // Check file size limit (10MB)
      if (sizeKB > 10240) {
        return { isGood: false, reason: `File too large (${sizeKB.toFixed(2)}KB)`, score: 0 };
      }

      // Simple quality scoring
      let score = 0.5;
      if (sizeKB > 50) score += 0.2;
      if (sizeKB > 100) score += 0.2;
      if (sizeKB > 500) score += 0.1;
      score = Math.min(score, 1.0);

      return { isGood: true, score };
    } catch (error: unknown) {
      return {
        isGood: false,
        reason: this.i18nService.formatMessage('imageLoadError', {
          message: errorMessageOf(error),
        }),
        score: 0,
      };
    }
  }

  /**
   * Check if a MIME type is one of the supported image formats.
   */
  isImageFile(mimetype: string): boolean {
    const imageMimeTypes = [
      'image/jpeg',
      'image/jpg',
      'image/png',
      'image/gif',
      'image/bmp',
      'image/webp',
    ];
    return imageMimeTypes.includes(mimetype);
  }

  /**
   * Get the MIME type for a file path from the text after its last dot
   * (case-insensitive). Unknown or missing extensions yield 'image/jpeg'.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.toLowerCase().split('.').pop();
    if (!ext) return DEFAULT_MIME_TYPE;

    return MIME_TYPES_BY_EXTENSION.get(ext) ?? DEFAULT_MIME_TYPE;
  }

  /**
   * Legacy interface compatibility: extract only the text content of a single image.
   *
   * @throws Whatever `analyzeImage` throws.
   */
  async extractImageContent(
    imagePath: string,
    modelConfig: { baseUrl: string; apiKey: string; modelId: string },
  ): Promise<string> {
    const result = await this.analyzeImage(imagePath, modelConfig);
    return result.text;
  }
}