| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344 |
- import { Injectable, Logger } from '@nestjs/common';
- import { I18nService } from '../i18n/i18n.service';
- import { ConfigService } from '@nestjs/config';
- import { ChatOpenAI } from '@langchain/openai';
- import { HumanMessage } from '@langchain/core/messages';
- import * as fs from 'fs/promises';
- import { VisionAnalysisResult, VisionModelConfig, BatchAnalysisResult, ImageDescription } from './vision.interface';
/**
 * Analyzes document page images with an OpenAI-compatible vision model.
 *
 * Each image is read from disk, sent to the model as a base64 data URL together
 * with the localized `visionSystemPrompt`, and the reply is parsed into a
 * `VisionAnalysisResult`. Transient failures (rate limits, 5xx responses,
 * network problems) are retried with a jittered delay.
 */
@Injectable()
export class VisionService {
  /** Maximum number of attempts made by `analyzeImage` for one image. */
  private static readonly MAX_ATTEMPTS = 3;

  /** Fixed part of the delay between attempts, in milliseconds. */
  private static readonly BASE_RETRY_DELAY_MS = 3000;

  /** Upper bound of the random jitter added to the retry delay, in milliseconds. */
  private static readonly RETRY_JITTER_MS = 2000;

  /** MIME type used when the file extension is missing or not recognized. */
  private static readonly DEFAULT_MIME_TYPE = 'image/jpeg';

  /**
   * Extension -> MIME type lookup. A Map (rather than an object literal) is used
   * so that extensions such as `constructor` cannot resolve to inherited
   * `Object.prototype` members.
   */
  private static readonly MIME_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['bmp', 'image/bmp'],
    ['webp', 'image/webp'],
  ]);

  /** MIME types accepted by `isImageFile`. */
  private static readonly SUPPORTED_IMAGE_MIME_TYPES: ReadonlySet<string> = new Set([
    'image/jpeg',
    'image/jpg',
    'image/png',
    'image/gif',
    'image/bmp',
    'image/webp',
  ]);

  /** Node.js socket/DNS error codes that indicate a transient network failure. */
  private static readonly RETRYABLE_NETWORK_CODES: ReadonlySet<string> = new Set([
    'ECONNRESET',
    'ECONNREFUSED',
    'ETIMEDOUT',
    'EPIPE',
    'EAI_AGAIN',
  ]);

  private readonly logger = new Logger(VisionService.name);

  constructor(
    private configService: ConfigService,
    private i18nService: I18nService,
  ) { }

  /**
   * Analyze a single image (document page), retrying transient failures.
   *
   * @param imagePath - Path of the image file to analyze.
   * @param modelConfig - Vision model connection settings (API key, model id, base URL).
   * @param pageIndex - Optional page number, used for logging and echoed in the result.
   * @returns The parsed analysis result.
   * @throws Error with the localized `visionAnalysisFailed` message when the error is
   *   not retryable or the last attempt fails.
   */
  async analyzeImage(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    const maxAttempts = VisionService.MAX_ATTEMPTS;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return await this.performAnalysis(imagePath, modelConfig, pageIndex);
      } catch (error: unknown) {
        const message = this.errorMessage(error);

        if (attempt === maxAttempts || !this.isRetryableError(error)) {
          throw new Error(this.i18nService.formatMessage('visionAnalysisFailed', { message }));
        }

        // 3-5 second delay: fixed base plus random jitter so parallel callers do not retry in lockstep.
        const delay = VisionService.BASE_RETRY_DELAY_MS + Math.random() * VisionService.RETRY_JITTER_MS;
        // `??` instead of `||` so that a legitimate page index of 0 is still printed.
        this.logger.warn(
          `⚠️ Failed to analyze page ${pageIndex ?? '?'} (${attempt}/${maxAttempts}), retrying in ${delay.toFixed(0)}ms: ${message}`
        );
        await this.sleep(delay);
      }
    }

    // Unreachable in practice (the loop either returns or throws); kept to satisfy TypeScript.
    throw new Error(this.i18nService.getMessage('retryMechanismError'));
  }

  /**
   * Perform one analysis attempt: read the image, call the model, parse the reply.
   *
   * Any failure is logged and re-thrown unchanged so that `analyzeImage` can
   * inspect the original error and decide whether to retry.
   */
  private async performAnalysis(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    try {
      // Load the image and embed it as a base64 data URL.
      const imageBuffer = await fs.readFile(imagePath);
      const base64Image = imageBuffer.toString('base64');
      const mimeType = this.getMimeType(imagePath);

      const model = new ChatOpenAI({
        apiKey: modelConfig.apiKey,
        model: modelConfig.modelId,
        configuration: {
          baseURL: modelConfig.baseUrl,
        },
        temperature: 0.1, // Reduce randomness, increase consistency
      });

      const systemPrompt = this.i18nService.getMessage('visionSystemPrompt');
      const message = new HumanMessage({
        content: [
          {
            type: 'text',
            text: systemPrompt,
          },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${base64Image}`,
            },
          },
        ],
      });

      this.logger.log(
        this.i18nService.formatMessage('visionModelCall', {
          model: modelConfig.modelId,
          page: pageIndex ?? 'single',
        })
      );
      const response = await model.invoke([message]);

      // Message content may be a plain string or an array of content parts; normalize first.
      const rawText = this.contentToText(response.content);
      const result = this.parseModelOutput(rawText, imagePath, pageIndex);

      this.logger.log(
        this.i18nService.formatMessage('visionAnalysisSuccess', {
          path: imagePath,
          page: pageIndex != null ? ` (page ${pageIndex})` : '',
          textLen: result.text.length,
          imgCount: result.images.length,
          layout: result.layout,
          confidence: (result.confidence * 100).toFixed(1)
        })
      );
      return result;
    } catch (error: unknown) {
      this.logger.error(
        this.i18nService.formatMessage('visionAnalysisFailed', {
          message: this.errorMessage(error)
        })
      );
      const stack = error instanceof Error ? error.stack : undefined;
      this.logger.error(`Vision analysis error details: ${stack}`);
      throw error; // Re-throw error for retry mechanism
    }
  }

  /**
   * Flatten model message content into a single string.
   *
   * Accepts a plain string or an array whose entries are strings or objects with
   * a string `text` field; entries of any other shape contribute nothing.
   */
  private contentToText(content: unknown): string {
    if (typeof content === 'string') {
      return content;
    }
    if (Array.isArray(content)) {
      return content
        .map((part: unknown) => {
          if (typeof part === 'string') {
            return part;
          }
          if (typeof part === 'object' && part !== null) {
            const text = (part as { text?: unknown }).text;
            return typeof text === 'string' ? text : '';
          }
          return '';
        })
        .join('');
    }
    return content == null ? '' : String(content);
  }

  /**
   * Convert the model's textual reply into a `VisionAnalysisResult`.
   *
   * Markdown code fences are stripped, then the text is parsed as JSON. When the
   * reply is not a JSON object, the cleaned text itself becomes `text` with a
   * reduced confidence of 0.5. Fields with an unexpected type fall back to their
   * defaults (`''`, `[]`, `'unknown'`, `0.8`).
   */
  private parseModelOutput(rawText: string, imagePath: string, pageIndex?: number): VisionAnalysisResult {
    // Clean up markdown code block tags
    const cleaned = rawText.replace(/```json/g, '').replace(/```/g, '').trim();
    const fields = this.tryParseJsonObject(cleaned);

    if (fields === undefined) {
      // If parsing fails, treat entire content as text
      this.logger.warn(`Failed to parse JSON response for ${imagePath}, using raw text`);
      return {
        text: cleaned,
        images: [],
        layout: 'unknown',
        confidence: 0.5,
        pageIndex,
      };
    }

    const { text, images, layout, confidence } = fields;
    return {
      text: typeof text === 'string' ? text : '',
      images: Array.isArray(images) ? images : [],
      layout: (typeof layout === 'string' && layout !== '' ? layout : 'unknown') as VisionAnalysisResult['layout'],
      confidence: typeof confidence === 'number' && Number.isFinite(confidence) ? confidence : 0.8,
      pageIndex,
    };
  }

  /**
   * Parse `text` as JSON and return it only when it is a non-array object.
   *
   * @returns The parsed object, or `undefined` for invalid JSON or any other JSON value.
   */
  private tryParseJsonObject(text: string): Record<string, unknown> | undefined {
    let parsed: unknown;
    try {
      parsed = JSON.parse(text);
    } catch {
      return undefined;
    }
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return undefined;
    }
    return parsed as Record<string, unknown>;
  }

  /**
   * Extract a human-readable message from any thrown value.
   *
   * Handles `Error` instances, plain objects carrying a string `message`, and
   * falls back to `String(error)` for everything else.
   */
  private errorMessage(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null) {
      const message = (error as { message?: unknown }).message;
      if (typeof message === 'string') {
        return message;
      }
    }
    return String(error);
  }

  /**
   * Determine if an error is worth retrying.
   *
   * Retryable: HTTP 429 / rate-limit messages, HTTP 5xx, known Node.js network
   * error codes, and messages mentioning timeout/network/connection.
   */
  private isRetryableError(error: unknown): boolean {
    const message = this.errorMessage(error).toLowerCase();
    const details = (typeof error === 'object' && error !== null ? error : {}) as {
      status?: unknown;
      code?: unknown;
    };
    const rawCode = details.status || details.code;

    // Status may arrive as a number or a numeric string depending on the client.
    let statusCode = Number.NaN;
    if (typeof rawCode === 'number') {
      statusCode = rawCode;
    } else if (typeof rawCode === 'string' && /^\d+$/.test(rawCode)) {
      statusCode = Number(rawCode);
    }

    // 429 rate limit error
    if (statusCode === 429 || message.includes('rate limit') || message.includes('too many requests')) {
      return true;
    }

    // 5xx server error
    if (statusCode >= 500 && statusCode < 600) {
      return true;
    }

    // Network related error identified by its system error code (e.g. "read ECONNRESET"
    // does not contain any of the keywords checked below).
    if (typeof rawCode === 'string' && VisionService.RETRYABLE_NETWORK_CODES.has(rawCode)) {
      return true;
    }

    // Network related error identified by its message
    return message.includes('timeout') || message.includes('network') || message.includes('connection');
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Analyze multiple images sequentially.
   *
   * For every image, `onProgress(current, total)` is called before processing and,
   * when the analysis succeeds, `onProgress(current, total, result)` is called
   * again. Pages that fail the quality check or whose analysis throws are counted
   * in `failedCount` and omitted from `results`.
   *
   * @param imagePaths - Image files to analyze, in page order.
   * @param modelConfig - Vision model connection settings.
   * @param options - `startIndex` (page number of the first image, default 1),
   *   `skipQualityCheck` (default false) and an optional `onProgress` callback.
   * @returns Per-page results plus success/failure counts and an estimated cost.
   */
  async batchAnalyze(
    imagePaths: string[],
    modelConfig: VisionModelConfig,
    options: {
      startIndex?: number;
      skipQualityCheck?: boolean;
      onProgress?: (current: number, total: number, pageResult?: VisionAnalysisResult) => void;
    } = {},
  ): Promise<BatchAnalysisResult> {
    const { startIndex = 1, skipQualityCheck = false, onProgress } = options;
    const total = imagePaths.length;
    const results: VisionAnalysisResult[] = [];
    let successCount = 0;
    let failedCount = 0;

    this.logger.log(this.i18nService.formatMessage('batchAnalysisStarted', { count: total }));
    this.logger.log(`🔧 Model config: ${modelConfig.modelId} (${modelConfig.baseUrl || 'OpenAI'})`);

    for (const [i, imagePath] of imagePaths.entries()) {
      const current = i + 1;
      const pageIndex = startIndex + i;
      const progress = Math.round((current / total) * 100);

      this.logger.log(`🖼️ Analyzing page ${pageIndex} (${current}/${total}, ${progress}%)`);

      // Call progress callback
      if (onProgress) {
        onProgress(current, total);
      }

      // Quality check (the page is skipped and counted as failed when it does not pass)
      if (!skipQualityCheck) {
        const quality = await this.checkImageQuality(imagePath);
        if (!quality.isGood) {
          this.logger.warn(`⚠️ Skipped page ${pageIndex} (poor quality): ${quality.reason}`);
          failedCount++;
          continue;
        }
        this.logger.log(`✅ Page ${pageIndex} quality check passed (score: ${(quality.score || 0).toFixed(2)})`);
      }

      let pageResult: VisionAnalysisResult | undefined;
      try {
        this.logger.log(`🔍 Analyzing page ${pageIndex} with Vision model...`);
        const startTime = Date.now();
        pageResult = await this.analyzeImage(imagePath, modelConfig, pageIndex);
        const duration = ((Date.now() - startTime) / 1000).toFixed(1);

        results.push(pageResult);
        successCount++;

        this.logger.log(
          `✅ Page ${pageIndex} analysis completed (time: ${duration}s, ` +
          `text: ${pageResult.text.length} chars, ` +
          `images: ${pageResult.images.length}, ` +
          `confidence: ${(pageResult.confidence * 100).toFixed(1)}%)`
        );
      } catch (error: unknown) {
        this.logger.error(
          this.i18nService.formatMessage('pageAnalysisFailed', { page: pageIndex }) + `: ${this.errorMessage(error)}`
        );
        failedCount++;
      }

      // Call progress callback with result. Done outside the try block so that an
      // exception thrown by the callback is not miscounted as a failure of a page
      // that was already recorded as successful.
      if (pageResult !== undefined && onProgress) {
        onProgress(current, total, pageResult);
      }
    }

    // Calculate estimated cost (assuming $0.01 per image)
    const estimatedCost = successCount * 0.01;

    this.logger.log(
      `🎉 Vision batch analysis completed! ` +
      `✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, ` +
      `💰 Estimated cost: $${estimatedCost.toFixed(2)}`
    );

    return {
      results,
      totalPages: total,
      successCount,
      failedCount,
      estimatedCost,
    };
  }

  /**
   * Heuristic image quality check based solely on file size.
   *
   * Files under 5KB or over 10MB (10240KB) are rejected. Accepted files get a
   * score starting at 0.5 that grows with size (+0.2 above 50KB, +0.2 above
   * 100KB, +0.1 above 500KB), capped at 1.0. A file that cannot be stat'ed is
   * reported as not good with the localized `imageLoadError` reason.
   */
  async checkImageQuality(imagePath: string): Promise<{ isGood: boolean; reason?: string; score?: number }> {
    try {
      const stats = await fs.stat(imagePath);
      const sizeKB = stats.size / 1024;

      // Check file size (5KB+)
      if (sizeKB < 5) {
        return { isGood: false, reason: `File too small (${sizeKB.toFixed(2)}KB)`, score: 0 };
      }

      // Check file size limit (10MB)
      if (sizeKB > 10240) {
        return { isGood: false, reason: `File too large (${sizeKB.toFixed(2)}KB)`, score: 0 };
      }

      // Simple quality scoring
      let score = 0.5;
      if (sizeKB > 50) score += 0.2;
      if (sizeKB > 100) score += 0.2;
      if (sizeKB > 500) score += 0.1;
      score = Math.min(score, 1.0);

      return { isGood: true, score };
    } catch (error: unknown) {
      return {
        isGood: false,
        reason: this.i18nService.formatMessage('imageLoadError', { message: this.errorMessage(error) }),
        score: 0,
      };
    }
  }

  /**
   * Check if a MIME type denotes a supported image format.
   *
   * The comparison is case-insensitive; non-string input yields `false`.
   */
  isImageFile(mimetype: string): boolean {
    return typeof mimetype === 'string'
      && VisionService.SUPPORTED_IMAGE_MIME_TYPES.has(mimetype.toLowerCase());
  }

  /**
   * Derive the MIME type from the file extension, defaulting to `image/jpeg`
   * when the extension is missing or unknown.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.toLowerCase().split('.').pop();
    if (!ext) return VisionService.DEFAULT_MIME_TYPE;
    return VisionService.MIME_BY_EXTENSION.get(ext) ?? VisionService.DEFAULT_MIME_TYPE;
  }

  /**
   * Legacy interface compatibility: extract only the text content of a single image.
   *
   * @returns The `text` field of the result produced by `analyzeImage`.
   */
  async extractImageContent(
    imagePath: string,
    modelConfig: { baseUrl: string; apiKey: string; modelId: string },
  ): Promise<string> {
    const result = await this.analyzeImage(imagePath, modelConfig);
    return result.text;
  }
}
|