// vision.service.ts
  1. import { Injectable, Logger } from '@nestjs/common';
  2. import { I18nService } from '../i18n/i18n.service';
  3. import { ConfigService } from '@nestjs/config';
  4. import { ChatOpenAI } from '@langchain/openai';
  5. import { HumanMessage } from '@langchain/core/messages';
  6. import * as fs from 'fs/promises';
  7. import { VisionAnalysisResult, VisionModelConfig, BatchAnalysisResult, ImageDescription } from './vision.interface';
  8. @Injectable()
  9. export class VisionService {
  10. private readonly logger = new Logger(VisionService.name);
  11. constructor(
  12. private configService: ConfigService,
  13. private i18nService: I18nService,
  14. ) { }
  15. /**
  16. * Analyze single image (document page)
  17. */
  18. async analyzeImage(
  19. imagePath: string,
  20. modelConfig: VisionModelConfig,
  21. pageIndex?: number,
  22. ): Promise<VisionAnalysisResult> {
  23. const maxRetries = 3;
  24. const baseDelay = 3000; // 3 second base delay
  25. for (let attempt = 1; attempt <= maxRetries; attempt++) {
  26. try {
  27. return await this.performAnalysis(imagePath, modelConfig, pageIndex);
  28. } catch (error) {
  29. const isRetryableError = this.isRetryableError(error);
  30. if (attempt === maxRetries || !isRetryableError) {
  31. throw new Error(this.i18nService.formatMessage('visionAnalysisFailed', { message: error.message }));
  32. }
  33. const delay = baseDelay + Math.random() * 2000; // 3-5 second random delay
  34. this.logger.warn(
  35. `⚠️ Failed to analyze page ${pageIndex || '?'} (${attempt}/${maxRetries}), retrying in ${delay.toFixed(0)}ms: ${error.message}`
  36. );
  37. await this.sleep(delay);
  38. }
  39. }
  40. // This line theoretically should not execute, but included to satisfy TypeScript
  41. throw new Error(this.i18nService.getMessage('retryMechanismError'));
  42. }
  43. /**
  44. * Perform actual image analysis
  45. */
  46. private async performAnalysis(
  47. imagePath: string,
  48. modelConfig: VisionModelConfig,
  49. pageIndex?: number,
  50. ): Promise<VisionAnalysisResult> {
  51. try {
  52. // Load image and convert to base64
  53. const imageBuffer = await fs.readFile(imagePath);
  54. const base64Image = imageBuffer.toString('base64');
  55. const mimeType = this.getMimeType(imagePath);
  56. // Create vision model instance
  57. const model = new ChatOpenAI({
  58. apiKey: modelConfig.apiKey,
  59. model: modelConfig.modelId,
  60. configuration: {
  61. baseURL: modelConfig.baseUrl,
  62. },
  63. temperature: 0.1, // Reduce randomness, increase consistency
  64. });
  65. // Build professional document analysis prompt
  66. const systemPrompt = this.i18nService.getMessage('visionSystemPrompt');
  67. const message = new HumanMessage({
  68. content: [
  69. {
  70. type: 'text',
  71. text: systemPrompt,
  72. },
  73. {
  74. type: 'image_url',
  75. image_url: {
  76. url: `data:${mimeType};base64,${base64Image}`,
  77. },
  78. },
  79. ],
  80. });
  81. // Call model
  82. this.logger.log(this.i18nService.formatMessage('visionModelCall', { model: modelConfig.modelId, page: pageIndex || 'single' }));
  83. const response = await model.invoke([message]);
  84. let content = response.content as string;
  85. // Try to parse JSON
  86. let result: VisionAnalysisResult;
  87. try {
  88. // Clean up markdown code block tags
  89. content = content.replace(/```json/g, '').replace(/```/g, '').trim();
  90. const parsed = JSON.parse(content);
  91. result = {
  92. text: parsed.text || '',
  93. images: parsed.images || [],
  94. layout: parsed.layout || 'unknown',
  95. confidence: parsed.confidence ?? 0.8,
  96. pageIndex,
  97. };
  98. } catch (parseError) {
  99. // If parsing fails, treat entire content as text
  100. this.logger.warn(`Failed to parse JSON response for ${imagePath}, using raw text`);
  101. result = {
  102. text: content,
  103. images: [],
  104. layout: 'unknown',
  105. confidence: 0.5,
  106. pageIndex,
  107. };
  108. }
  109. this.logger.log(
  110. this.i18nService.formatMessage('visionAnalysisSuccess', {
  111. path: imagePath,
  112. page: pageIndex ? ` (page ${pageIndex})` : '',
  113. textLen: result.text.length,
  114. imgCount: result.images.length,
  115. layout: result.layout,
  116. confidence: (result.confidence * 100).toFixed(1)
  117. })
  118. );
  119. return result;
  120. } catch (error) {
  121. this.logger.error(
  122. this.i18nService.formatMessage('visionAnalysisFailed', {
  123. message: error.message
  124. })
  125. );
  126. this.logger.error(`Vision analysis error details: ${error.stack}`);
  127. throw error; // Re-throw error for retry mechanism
  128. }
  129. }
  130. /**
  131. * Determine if error is retryable
  132. */
  133. private isRetryableError(error: any): boolean {
  134. const errorMessage = error.message?.toLowerCase() || '';
  135. const errorCode = error.status || error.code;
  136. // 429 rate limit error
  137. if (errorCode === 429 || errorMessage.includes('rate limit') || errorMessage.includes('too many requests')) {
  138. return true;
  139. }
  140. // 5xx server error
  141. if (errorCode >= 500 && errorCode < 600) {
  142. return true;
  143. }
  144. // Network related error
  145. if (errorMessage.includes('timeout') || errorMessage.includes('network') || errorMessage.includes('connection')) {
  146. return true;
  147. }
  148. return false;
  149. }
  150. /**
  151. * Sleep function
  152. */
  153. private sleep(ms: number): Promise<void> {
  154. return new Promise(resolve => setTimeout(resolve, ms));
  155. }
  156. /**
  157. * Batch analyze multiple images
  158. */
  159. async batchAnalyze(
  160. imagePaths: string[],
  161. modelConfig: VisionModelConfig,
  162. options: {
  163. startIndex?: number;
  164. skipQualityCheck?: boolean;
  165. onProgress?: (current: number, total: number, pageResult?: VisionAnalysisResult) => void;
  166. } = {},
  167. ): Promise<BatchAnalysisResult> {
  168. const { startIndex = 1, skipQualityCheck = false, onProgress } = options;
  169. const results: VisionAnalysisResult[] = [];
  170. let successCount = 0;
  171. let failedCount = 0;
  172. this.logger.log(this.i18nService.formatMessage('batchAnalysisStarted', { count: imagePaths.length }));
  173. this.logger.log(`🔧 Model config: ${modelConfig.modelId} (${modelConfig.baseUrl || 'OpenAI'})`);
  174. for (let i = 0; i < imagePaths.length; i++) {
  175. const imagePath = imagePaths[i];
  176. const pageIndex = startIndex + i;
  177. const progress = Math.round(((i + 1) / imagePaths.length) * 100);
  178. this.logger.log(`🖼️ Analyzing page ${pageIndex} (${i + 1}/${imagePaths.length}, ${progress}%)`);
  179. // Call progress callback
  180. if (onProgress) {
  181. onProgress(i + 1, imagePaths.length);
  182. }
  183. // Quality check(skip analysis if skipped)
  184. if (!skipQualityCheck) {
  185. const quality = await this.checkImageQuality(imagePath);
  186. if (!quality.isGood) {
  187. this.logger.warn(`⚠️ Skipped page ${pageIndex} (poor quality): ${quality.reason}`);
  188. failedCount++;
  189. continue;
  190. } else {
  191. this.logger.log(`✅ Page ${pageIndex} quality check passed (score: ${(quality.score || 0).toFixed(2)})`);
  192. }
  193. }
  194. try {
  195. this.logger.log(`🔍 Analyzing page ${pageIndex} with Vision model...`);
  196. const startTime = Date.now();
  197. const result = await this.analyzeImage(imagePath, modelConfig, pageIndex);
  198. const duration = ((Date.now() - startTime) / 1000).toFixed(1);
  199. results.push(result);
  200. successCount++;
  201. this.logger.log(
  202. `✅ Page ${pageIndex} analysis completed (time: ${duration}s, ` +
  203. `text: ${result.text.length} chars, ` +
  204. `images: ${result.images.length}, ` +
  205. `confidence: ${(result.confidence * 100).toFixed(1)}%)`
  206. );
  207. // Call progress callback with result
  208. if (onProgress) {
  209. onProgress(i + 1, imagePaths.length, result);
  210. }
  211. } catch (error) {
  212. this.logger.error(this.i18nService.formatMessage('pageAnalysisFailed', { page: pageIndex }) + `: ${error.message}`);
  213. failedCount++;
  214. }
  215. }
  216. // Calculate estimated cost (assuming $0.01 per image)
  217. const estimatedCost = successCount * 0.01;
  218. this.logger.log(
  219. `🎉 Vision batch analysis completed! ` +
  220. `✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, ` +
  221. `💰 Estimated cost: $${estimatedCost.toFixed(2)}`
  222. );
  223. return {
  224. results,
  225. totalPages: imagePaths.length,
  226. successCount,
  227. failedCount,
  228. estimatedCost,
  229. };
  230. }
  231. /**
  232. * Check image quality
  233. */
  234. async checkImageQuality(imagePath: string): Promise<{ isGood: boolean; reason?: string; score?: number }> {
  235. try {
  236. const stats = await fs.stat(imagePath);
  237. const sizeKB = stats.size / 1024;
  238. // Check file size(5KB+)
  239. if (sizeKB < 5) {
  240. return { isGood: false, reason: `File too small (${sizeKB.toFixed(2)}KB)`, score: 0 };
  241. }
  242. // Check file size limit(10MB)
  243. if (sizeKB > 10240) {
  244. return { isGood: false, reason: `File too large (${sizeKB.toFixed(2)}KB)`, score: 0 };
  245. }
  246. // Simple quality scoring
  247. let score = 0.5;
  248. if (sizeKB > 50) score += 0.2;
  249. if (sizeKB > 100) score += 0.2;
  250. if (sizeKB > 500) score += 0.1;
  251. score = Math.min(score, 1.0);
  252. return { isGood: true, score };
  253. } catch (error) {
  254. return { isGood: false, reason: this.i18nService.formatMessage('imageLoadError', { message: error.message }), score: 0 };
  255. }
  256. }
  257. /**
  258. * Check if file is a supported image format
  259. */
  260. isImageFile(mimetype: string): boolean {
  261. const imageMimeTypes = [
  262. 'image/jpeg',
  263. 'image/jpg',
  264. 'image/png',
  265. 'image/gif',
  266. 'image/bmp',
  267. 'image/webp',
  268. ];
  269. return imageMimeTypes.includes(mimetype);
  270. }
  271. /**
  272. * Get MIME type
  273. */
  274. private getMimeType(filePath: string): string {
  275. const ext = filePath.toLowerCase().split('.').pop();
  276. if (!ext) return 'image/jpeg';
  277. const mimeTypes: Record<string, string> = {
  278. jpg: 'image/jpeg',
  279. jpeg: 'image/jpeg',
  280. png: 'image/png',
  281. gif: 'image/gif',
  282. bmp: 'image/bmp',
  283. webp: 'image/webp',
  284. };
  285. return mimeTypes[ext] || 'image/jpeg';
  286. }
  287. /**
  288. * Legacy interface compatibility: extract content from single image
  289. */
  290. async extractImageContent(
  291. imagePath: string,
  292. modelConfig: { baseUrl: string; apiKey: string; modelId: string },
  293. ): Promise<string> {
  294. const result = await this.analyzeImage(imagePath, modelConfig);
  295. return result.text;
  296. }
  297. }