const fs = require('fs');
const { execSync } = require('child_process');
const path = require('path');
const puppeteer = require('puppeteer');

console.log('=== MD to PDF Converter Starting ===');
console.log('Node.js version:', process.version);
console.log('Working directory:', process.cwd());
console.log('Input path:', process.argv[2]);
console.log('Output path:', process.argv[3]);

// Arguments: node md_to_pdf.js <input.md> <output.pdf>
const inputPath = process.argv[2];
const outputPath = process.argv[3];

if (!inputPath || !outputPath) {
    console.error('Usage: node md_to_pdf.js <input.md> <output.pdf>');
    process.exit(1);
}

console.log(`Processing Markdown: ${inputPath}`);

// Math placeholders are terminated by a suffix so that id 1 can never match
// inside id 10, 11, ... and so that a digit following the math in the source
// text cannot be mistaken for part of the id.
const MATH_PLACEHOLDER_PREFIX = 'MATHBLOCK_PLACEHOLDER_';
const MATH_PLACEHOLDER_SUFFIX = '_END';
const MATH_PLACEHOLDER_PATTERN = /MATHBLOCK_PLACEHOLDER_(\d+)_END/g;

const MAX_ATTEMPTS = 3;
const VIEWPORT = { width: 1200, height: 800 };

// Upper bound (ms) for in-page MathJax typesetting; matches the page default timeout.
const MATHJAX_TIMEOUT_MS = 30000;

// Error message fragments that indicate the browser/connection is gone;
// retrying on the same browser instance cannot succeed after these.
const FATAL_ERROR_MARKERS = [
    'Protocol error',
    'Target closed',
    'Browser closed',
    'Connection closed',
    'Navigation failed',
    'net::ERR_CONNECTION_CLOSED'
];

/** Resolves after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Escapes the characters that would otherwise be parsed as HTML markup. */
function escapeHtml(text) {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}

/**
 * Replaces `$$...$$` and `$...$` spans with opaque placeholders so the
 * Markdown parser does not touch the TeX source.
 *
 * @param {string} markdown - Raw Markdown text.
 * @returns {{ text: string, mathBlocks: string[] }} The protected text and the
 *   original math spans, indexed by placeholder id.
 */
function protectMath(markdown) {
    const mathBlocks = [];
    const stash = (source) => {
        const id = mathBlocks.length;
        mathBlocks.push(source);
        return `${MATH_PLACEHOLDER_PREFIX}${id}${MATH_PLACEHOLDER_SUFFIX}`;
    };

    // Display math first, so that "$$" is not consumed as two inline delimiters.
    const text = markdown
        .replace(/\$\$([\s\S]*?)\$\$/g, (match) => stash(match))
        .replace(/\$([^\$\n]+?)\$/g, (match) => stash(match));

    return { text, mathBlocks };
}

/**
 * Puts the protected math spans back into the rendered HTML.
 *
 * A function replacer is used on purpose: with a string replacement,
 * String.prototype.replace interprets "$$" as a single "$" (and "$&", "$'",
 * "$`" as match references), which would corrupt every display-math block.
 *
 * @param {string} html - HTML produced from the protected Markdown.
 * @param {string[]} mathBlocks - Spans returned by protectMath.
 * @returns {string} HTML with the math restored (HTML-escaped, so "<" or "&"
 *   inside a formula reaches the page as text instead of markup).
 */
function restoreMath(html, mathBlocks) {
    return html.replace(MATH_PLACEHOLDER_PATTERN, (match, id) => {
        const block = mathBlocks[Number(id)];
        return block === undefined ? match : escapeHtml(block);
    });
}

/**
 * Turns fenced ```mermaid code blocks (as rendered by marked) into
 * `.mermaid` containers holding the raw diagram source.
 *
 * NOTE(review): the markup in this function was stripped from the reviewed
 * copy of the file; it is reconstructed from marked's code-block output
 * format and from the `.mermaid` selector used later - confirm against the
 * original.
 *
 * @param {string} html - Rendered HTML.
 * @returns {string} HTML with Mermaid code blocks rewritten.
 */
function convertMermaidBlocks(html) {
    return html.replace(
        /<pre><code class="language-mermaid">([\s\S]*?)<\/code><\/pre>/g,
        (match, content) => {
            // Undo the entity escaping applied to code blocks; "&amp;" must be
            // decoded last so "&amp;lt;" does not turn into "<".
            const decoded = content
                .replace(/&quot;/g, '"')
                .replace(/&#39;/g, "'")
                .replace(/&gt;/g, '>')
                .replace(/&lt;/g, '<')
                .replace(/&amp;/g, '&');
            return `<div class="mermaid">\n${decoded}\n</div>`;
        }
    );
}

/**
 * Wraps the rendered body in a full HTML document.
 *
 * NOTE(review): only the title text "Document" and the body interpolation
 * survived in the reviewed copy of this file. Any stylesheet or
 * MathJax/Mermaid <script> tags the original template carried must be
 * restored here.
 *
 * @param {string} bodyHtml - Rendered document body.
 * @returns {string} Complete HTML document.
 */
function buildTemplate(bodyHtml) {
    return `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Document</title>
</head>
<body>
${bodyHtml}
</body>
</html>`;
}

/** Builds the page.pdf() options shared by the normal and the fallback path. */
function pdfOptions(timeout) {
    return {
        path: outputPath,
        format: 'A4',
        printBackground: true,
        scale: 0.75, // Scale down to fit more content
        margin: {
            top: '10mm',
            right: '10mm',
            bottom: '10mm',
            left: '10mm'
        },
        timeout
    };
}

/** Returns true when the error means the browser itself is unusable. */
function isFatalBrowserError(error) {
    const message = String(error && error.message);
    return FATAL_ERROR_MARKERS.some((marker) => message.includes(marker));
}

/** Launches Chromium with flags suited to a containerized environment. */
async function launchBrowser() {
    console.log('Starting Puppeteer browser launch...');
    const browser = await puppeteer.launch({
        executablePath: '/usr/bin/chromium-browser', // Alpine location
        args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-background-timer-throttling',
            '--disable-renderer-backgrounding',
            '--disable-backgrounding-occluded-windows',
            '--memory-pressure-off',
            '--js-flags=--max-old-space-size=4096', // Raise the V8 heap limit
            '--enable-features=NetworkService',
            '--disable-features=VizDisplayCompositor',
            '--disable-gpu',
            '--disable-web-security'
        ],
        headless: 'new',
        timeout: 120000 // Increased timeout for containerized environment
    });
    console.log('Browser launched successfully');
    return browser;
}

/**
 * Applies viewport, user agent, timeouts, request blocking and diagnostic
 * listeners to a freshly created page.
 */
async function configurePage(page) {
    // Set the page viewport and user agent.
    await page.setViewport(VIEWPORT);
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    console.log('Viewport and user agent set');

    // Lower the default timeouts to avoid long waits.
    page.setDefaultNavigationTimeout(30000); // 30 seconds
    page.setDefaultTimeout(30000); // 30 seconds
    console.log('Timeouts configured');

    // Intercept requests so that no external resource is ever fetched.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
        const url = req.url();
        // 'http' also covers 'https'. External requests (CDN resources
        // included) get an empty response so they cannot cause network
        // timeouts.
        if (url.startsWith('http') || url.startsWith('ftp')) {
            req.respond({
                status: 200,
                contentType: 'text/plain',
                body: ''
            }).catch(() => {});
        } else {
            // Allow local and data: URL resources.
            req.continue().catch(() => {});
        }
    });
    console.log('Request interception configured to block all external resources');

    // Surface page-side errors and console output in the Node log.
    page.on('error', (error) => {
        console.error('Page error:', error);
    });
    page.on('pageerror', (error) => {
        console.error('Page error event:', error);
    });
    page.on('console', (msg) => {
        console.log('Browser console:', msg.text());
    });
    console.log('Error listeners attached');
}

/** Gives images a short, fixed grace period instead of waiting for each one. */
async function waitForImages(page) {
    try {
        console.log('Checking for images to load...');
        await page.evaluate(async () => {
            const images = Array.from(document.querySelectorAll('img'));
            console.log(`Found ${images.length} images on the page`);

            if (images.length > 0) {
                // Wait only 500ms, whether or not the images finished loading.
                await new Promise((resolve) => {
                    setTimeout(() => {
                        console.log(`Continuing after attempting to load ${images.length} images`);
                        resolve();
                    }, 500);
                });
            }
        });
    } catch (e) {
        console.warn('Error checking images:', e.message);
    }
}

/**
 * Waits for MathJax typesetting when the page contains math.
 *
 * @returns {Promise<boolean>} true when typesetting is known to have finished.
 */
async function waitForMathJax(page) {
    console.log('Checking for MathJax...');
    try {
        // Look for anything that resembles TeX delimiters or rendered math.
        const hasMathContent = await page.evaluate(() => {
            const html = document.documentElement.innerHTML;
            return html.includes('$') ||
                html.includes('\\(') ||
                html.includes('\\[') ||
                html.includes('\\begin{') ||
                html.includes('math-tex') ||
                document.querySelectorAll('mjx-container').length > 0 ||
                document.querySelectorAll('[class*="math"]').length > 0;
        });
        console.log(`Math content found: ${hasMathContent}`);

        if (!hasMathContent) {
            console.log('No math content found, skipping MathJax wait');
            return false;
        }

        console.log('Math content detected, waiting for MathJax...');
        // Give MathJax a bounded amount of time to initialize rather than
        // waiting indefinitely for a selector.
        await sleep(1000);

        const mathjaxExists = await page.evaluate(() => typeof window.MathJax !== 'undefined');
        if (!mathjaxExists) {
            console.log('MathJax not found after content check');
            return false;
        }

        const finished = await page.evaluate(async (timeoutMs) => {
            const hub = window.MathJax && window.MathJax.Hub;
            let typeset;
            if (hub) {
                // MathJax v2: Queue() does not return a promise, so completion
                // is signalled by a callback queued after the Typeset action.
                typeset = new Promise((resolve) => {
                    hub.Queue(['Typeset', hub], resolve);
                });
            } else if (window.MathJax && window.MathJax.typesetPromise) {
                typeset = window.MathJax.typesetPromise();
            } else {
                return true;
            }
            const timedOut = new Promise((resolve) => {
                setTimeout(() => resolve('timeout'), timeoutMs);
            });
            return (await Promise.race([typeset, timedOut])) !== 'timeout';
        }, MATHJAX_TIMEOUT_MS);

        if (finished) {
            console.log('MathJax typesetting completed');
        } else {
            console.warn(`MathJax typesetting did not finish within ${MATHJAX_TIMEOUT_MS}ms`);
        }
        return finished;
    } catch (e) {
        console.warn('Error checking MathJax:', e.message);
        return false;
    }
}

/**
 * Renders Mermaid diagrams when the page contains `.mermaid` containers.
 *
 * @returns {Promise<boolean>} true when diagrams were found and handled.
 */
async function waitForMermaid(page) {
    console.log('Checking for Mermaid diagrams...');
    try {
        const mermaidElementsCount = await page.evaluate(
            () => document.querySelectorAll('.mermaid').length
        );
        console.log(`Mermaid diagrams found: ${mermaidElementsCount > 0}`);

        if (mermaidElementsCount === 0) {
            console.log('No Mermaid diagrams found, skipping wait');
            return false;
        }

        console.log(`Processing ${mermaidElementsCount} Mermaid diagrams...`);
        const mermaidExists = await page.evaluate(() => typeof mermaid !== 'undefined');

        if (mermaidExists) {
            console.log('Mermaid library found, attempting to initialize...');
            await page.evaluate(async () => {
                if (typeof mermaid !== 'undefined' && mermaid.init) {
                    try {
                        // Awaited because newer Mermaid versions return a
                        // promise here; awaiting a non-promise is harmless.
                        await mermaid.init(undefined, '.mermaid');
                    } catch (e) {
                        console.log('Mermaid init error:', e.message);
                    }
                } else {
                    console.log('Mermaid library not fully loaded, skipping initialization');
                }

                // Wait (at most 5 seconds) until no unrendered chart is left.
                // A chart counts as done when it carries either the
                // `mermaid-loaded` class or the `data-processed` attribute.
                // NOTE(review): nothing in this file sets `mermaid-loaded`;
                // `data-processed` is presumably what Mermaid itself sets -
                // confirm against the bundled Mermaid version.
                const startTime = Date.now();
                while (Date.now() - startTime < 5000) {
                    const incompleteCharts = document.querySelectorAll(
                        '.mermaid:not(.mermaid-loaded):not([data-processed])'
                    );
                    if (incompleteCharts.length === 0) {
                        break;
                    }
                    // Pause briefly before checking again.
                    await new Promise((r) => setTimeout(r, 100));
                }
            });
        } else {
            console.log('Mermaid library not found in document, skipping processing');
        }

        console.log('Mermaid diagrams processed');
        return true;
    } catch (e) {
        console.warn('Error processing Mermaid:', e.message);
        return false;
    }
}

/**
 * One full rendering attempt: load the document, wait for images, math and
 * diagrams, then write the PDF to `outputPath`.
 *
 * @throws whatever page.goto / page.pdf throws; the caller decides on retry.
 */
async function renderPdf(page, dataUrl) {
    console.log('About to navigate to data URL...');
    // Wait for the DOM only, not for sub-resources.
    await page.goto(dataUrl, {
        waitUntil: 'domcontentloaded',
        timeout: 30000 // Reduced timeout for faster failure
    });
    console.log('Page loaded successfully');

    await waitForImages(page);

    const mathjaxFinished = await waitForMathJax(page);
    // If MathJax did not complete, allow some extra time.
    if (!mathjaxFinished) {
        console.log('Waiting 1 second before generating PDF...');
        await sleep(1000);
    }

    const mermaidProcessed = await waitForMermaid(page);
    // If Mermaid was not processed, allow some extra time. This check has to
    // come after the Mermaid step, otherwise the flag can never be set.
    if (!mermaidProcessed) {
        console.log('Waiting 1 second before generating PDF...');
        await sleep(1000);
    }

    // Wait for basic rendering only (not for every external resource).
    console.log('Waiting for basic page content to be loaded...');
    try {
        await page.waitForFunction(() => document.readyState !== 'loading', { timeout: 10000 }); // Reduced timeout
        console.log('Page DOM loaded, readyState is not loading');
    } catch (e) {
        console.warn('DOM did not finish loading, continuing...', e.message);
    }

    // Let pending asynchronous work settle before printing.
    console.log('Waiting 2 seconds before generating PDF...');
    await sleep(2000);

    console.log('Generating PDF file...');
    await page.pdf(pdfOptions(120000));
    console.log('PDF generated successfully');
}

/**
 * Last-resort rendering on a fresh page with no request interception and no
 * MathJax/Mermaid waits.
 *
 * @throws the underlying error when even this path fails.
 */
async function renderPdfBasic(browser, dataUrl) {
    console.log('All attempts failed, trying most basic PDF generation...');
    console.log('Creating a new page for basic method...');

    // Use a new page to guarantee a clean state.
    const basicPage = await browser.newPage();
    try {
        await basicPage.setViewport(VIEWPORT);
        basicPage.setDefaultNavigationTimeout(60000);

        await basicPage.goto(dataUrl, {
            waitUntil: 'domcontentloaded',
            timeout: 120000 // Increased timeout for containerized environment
        });

        console.log('Waiting 2 seconds in basic method...');
        await sleep(2000);

        console.log('Generating PDF with basic method...');
        await basicPage.pdf(pdfOptions(300000)); // Increased timeout for containerized environment
        console.log(`PDF generated using basic method at ${outputPath}`);
    } catch (basicError) {
        console.error('Basic PDF generation also failed:', basicError.message);
        console.error('Basic error stack:', basicError.stack);
        throw basicError;
    } finally {
        // Best effort: the whole browser is shut down right after this anyway.
        await basicPage.close().catch(() => {});
    }
}

/**
 * Runs renderPdf up to MAX_ATTEMPTS times with a growing delay between
 * attempts, then falls back to renderPdfBasic.
 *
 * @throws on fatal browser errors or when the fallback fails as well.
 */
async function generatePdfWithRetries(browser, page, template) {
    // Encoded once; the document does not change between attempts.
    const dataUrl = `data:text/html;charset=UTF-8,${encodeURIComponent(template)}`;

    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
        console.log(`Attempt ${attempt} of ${MAX_ATTEMPTS} for PDF generation...`);
        console.log(`HTML template length: ${template.length} characters`);

        try {
            await renderPdf(page, dataUrl);
            console.log(`PDF successfully generated at ${outputPath}`);
            return;
        } catch (error) {
            console.error(`Attempt ${attempt} failed:`, error.message);
            console.error(`Error stack:`, error.stack);

            // Fatal errors are not worth retrying.
            if (isFatalBrowserError(error)) {
                console.error('Fatal browser error occurred, aborting retries');
                throw error;
            }

            if (attempt >= MAX_ATTEMPTS) {
                await renderPdfBasic(browser, dataUrl);
                return;
            }

            // Back off a little longer each time to let the system recover.
            const delay = 10000 * attempt;
            console.log(`Waiting ${delay}ms before retry...`);
            await sleep(delay);
        }
    }
}

(async () => {
    try {
        console.log('Reading input file...');
        const markdown = fs.readFileSync(inputPath, 'utf8');
        console.log(`File read successfully, length: ${markdown.length} characters`);

        // 1. Protect Math Blocks
        const { text: protectedMarkdown, mathBlocks } = protectMath(markdown);
        console.log(`Protected ${mathBlocks.length} math blocks`);

        // 2. Convert to HTML using the marked library from node_modules
        //    (no npx / network dependency at runtime). Required lazily so a
        //    missing module is reported through the catch block below.
        const marked = require('marked');
        console.log('Parsing markdown content...');
        const parsedHtml = marked.parse(protectedMarkdown);
        console.log('Markdown parsed successfully');

        // 3. Restore Math Blocks, 4. Fix Mermaid syntax
        const finalHtml = convertMermaidBlocks(restoreMath(parsedHtml, mathBlocks));

        // 5. Wrap in Template
        const template = buildTemplate(finalHtml);
        console.log('Template prepared, starting PDF generation...');

        // 6. Generate PDF with Puppeteer
        const browser = await launchBrowser();
        try {
            const page = await browser.newPage();
            console.log('Page created successfully');
            await configurePage(page);
            await generatePdfWithRetries(browser, page, template);
        } finally {
            // Always shut Chromium down, including on failure, so that no
            // browser process outlives this script.
            console.log('Closing browser...');
            try {
                await browser.close();
                console.log('Browser closed');
            } catch (closeError) {
                console.warn('Failed to close browser:', closeError.message);
            }
        }

        console.log('=== MD to PDF Conversion Completed Successfully ===');
    } catch (err) {
        console.error('Error during conversion:', err);
        console.error('Error stack:', err && err.stack);
        process.exit(1);
    }
})();