/**
 * md_to_pdf.js - render a Markdown file to a PDF using marked and Puppeteer.
 *
 * Usage: node md_to_pdf.js <input_md_path> <output_pdf_path>
 *
 * Pipeline:
 *   1. Replace `$...$` / `$$...$$` spans with placeholders so marked does not
 *      touch them.
 *   2. Convert the Markdown to HTML with marked.
 *   3. Put the math spans back and turn ```mermaid code fences into
 *      <div class="mermaid"> containers.
 *   4. Wrap the result in an HTML document and print it to PDF with headless
 *      Chromium, retrying on non-fatal failures.
 *
 * The process exits with status 1 on bad arguments or on any conversion error.
 */
const fs = require('fs');
// NOTE(review): `execSync` and `path` are not used anywhere in this script;
// they are kept only so the file's dependency list stays unchanged.
const { execSync } = require('child_process');
const path = require('path');
const puppeteer = require('puppeteer');
// marked is loaded from node_modules (no npx / network access at runtime).
const marked = require('marked');

const PLACEHOLDER_PREFIX = 'MATHBLOCK_PLACEHOLDER_';
const MAX_ATTEMPTS = 3;

console.log('=== MD to PDF Converter Starting ===');
console.log('Node.js version:', process.version);
console.log('Working directory:', process.cwd());
console.log('Input path:', process.argv[2]);
console.log('Output path:', process.argv[3]);

// Arguments: node md_to_pdf.js <input_md_path> <output_pdf_path>
const inputPath = process.argv[2];
const outputPath = process.argv[3];

if (!inputPath || !outputPath) {
  console.error('Usage: node md_to_pdf.js <input_md_path> <output_pdf_path>');
  process.exit(1);
}

console.log(`Processing Markdown: ${inputPath}`);

/**
 * Resolve after the given delay (Node side only; functions passed to
 * page.evaluate run in the browser and cannot use this helper).
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Best-effort extraction of a printable message from a thrown value.
 *
 * @param {*} error - Anything that was thrown or rejected.
 * @returns {string} The error's message, or its string form.
 */
function errorMessage(error) {
  if (error && typeof error.message === 'string') {
    return error.message;
  }
  return String(error);
}

/**
 * Escape the characters that are significant in HTML text and attributes.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text that is safe to embed in HTML.
 */
function escapeHtml(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Swap every `$$...$$` and `$...$` span for a numbered placeholder so that the
 * Markdown parser cannot rewrite the formula source.
 *
 * @param {string} markdown - Raw Markdown text.
 * @returns {{ text: string, blocks: string[] }} The Markdown with placeholders
 *   and the original spans (delimiters included), indexed by placeholder id.
 */
function protectMath(markdown) {
  const blocks = [];
  const stash = (span) => {
    const id = blocks.length;
    blocks.push(span);
    return `${PLACEHOLDER_PREFIX}${id}`;
  };

  // Display math first, so its `$$` delimiters are not consumed by the inline rule.
  const text = markdown
    .replace(/\$\$([\s\S]*?)\$\$/g, (match) => stash(match))
    .replace(/\$([^\$\n]+?)\$/g, (match) => stash(match));

  return { text, blocks };
}

/**
 * Put the protected math spans back into the generated HTML.
 *
 * A single regex pass with a replacer function is used on purpose:
 *  - a string replacement would interpret `$$`, `$&`, `$1`, ... inside the
 *    formula as replacement patterns (turning `$$x$$` into `$x$`);
 *  - matching the full digit run prevents placeholder 1 from matching the
 *    beginning of placeholder 10.
 * The span is HTML-escaped so formulas containing `<`, `>` or `&` are not
 * parsed as markup; the browser decodes the entities back into the text.
 *
 * @param {string} html - HTML produced from the placeholder Markdown.
 * @param {string[]} blocks - Spans returned by protectMath.
 * @returns {string} HTML with the math spans restored.
 */
function restoreMath(html, blocks) {
  const placeholderPattern = new RegExp(`${PLACEHOLDER_PREFIX}(\\d+)`, 'g');
  return html.replace(placeholderPattern, (placeholder, id) => {
    const block = blocks[Number(id)];
    // Leave unknown ids alone (e.g. the literal text appeared in the input).
    return block === undefined ? placeholder : escapeHtml(block);
  });
}

/**
 * Turn marked's `<pre><code class="language-mermaid">` fences into
 * `<div class="mermaid">` containers.
 *
 * The captured text is emitted exactly as marked produced it. The previous
 * implementation ran a chain of replace() calls here that mapped each
 * character to itself and therefore had no effect.
 *
 * @param {string} html - HTML produced by marked.
 * @returns {string} HTML with Mermaid containers.
 */
function convertMermaidBlocks(html) {
  return html.replace(
    /<pre><code class="language-mermaid">([\s\S]*?)<\/code><\/pre>/g,
    (match, content) => `<div class="mermaid">${content}</div>`
  );
}

/**
 * Wrap the rendered Markdown in the full HTML document that gets printed.
 *
 * NOTE(review): the stylesheet and the Mermaid/MathJax scripts are referenced
 * from CDNs, but configurePage() answers every http(s) request with an empty
 * body, so they do not actually load; the inline stub below keeps the
 * `mermaid` calls from throwing in that case.
 *
 * @param {string} bodyHtml - HTML for the document body.
 * @returns {string} Complete HTML document.
 */
function buildHtmlDocument(bodyHtml) {
  return `
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>Document</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.2.0/github-markdown-light.min.css">
<!-- Mermaid -->
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
<style>
body {
  box-sizing: border-box;
  margin: 0 auto;
  padding: 20px;
}
.mermaid {
  display: flex;
  justify-content: center;
  margin: 20px 0;
}
table {
  width: 100% !important;
  display: table !important;
}
</style>
<!-- Mermaid fallback stub -->
<script>
// This is a minimal stub to prevent errors when mermaid is referenced but not available
if (typeof window.mermaid === 'undefined') {
  window.mermaid = {
    initialize: function() {},
    init: function() {},
    render: function() {}
  };
}
</script>
<!-- MathJax configuration and library -->
<script>
window.MathJax = {
  tex: {
    inlineMath: [['$', '$'], ['\\\\(', '\\\\)']],
    displayMath: [['$$', '$$'], ['\\\\[', '\\\\]']],
    processEscapes: false
  },
  startup: {
    pageReady: () => {
      return MathJax.startup.defaultPageReady().then(() => {
        const div = document.createElement('div');
        div.id = 'mathjax-finished';
        div.style.display = 'none';
        document.body.appendChild(div);
      });
    }
  }
};
</script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
</head>
<body class="markdown-body">
${bodyHtml}
<script>
// Initialize mermaid if it's available
if (typeof mermaid !== 'undefined') {
  mermaid.initialize({ startOnLoad: true, theme: 'default', securityLevel: 'loose' });
} else {
  console.log('Mermaid library not loaded, skipping initialization');
}
</script>
</body>
</html>`;
}

/**
 * Encode an HTML document as a data: URL for page.goto().
 *
 * @param {string} html - Complete HTML document.
 * @returns {string} data: URL carrying the document.
 */
function toDataUrl(html) {
  return `data:text/html;charset=UTF-8,${encodeURIComponent(html)}`;
}

/**
 * Build the options passed to page.pdf().
 *
 * @param {string} pdfPath - Destination file.
 * @param {number} timeout - Timeout in milliseconds for the PDF call.
 * @returns {object} Puppeteer PDF options.
 */
function pdfOptions(pdfPath, timeout) {
  return {
    path: pdfPath,
    format: 'A4',
    printBackground: true,
    scale: 0.75, // Scale down to fit more content
    margin: { top: '10mm', right: '10mm', bottom: '10mm', left: '10mm' },
    timeout,
  };
}

/**
 * Apply viewport, user agent, timeouts, request stubbing and log forwarding
 * to a freshly created page.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function configurePage(page) {
  await page.setViewport({ width: 1200, height: 800 });
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
  console.log('Viewport and user agent set');

  // Lower the default timeouts so a stuck step fails quickly.
  page.setDefaultNavigationTimeout(30000);
  page.setDefaultTimeout(30000);
  console.log('Timeouts configured');

  // Answer every external request (CDN resources included) with an empty
  // 200 response so the conversion never waits on the network.
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    const url = req.url();
    // 'http' also covers 'https'.
    if (url.startsWith('http') || url.startsWith('ftp')) {
      req.respond({ status: 200, contentType: 'text/plain', body: '' }).catch((err) => {
        // Best effort: the request may already be handled or the page gone.
        console.warn('Could not stub request:', url, errorMessage(err));
      });
    } else {
      // Local and data: URLs are allowed through.
      req.continue().catch((err) => {
        console.warn('Could not continue request:', errorMessage(err));
      });
    }
  });
  console.log('Request interception configured to block all external resources');

  // Surface page-side failures and console output in this process's log.
  page.on('error', (error) => {
    console.error('Page error:', error);
  });
  page.on('pageerror', (error) => {
    console.error('Page error event:', error);
  });
  page.on('console', (msg) => {
    console.log('Browser console:', msg.text());
  });
  console.log('Error listeners attached');
}

/**
 * Give images a short, fixed grace period (500 ms) instead of waiting for
 * each one to finish loading. Errors are logged and ignored.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function waitForImages(page) {
  try {
    console.log('Checking for images to load...');
    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));
      console.log(`Found ${images.length} images on the page`);
      if (images.length > 0) {
        await new Promise((resolve) => {
          setTimeout(() => {
            console.log(`Continuing after attempting to load ${images.length} images`);
            resolve();
          }, 500);
        });
      }
    });
  } catch (e) {
    console.warn('Error checking images:', errorMessage(e));
  }
}

/**
 * If the document body appears to contain math, wait for MathJax to typeset.
 * Errors are logged and ignored.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} true when a typesetting pass was awaited.
 */
async function waitForMathJax(page) {
  console.log('Checking for MathJax...');
  try {
    const hasMathContent = await page.evaluate(() => {
      // Only the body is inspected: the <head> holds the MathJax config,
      // whose own `$` delimiters would make this check always true.
      const html = document.body.innerHTML;
      return html.includes('$') || html.includes('\\(') || html.includes('\\[') ||
        html.includes('\\begin{') || html.includes('math-tex') ||
        document.querySelectorAll('mjx-container').length > 0 ||
        document.querySelectorAll('[class*="math"]').length > 0;
    });
    console.log(`Math content found: ${hasMathContent}`);

    if (!hasMathContent) {
      console.log('No math content found, skipping MathJax wait');
      return false;
    }

    console.log('Math content detected, waiting for MathJax...');
    // Give MathJax a bounded amount of time to initialise rather than
    // waiting indefinitely on a selector.
    await sleep(1000);

    const mathjaxExists = await page.evaluate(() => typeof window.MathJax !== 'undefined');
    if (!mathjaxExists) {
      console.log('MathJax not found after content check');
      return false;
    }

    await page.evaluate(async () => {
      if (window.MathJax && window.MathJax.Hub) {
        // MathJax v2 style API.
        await window.MathJax.Hub.Queue(['Typeset', window.MathJax.Hub]);
      } else if (window.MathJax && window.MathJax.typesetPromise) {
        // MathJax v3 style API.
        await window.MathJax.typesetPromise();
      }
    });
    console.log('MathJax typesetting completed');
    return true;
  } catch (e) {
    console.warn('Error checking MathJax:', errorMessage(e));
    return false;
  }
}

/**
 * If the page contains `.mermaid` containers, ask Mermaid to render them and
 * poll for up to 5 seconds. Errors are logged and ignored.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<boolean>} true when at least one diagram container was
 *   found and the processing step ran to completion.
 */
async function waitForMermaid(page) {
  console.log('Checking for Mermaid diagrams...');
  try {
    const mermaidElementsCount = await page.evaluate(
      () => document.querySelectorAll('.mermaid').length
    );
    console.log(`Mermaid diagrams found: ${mermaidElementsCount > 0}`);

    if (mermaidElementsCount === 0) {
      console.log('No Mermaid diagrams found, skipping wait');
      return false;
    }

    console.log(`Processing ${mermaidElementsCount} Mermaid diagrams...`);
    const mermaidExists = await page.evaluate(() => typeof mermaid !== 'undefined');
    if (mermaidExists) {
      console.log('Mermaid library found, attempting to initialize...');
      await page.evaluate(async () => {
        if (typeof mermaid !== 'undefined' && mermaid.init) {
          try {
            mermaid.init(undefined, '.mermaid');
          } catch (e) {
            console.log('Mermaid init error:', e.message);
          }
        } else {
          console.log('Mermaid library not fully loaded, skipping initialization');
        }

        // Poll (max 5 s) until no container is left without the marker class.
        // NOTE(review): nothing in this file adds `mermaid-loaded`; unless the
        // loaded library does, this loop always runs for the full 5 seconds.
        const startTime = Date.now();
        while (Date.now() - startTime < 5000) {
          const incompleteCharts = document.querySelectorAll('.mermaid:not(.mermaid-loaded)');
          if (incompleteCharts.length === 0) {
            break;
          }
          await new Promise((r) => setTimeout(r, 100));
        }
      });
    } else {
      console.log('Mermaid library not found in document, skipping processing');
    }
    console.log('Mermaid diagrams processed');
    return true;
  } catch (e) {
    console.warn('Error processing Mermaid:', errorMessage(e));
    return false;
  }
}

/**
 * One full rendering attempt on the configured page: load the document, wait
 * for images / MathJax / Mermaid, then write the PDF.
 *
 * @param {object} page - Configured Puppeteer page.
 * @param {string} html - Complete HTML document.
 * @param {string} pdfPath - Destination file.
 * @returns {Promise<void>}
 * @throws Whatever page.goto() or page.pdf() rejects with.
 */
async function renderPdf(page, html, pdfPath) {
  console.log('About to navigate to data URL...');
  // Only wait for the DOM; external resources are stubbed anyway.
  await page.goto(toDataUrl(html), {
    waitUntil: 'domcontentloaded',
    timeout: 30000, // Reduced timeout for faster failure
  });
  console.log('Page loaded successfully');

  await waitForImages(page);

  const mathjaxFinished = await waitForMathJax(page);
  if (!mathjaxFinished) {
    // No typesetting pass was awaited: allow a little settling time instead.
    console.log('Waiting 1 second before generating PDF...');
    await sleep(1000);
  }

  const mermaidProcessed = await waitForMermaid(page);
  if (!mermaidProcessed) {
    // Checked after the Mermaid step so the flag reflects what happened.
    console.log('Waiting 1 second before generating PDF...');
    await sleep(1000);
  }

  console.log('Waiting for basic page content to be loaded...');
  try {
    await page.waitForFunction(() => document.readyState !== 'loading', { timeout: 10000 });
    console.log('Page DOM loaded, readyState is not loading');
  } catch (e) {
    console.warn('DOM did not finish loading, continuing...', errorMessage(e));
  }

  // Let any remaining asynchronous page work settle before printing.
  console.log('Waiting 2 seconds before generating PDF...');
  await sleep(2000);

  console.log('Generating PDF file...');
  await page.pdf(pdfOptions(pdfPath, 120000));
  console.log('PDF generated successfully');
}

/**
 * Last-resort rendering on a brand-new page: load the document, wait two
 * seconds and print, with no MathJax/Mermaid handling. The page is always
 * closed, including when loading or printing fails.
 *
 * @param {object} browser - Puppeteer browser.
 * @param {string} html - Complete HTML document.
 * @param {string} pdfPath - Destination file.
 * @returns {Promise<void>}
 * @throws Whatever the fallback rendering rejects with.
 */
async function renderPdfBasic(browser, html, pdfPath) {
  console.log('All attempts failed, trying most basic PDF generation...');
  console.log('Creating a new page for basic method...');
  // A new page guarantees a clean state.
  const basicPage = await browser.newPage();
  try {
    await basicPage.setViewport({ width: 1200, height: 800 });
    basicPage.setDefaultNavigationTimeout(60000);
    await basicPage.goto(toDataUrl(html), {
      waitUntil: 'domcontentloaded',
      timeout: 120000, // Increased timeout for containerized environment
    });

    console.log('Waiting 2 seconds in basic method...');
    await sleep(2000);

    console.log('Generating PDF with basic method...');
    await basicPage.pdf(pdfOptions(pdfPath, 300000));
    console.log(`PDF generated using basic method at ${pdfPath}`);
  } catch (basicError) {
    console.error('Basic PDF generation also failed:', errorMessage(basicError));
    console.error('Basic error stack:', basicError && basicError.stack);
    throw basicError;
  } finally {
    await basicPage.close().catch((closeError) => {
      console.warn('Could not close basic page:', errorMessage(closeError));
    });
  }
}

/**
 * Tell whether an error means the browser/connection itself is gone, in which
 * case retrying on the same browser is pointless.
 *
 * @param {*} error - The thrown value.
 * @returns {boolean}
 */
function isFatalBrowserError(error) {
  const message = errorMessage(error);
  return [
    'Protocol error',
    'Target closed',
    'Browser closed',
    'Connection closed',
    'Navigation failed',
    'net::ERR_CONNECTION_CLOSED',
  ].some((marker) => message.includes(marker));
}

/**
 * Render the PDF with up to MAX_ATTEMPTS tries, backing off 10 s x attempt
 * between tries, and falling back to renderPdfBasic() after the last failure.
 *
 * @param {object} browser - Puppeteer browser.
 * @param {object} page - Configured Puppeteer page.
 * @param {string} html - Complete HTML document.
 * @param {string} pdfPath - Destination file.
 * @returns {Promise<void>}
 * @throws On a fatal browser error, or when the basic fallback fails too.
 */
async function generatePdfWithRetries(browser, page, html, pdfPath) {
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    console.log(`Attempt ${attempt} of ${MAX_ATTEMPTS} for PDF generation...`);
    console.log(`HTML template length: ${html.length} characters`);
    try {
      await renderPdf(page, html, pdfPath);
      console.log(`PDF successfully generated at ${pdfPath}`);
      return;
    } catch (error) {
      console.error(`Attempt ${attempt} failed:`, errorMessage(error));
      console.error('Error stack:', error && error.stack);

      if (isFatalBrowserError(error)) {
        console.error('Fatal browser error occurred, aborting retries');
        throw error;
      }

      if (attempt >= MAX_ATTEMPTS) {
        await renderPdfBasic(browser, html, pdfPath);
        return;
      }

      // Increasing delay gives the system time to recover.
      const delay = 10000 * attempt;
      console.log(`Waiting ${delay}ms before retry...`);
      await sleep(delay);
    }
  }
}

/**
 * Convert the Markdown file at `inputPath` into a PDF at `outputPath`.
 *
 * @returns {Promise<void>}
 * @throws On read, parse, browser-launch or rendering failure.
 */
async function main() {
  console.log('Reading input file...');
  const mdContent = fs.readFileSync(inputPath, 'utf8');
  console.log(`File read successfully, length: ${mdContent.length} characters`);

  // 1. Protect math spans from the Markdown parser.
  const { text: protectedMarkdown, blocks: mathBlocks } = protectMath(mdContent);
  console.log(`Protected ${mathBlocks.length} math blocks`);

  // 2. Markdown -> HTML.
  console.log('Parsing markdown content...');
  const parsedHtml = marked.parse(protectedMarkdown);
  console.log('Markdown parsed successfully');

  // 3. Restore math, 4. convert Mermaid fences, 5. wrap in the page template.
  const bodyHtml = convertMermaidBlocks(restoreMath(parsedHtml, mathBlocks));
  const template = buildHtmlDocument(bodyHtml);
  console.log('Template prepared, starting PDF generation...');

  // 6. Generate the PDF with Puppeteer.
  console.log('Starting Puppeteer browser launch...');
  const browser = await puppeteer.launch({
    executablePath: '/usr/bin/chromium-browser', // Alpine location
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-background-timer-throttling',
      '--disable-renderer-backgrounding',
      '--disable-backgrounding-occluded-windows',
      '--memory-pressure-off',
      '--js-flags=--max-old-space-size=4096', // Raise the V8 heap limit
      '--enable-features=NetworkService',
      '--disable-features=VizDisplayCompositor',
      '--disable-gpu',
      '--disable-web-security',
    ],
    headless: 'new',
    timeout: 120000, // Increased timeout for containerized environment
  });
  console.log('Browser launched successfully');

  try {
    const page = await browser.newPage();
    console.log('Page created successfully');
    await configurePage(page);
    await generatePdfWithRetries(browser, page, template, outputPath);
  } finally {
    // Always release the browser, including on failure, so no Chromium
    // process is left behind.
    console.log('Closing browser...');
    try {
      await browser.close();
      console.log('Browser closed');
    } catch (closeError) {
      console.warn('Could not close browser:', errorMessage(closeError));
    }
  }

  console.log('=== MD to PDF Conversion Completed Successfully ===');
}

main().catch((err) => {
  console.error('Error during conversion:', err);
  console.error('Error stack:', err && err.stack);
  process.exit(1);
});