const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
const TurndownService = require('turndown');

const turndownService = new TurndownService();

// Directory that screenshot/PDF artifacts are written to.
const OUTPUT_DIR = process.env.OUTPUT_DIR || '/tmp/crawlapi';
// Navigation / wait_for timeout (in seconds) used when options.timeout is not set.
const DEFAULT_TIMEOUT_SECONDS = 30;
// Selectors scraped by the `scrape` endpoint when options.selectors is not given.
const DEFAULT_SCRAPE_SELECTORS = ['h1', 'h2', 'p', 'a'];
// Maximum number of links returned by the `json` endpoint.
const MAX_JSON_LINKS = 50;

/**
 * Parses the JSON-encoded options argument.
 *
 * @param {string|undefined} raw - Raw CLI argument.
 * @returns {object} The parsed object, or `{}` when the argument is missing,
 *   is not valid JSON, or does not decode to a plain object (e.g. `null`).
 */
function parseOptions(raw) {
  try {
    const parsed = JSON.parse(raw);
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Decodes the URL argument, which may be JSON-encoded or a bare string.
 *
 * @param {string|undefined} raw - Raw CLI argument.
 * @returns {*} The JSON-decoded value when `raw` is valid JSON; otherwise
 *   `raw` with one leading and one trailing double quote stripped;
 *   `undefined` when `raw` is not a string. Callers must validate the result.
 */
function parseUrl(raw) {
  if (typeof raw !== 'string') {
    return undefined;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return raw.replace(/^"|"$/g, '');
  }
}

/**
 * Creates `dir` (and any missing parents) if it does not already exist.
 *
 * @param {string} dir - Directory path.
 */
function ensureDir(dir) {
  // With `recursive: true` mkdirSync does not throw when the directory
  // already exists, so no separate existence check is needed.
  fs.mkdirSync(dir, { recursive: true });
}

/**
 * Writes `data` to `filename` inside OUTPUT_DIR.
 *
 * @param {string} filename - File name relative to OUTPUT_DIR.
 * @param {Buffer|string} data - Contents to write.
 * @returns {string} Path of the written file.
 */
function writeFile(filename, data) {
  ensureDir(OUTPUT_DIR);
  const filepath = path.join(OUTPUT_DIR, filename);
  fs.writeFileSync(filepath, data);
  return filepath;
}

/** Takes a PNG screenshot, honouring options.full_page, and saves it to OUTPUT_DIR. */
async function savePngScreenshot(page, options) {
  const buffer = await page.screenshot({
    type: 'png',
    fullPage: Boolean(options.full_page),
  });
  return writeFile(`${Date.now()}.png`, buffer);
}

/** `crawl`: page HTML, title and final URL. */
async function handleCrawl(page) {
  const html = await page.content();
  const title = await page.title();
  return { html, title, url: page.url() };
}

/** `content`: page HTML and final URL. */
async function handleContent(page) {
  const html = await page.content();
  return { html, url: page.url() };
}

/** `screenshot`: PNG file written to OUTPUT_DIR. */
async function handleScreenshot(page, options) {
  const filepath = await savePngScreenshot(page, options);
  return { file_path: filepath, url: page.url() };
}

/** `pdf`: A4 PDF (with backgrounds) written to OUTPUT_DIR. */
async function handlePdf(page) {
  const buffer = await page.pdf({ format: 'A4', printBackground: true });
  const filepath = writeFile(`${Date.now()}.pdf`, buffer);
  return { file_path: filepath, url: page.url() };
}

/** `markdown`: page HTML converted to Markdown with Turndown. */
async function handleMarkdown(page) {
  const html = await page.content();
  const markdown = turndownService.turndown(html);
  return { markdown, url: page.url() };
}

/** `snapshot`: page HTML plus a PNG screenshot written to OUTPUT_DIR. */
async function handleSnapshot(page, options) {
  const html = await page.content();
  const filepath = await savePngScreenshot(page, options);
  return { html, file_path: filepath, url: page.url() };
}

/** `scrape`: trimmed, non-empty text content of every element matching each selector. */
async function handleScrape(page, options) {
  const requested = options.selectors || DEFAULT_SCRAPE_SELECTORS;
  // Accept a single selector string; iterating a string directly would
  // treat each character as its own selector.
  const selectors = typeof requested === 'string' ? [requested] : requested;

  const data = {};
  for (const selector of selectors) {
    // One round trip per selector instead of one per matched element.
    const texts = await page.locator(selector).allTextContents();
    data[selector] = texts.filter(Boolean).map((text) => text.trim());
  }
  return { data, url: page.url() };
}

/** `json`: title, meta description, h1-h3 headings and the first MAX_JSON_LINKS links. */
async function handleJson(page) {
  const title = await page.title();

  // Check for the element first: calling getAttribute() on a locator that
  // matches nothing waits for the whole action timeout before failing.
  // first() also avoids a strict-mode error when several tags match.
  const metaDescription = page.locator('meta[name="description"]');
  const description =
    (await metaDescription.count()) > 0
      ? await metaDescription.first().getAttribute('content').catch(() => null)
      : null;

  const headings = await page.locator('h1, h2, h3').allTextContents();
  const links = await page
    .locator('a[href]')
    .evaluateAll((els) =>
      els.map((el) => ({ href: el.href, text: el.textContent?.trim() }))
    );

  return {
    title,
    description,
    headings,
    links: links.slice(0, MAX_JSON_LINKS),
    url: page.url(),
  };
}

/** `links`: all links with a non-empty href, de-duplicated by href (last occurrence wins). */
async function handleLinks(page) {
  const links = await page.locator('a[href]').evaluateAll((els) =>
    els
      .map((el) => ({ href: el.href, text: el.textContent?.trim() || '' }))
      .filter((link) => link.href)
  );
  const uniqueByHref = new Map(links.map((link) => [link.href, link]));
  return { links: [...uniqueByHref.values()], url: page.url() };
}

// Endpoint name -> handler(page, options). A Map is used so that inherited
// object keys such as "constructor" are never mistaken for endpoints.
const ENDPOINT_HANDLERS = new Map([
  ['crawl', handleCrawl],
  ['content', handleContent],
  ['screenshot', handleScreenshot],
  ['pdf', handlePdf],
  ['markdown', handleMarkdown],
  ['snapshot', handleSnapshot],
  ['scrape', handleScrape],
  ['json', handleJson],
  ['links', handleLinks],
]);

/**
 * CLI entry point: `node <script> <endpoint> <url> [optionsJson]`.
 *
 * Loads `url` in headless Chromium, runs the handler for `endpoint` and
 * prints its result as a single JSON line on stdout. On any failure the
 * error message is printed on stderr and the process exit code is set to 1.
 *
 * Options read here: width, height, user_agent, timeout (seconds), wait_for.
 */
async function run() {
  let browser;
  try {
    // Validate the arguments before paying for a browser launch.
    const endpoint = process.argv[2];
    const handler = ENDPOINT_HANDLERS.get(endpoint);
    if (!handler) {
      throw new Error(`Unknown endpoint: ${endpoint}`);
    }

    const url = parseUrl(process.argv[3]);
    if (typeof url !== 'string' || url === '') {
      throw new Error('Missing or invalid URL argument');
    }

    const options = parseOptions(process.argv[4]);

    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      viewport: {
        width: options.width || 1440,
        height: options.height || 900,
      },
      userAgent: options.user_agent || undefined,
    });
    const page = await context.newPage();

    const timeout = (options.timeout || DEFAULT_TIMEOUT_SECONDS) * 1000;
    await page.goto(url, { waitUntil: 'networkidle', timeout });
    if (options.wait_for) {
      await page.waitForSelector(options.wait_for, { timeout });
    }

    const result = await handler(page, options);
    console.log(JSON.stringify(result));
  } catch (error) {
    console.error(error?.message ?? String(error));
    // Set the exit code instead of calling process.exit(): exiting here
    // would terminate the process before the finally block closes the browser.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

run().catch((error) => {
  // Only reachable if cleanup itself fails (e.g. browser.close() rejects).
  console.error(error?.message ?? String(error));
  process.exitCode = 1;
});