爬取掘金文档并转化为 md
·
Yin灏
const puppeteer = require("puppeteer");
const turndown = require("turndown");
const fs = require("fs");
/**
 * Script entry point: loads a Juejin article in headless Chromium, rewrites
 * its highlighted code blocks into Markdown fences, converts the article HTML
 * to Markdown with Turndown, and writes the result to `article.md` in the
 * current working directory.
 *
 * The browser is always closed, even when navigation or scraping fails, and
 * any failure is reported on stderr with a non-zero exit code.
 */
(async () => {
  const ARTICLE_URL = "https://juejin.cn/post/7240730470975111227";
  const OUTPUT_FILE = "article.md";

  const browser = await puppeteer.launch({
    headless: "new",
  });

  let articleContent;
  try {
    const page = await browser.newPage();
    await page.goto(ARTICLE_URL);

    // Everything inside this callback runs in the page, not in Node, so it
    // must not reference any variable from the enclosing scope.
    articleContent = await page.evaluate(() => {
      // Drop the per-code-block header elements so their text does not end
      // up in the Markdown (presumably the language label / copy toolbar).
      document
        .querySelectorAll(".code-block-extension-header")
        .forEach((header) => header.remove());

      // Wrap every highlighted code block in a literal Markdown fence.
      // NOTE(review): leading whitespace is stripped from every line, so the
      // code's indentation is lost, and the fence is tagged `js` regardless
      // of the block's real language — confirm this is acceptable.
      document.querySelectorAll("code.hljs").forEach((codeBlock) => {
        const code = codeBlock.innerHTML.trim().replace(/^\s+/gm, "");
        codeBlock.innerHTML = "```js\n" + code + "\n```";
      });

      // Return null instead of throwing inside the page so the caller can
      // raise a descriptive error.
      const article = document.querySelector(".article-content");
      return article === null ? null : article.innerHTML;
    });
  } finally {
    // Release the Chromium process on success and on failure alike.
    await browser.close();
  }

  if (articleContent === null) {
    throw new Error(`No ".article-content" element found at ${ARTICLE_URL}`);
  }

  const turndownService = new turndown();
  // Trimming every line removes all leading/trailing whitespace, including
  // any indentation Turndown emits (presumably this is what turns its
  // indented code blocks into the fenced blocks injected above).
  const markdownContent = turndownService
    .turndown(articleContent)
    .split("\n")
    .map((line) => line.trim())
    .join("\n");

  // Await the write so a failure rejects this function instead of being
  // thrown from a detached callback.
  await fs.promises.writeFile(OUTPUT_FILE, markdownContent);
  console.log("Markdown file saved!");
})().catch((err) => {
  console.error("Failed to export article:", err);
  process.exitCode = 1;
});