Skip to content

Instantly share code, notes, and snippets.

@timokoola
Created October 13, 2020 14:45
Show Gist options
  • Select an option

  • Save timokoola/8c364d72025683789f03511178da62bd to your computer and use it in GitHub Desktop.

Select an option

Save timokoola/8c364d72025683789f03511178da62bd to your computer and use it in GitHub Desktop.
Node.js snippet for quickly creating JSON from a scraped webpage (Chinese Grammar Wiki)
const cheerio = require("cheerio");
const got = require("got");
const fs = require("fs");
// Page whose table rows are scraped; also used as the base for resolving
// relative links found in the table cells.
const hsk3URL =
  "https://resources.allsetlearning.com/chinese/grammar/HSK_3_grammar_points";

// Downloads the page at `hsk3URL`, turns every table row that has a title
// into a `{ title, description, example, link }` record and writes the list
// as JSON to `hsk3.json` in the current working directory.
(async () => {
  const response = await got(hsk3URL);
  const $ = cheerio.load(response.body);

  // Output key for each table column, by column index.
  const titles = ["title", "description", "example"];
  const result = [];

  // `.each` (not `.map`): the callbacks are run only for their side effects.
  $("tr").each((i, row) => {
    const item = {};
    $(row)
      .find("td")
      .each((j, cell) => {
        const key = titles[j];
        // Columns past the third have no key; skipping them avoids writing
        // an "undefined" property into the record.
        if (key !== undefined) {
          const text = $(cell).text().trim();
          // The example column has all whitespace removed, not just trimmed
          // (presumably because the examples are Chinese sentences — confirm).
          item[key] = key === "example" ? text.replace(/\s/g, "") : text;
        }
        const link = $(cell).find("a").attr("href");
        if (link) {
          // Resolve against the page URL so that relative and already
          // absolute hrefs both produce a valid absolute link. A later cell
          // with a link overrides an earlier one, as before.
          item.link = new URL(link, hsk3URL).href;
        }
      });
    // Rows that produced no title (e.g. rows without any `td`) are dropped.
    if (item.title) {
      result.push(item);
    }
  });

  fs.writeFileSync("hsk3.json", JSON.stringify(result));
})().catch((err) => {
  // Surface download/parse/write failures instead of leaving the promise
  // rejection unhandled, and signal failure through the exit code.
  console.error(`Failed to scrape ${hsk3URL}:`, err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment