To set up:
npm install
Modify function _cleanClasses(node) in node_modules/readability/Readability.js
To run:
node test.js
To set up:
npm install
Modify function _cleanClasses(node) in node_modules/readability/Readability.js
To run:
node test.js
| { | |
| "name": "test", | |
| "dependencies": { | |
| "jsdom": "^11.5.1", | |
| "node-fetch": "^1.7.3", | |
| "readability": "github:mozilla/readability" | |
| } | |
| } |
| const { JSDOM } = require("jsdom"); | |
| const Readability = require("readability"); | |
// Options object handed to the JSDOM constructor in _parse().
// NOTE(review): the flag names suggest they stop jsdom from fetching or
// processing external resources; confirm that the installed jsdom version
// still recognises the `features` key.
const OPTIONS = {
  features: {
    FetchExternalResources: false,
    ProcessExternalResources: false,
  },
};
| // ... fetch the article's HTML with node-fetch and call _parse | |
/**
 * Build a jsdom document from `html`, run Readability over it and return
 * the result of the parse.
 *
 * @param {string} url - Address the markup came from; it is logged and
 *   passed to Readability as its first constructor argument.
 * @param {string} html - Raw HTML markup to load into jsdom.
 * @returns {*} Whatever `reader.parse()` returns.
 */
function _parse(url, html) {
  console.log("Parsing", url);
  console.log('html:', html);
  const dom = new JSDOM(html, OPTIONS);
  const doc = dom.window.document;
  // Publish jsdom's Node constructor on the global object explicitly, so the
  // assignment also works under strict mode instead of relying on an
  // implicit global. Presumably Readability reads the global `Node`;
  // verify against the installed Readability.js.
  global.Node = dom.window.Node;
  const reader = new Readability(url, doc);
  // Hand the parse result back to the caller instead of discarding it.
  return reader.parse();
}
| const fetch = require('node-fetch'); | |
// Article used as the test input for the Readability run.
const URL = 'https://www.cnet.com/es/noticias/twitter-lite-america-latina/#ftag=ESPdf5230a';

// Download the page, feed its markup to _parse(), and report any failure
// explicitly rather than leaving the rejection unhandled.
fetch(URL)
  .then(function(res) {
    return res.text();
  })
  .then(function(html) {
    return _parse(URL, html);
  })
  .catch(function(err) {
    console.error("Failed to fetch or parse", URL, err);
    // Signal the failure to the shell without cutting off pending output.
    process.exitCode = 1;
  });