Created
October 10, 2019 10:48
-
-
Save fpaint/1d6bdfdd7d609d8677c4a8211fa81938 to your computer and use it in GitHub Desktop.
Script for downloading Wildbow's "Ward" and generating an FB2 book for my e-reader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'nokogiri' | |
| require 'faraday' | |
| require 'date' | |
| require 'digest' | |
# Downloads the chapters linked from the site at BASE and assembles them into
# a single FictionBook (FB2) file.
class Ward
  BASE = 'https://www.parahumans.net'

  # One chapter: its heading text and its cleaned-up body markup.
  class Page
    attr_reader :title, :content

    # @param title [String] chapter heading
    # @param content [String] chapter body as an XML/HTML fragment
    def initialize(title, content)
      @title = title
      @content = content
    end
  end

  # Fetches chapter pages over HTTP and keeps the raw responses in an on-disk
  # cache keyed by the MD5 of the URL, so repeated runs do not re-download.
  class Loader
    # Directory (relative to the working directory) holding cached responses.
    CACHE_DIR = 'cache'

    # Returns the parsed page for +url+, reading the cached copy when present.
    #
    # @param url [String] address of the chapter page
    # @return [Page]
    # @raise [RuntimeError] when the HTTP request is not successful
    def get(url)
      cache = File.join(CACHE_DIR, Digest::MD5.hexdigest(url))
      # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
      return parse(File.read(cache)) if File.exist?(cache)

      res = Faraday.get(url)
      raise "Get #{url} failed: #{res.status}" unless res.success?

      # The cache directory is not guaranteed to exist on a fresh checkout.
      Dir.mkdir(CACHE_DIR) unless Dir.exist?(CACHE_DIR)
      File.write(cache, res.body)
      parse(res.body)
    end

    # Extracts the title and body from a chapter's HTML and rewrites the body
    # into markup suitable for embedding in the book.
    #
    # @param html [String] raw HTML of a chapter page
    # @return [Page]
    def parse(html)
      doc = Nokogiri::HTML(html)
      title = doc.css('main article header.entry-header h1.entry-title').text
      content = doc.css('main article div.entry-content')
      content.css('.sharedaddy').remove

      # NOTE(review): the first and last paragraphs are dropped, presumably
      # because they hold previous/next chapter links - confirm against the site.
      # The set is re-queried after the first removal, and both removals are
      # guarded so a page with fewer than two paragraphs no longer crashes.
      leading = content.css('p').first
      leading.remove if leading
      trailing = content.css('p').last
      trailing.remove if trailing

      # Unwrap links and spans, keeping only their children.
      content.css('a, span').each { |node| node.replace(node.children) }
      # Rename <em> to <emphasis>, the element name used in the FB2 output.
      content.css('em').each { |node| node.name = 'emphasis' }
      # Drop inline style attributes.
      content.xpath('//@style').remove
      Page.new(title, content.inner_html)
    end
  end

  # Serialises a list of pages into an FB2 document.
  class Book
    # @param pages [Array<Page>] chapters in reading order
    def initialize(pages = [])
      @pages = pages
    end

    # Builds the FB2 XML (description header plus one section per page) and
    # writes it to +filename+.
    #
    # @param filename [String] path of the file to write
    # @return [Integer] result of File.write
    def generate(filename)
      today = Date.today
      # The block takes an argument so the builder yields itself instead of
      # using instance_eval; that keeps @pages and BASE resolvable in here.
      doc = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
        xml.FictionBook(xmlns: 'http://www.gribuser.ru/xml/fictionbook/2.0', 'xmlns:l' => 'http://www.w3.org/1999/xlink') do
          xml.description do
            xml.send('title-info') do
              xml.author do
                xml.send 'first-name', 'John Charles'
                xml.send 'last-name', 'McCrae'
              end
              xml.send 'book-title', 'Ward'
              xml.annotation do
                xml.p 'The unwritten rules that govern the fights and outright wars between ‘capes’ have been amended: everyone gets their second chance. It’s an uneasy thing to come to terms with when notorious supervillains and even monsters are playing at being hero. The world ended two years ago, and as humanity straddles the old world and the new, there aren’t records, witnesses, or facilities to answer the villains’ past actions in the present. One of many compromises, uneasy truces and deceptions that are starting to splinter as humanity rebuilds.'
                xml.p 'None feel the injustice of this new status quo or the lack of established footing more than the past residents of the parahuman asylums. The facilities hosted parahumans and their victims, but the facilities are ruined or gone; one of many fragile ex-patients is left to find a place in a fractured world. She’s perhaps the person least suited to have anything to do with this tenuous peace or to stand alongside these false heroes. She’s put in a position to make the decision: will she compromise to help forge what they call, with dark sentiment, a second golden age? Or will she stand tall as a gilded dark age dawns?'
              end
              xml.lang 'en'
            end
            xml.send('document-info') do
              xml.author do
                xml.nickname 'Cuterebra'
              end
              xml.send 'program-used', 'Ruby, Faraday, Nokogiri'
              xml.date(today, value: today)
              xml.id 'fbab61de-f08a-4763-88f8-7b3808e153d0'
              xml.send 'src-url', BASE
            end
          end
          xml.body do
            @pages.each do |page|
              xml.section do |section|
                section.title page.title
                # page.content is already markup, so insert it raw.
                section << page.content
              end
            end
          end
        end
      end
      File.write(filename, doc.to_xml)
    end
  end

  # Reads the table of contents from the front page.
  #
  # @return [Array<String>] unique chapter URLs in menu order, with the
  #   'category/story' links filtered out
  # @raise [RuntimeError] when the front page request is not successful
  def contents
    res = Faraday.get(BASE)
    # Same failure handling as Loader#get, instead of parsing an error page.
    raise "Get #{BASE} failed: #{res.status}" unless res.success?

    doc = Nokogiri::HTML(res.body)
    urls = ['aside #nav_menu-5', 'aside #nav_menu-6'].flat_map { |selector| links(doc.css(selector)) }
    urls.uniq.reject { |url| url.include?('category/story') }
  end

  # Collects the href of every anchor below +node+.
  #
  # @param node [#css] a node or node set to search
  # @return [Array<String>] hrefs in document order; anchors without an href
  #   are skipped so callers never see nil
  def links(node)
    node.css('a').map { |a| a.attr(:href) }.compact
  end

  # Downloads every chapter and writes the finished book to +filename+,
  # printing progress to standard output.
  #
  # @param filename [String] path of the FB2 file to create
  # @return [Integer] result of Book#generate
  def create_book(filename)
    puts 'Getting contents...'
    urls = contents
    loader = Loader.new
    pages = urls.map do |url|
      puts "Reading #{url}"
      loader.get(url)
    end
    puts 'Generating book'
    Book.new(pages).generate(filename)
  end
end
# Script entry point: build the book and write it to ward.fb2.
Ward.new.create_book('ward.fb2')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
One more fix with forum name corrected
`require 'nokogiri'
require 'faraday'
require 'date'
require 'digest'
require 'fileutils'
# Downloads the chapters linked from the site at BASE and assembles them into
# a single book file.
class Ward
  BASE = 'https://www.parahumans.net'

  # NOTE(review): Page, Loader and Book are empty in this snippet - presumably
  # their bodies were elided and are unchanged from the full script; confirm.
  class Page
  end

  class Loader
  end

  class Book
  end

  # Reads the table of contents from the front page.
  #
  # @return [Array<String>] unique chapter URLs in menu order, with the
  #   'category/story' links filtered out
  # @raise [RuntimeError] when the front page request is not successful
  def contents
    res = Faraday.get(BASE)
    # Fail loudly on an HTTP error instead of parsing an error page.
    raise "Get #{BASE} failed: #{res.status}" unless res.success?

    doc = Nokogiri::HTML(res.body)
    urls = ['aside #nav_menu-5', 'aside #nav_menu-6'].flat_map { |selector| links(doc.css(selector)) }
    urls.uniq.reject { |url| url.include?('category/story') }
  end

  # Collects the href of every anchor below +node+.
  #
  # @param node [#css] a node or node set to search
  # @return [Array<String>] hrefs in document order; anchors without an href
  #   are skipped so callers never see nil
  def links(node)
    node.css('a').map { |a| a.attr(:href) }.compact
  end

  # Downloads every chapter and writes the finished book to +filename+,
  # printing progress to standard output.
  #
  # @param filename [String] path of the book file to create
  # @return whatever Book#generate returns
  def create_book(filename)
    puts 'Getting contents...'
    urls = contents
    loader = Loader.new
    pages = urls.map do |url|
      puts "Reading #{url}"
      loader.get(url)
    end
    puts 'Generating book'
    Ward::Book.new(pages).generate(filename)
  end
end
ward = Ward.new
ward.create_book('ward.fb2')`