Last active
November 14, 2018 21:32
-
-
Save ianhenrysmith/1891d8ad5ffc1dd744c6b65c361eda9a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # bundle exec rake wikiseed:star_wars SUBDOMAIN=jammies COUNT=1 | |
| require 'open-uri' | |
| require 'nokogiri' | |
# Rake tasks that create content from randomly chosen wiki pages.
#
# Environment variables read by the helpers below:
#   SUBDOMAIN - passed to Newsroom.by_sub (required; ENV.fetch raises KeyError if unset)
#   COUNT     - number of pages to import per task (default 10)
#   URL       - import this page instead of a random one; forces the count to 1
#   EMAIL     - looked up with User.by_email; when unset, newsroom.admins.first is used
namespace :wikiseed do
  # Thumbnail selectors shared by the Wikia-hosted wikis, tried in order.
  WIKIA_THUMBNAIL_SELECTORS = [
    '.wiki-sidebar .thumbimage',
    '#mw-content-text .image.image-thumbnail img',
    '.thumbimage'
  ].freeze

  # Per-wiki settings keyed by task name. Key order is also the prerequisite
  # order of the `all` task.
  TYPE_MAPPINGS = {
    star_wars: {
      category: 'Star Wars',
      url: 'http://starwars.wikia.com/wiki/Special:Random',
      thumbnail_selectors: [
        '.pi-image-thumbnail',
        '#mw-content-text .image.image-thumbnail img',
        '.thumbimage'
      ]
    },
    star_trek: {
      category: 'Star Trek',
      url: 'http://memory-alpha.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    lotr: {
      category: 'Lord of the Rings',
      url: 'http://lotr.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    matrix: {
      category: 'The Matrix',
      url: 'http://matrix.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    marvel: {
      category: 'Marvel',
      url: 'http://marvel.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    dc: {
      category: 'DC Comics',
      url: 'http://dc.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    harry_potter: {
      category: 'Harry Potter',
      url: 'http://harrypotter.wikia.com/wiki/Special:Random',
      thumbnail_selectors: WIKIA_THUMBNAIL_SELECTORS
    },
    wikipedia: {
      category: 'Wikipedia',
      url: 'https://en.wikipedia.org/wiki/Special:Random',
      thumbnail_selectors: [
        '#mw-content-text .thumbinner .thumbimage',
        '.thumbimage'
      ]
    }
  }.freeze

  # One task per configured wiki, e.g. `rake wikiseed:star_wars`.
  TYPE_MAPPINGS.each do |page_type, config|
    desc "Import random #{config[:category]} pages"
    task page_type => :environment do
      @page_type = page_type
      seed_content
    end
  end

  desc 'Import random pages from every configured wiki'
  task all: TYPE_MAPPINGS.keys

  # Returns the URL env var, or nil when it is unset or blank. Shared by
  # wiki_url and count so both agree on whether an override is active.
  def url_override
    url = ENV['URL'].to_s.strip
    url.empty? ? nil : url
  end

  # URL to fetch: the URL override if given, otherwise the wiki's random-page URL.
  def wiki_url
    url_override || wiki_data[:url]
  end

  # Increments and returns a running counter (starts at 1).
  def created_count
    @created_count ||= 0
    @created_count += 1
  end

  # Composes a post from params, applies params through the editor, and
  # returns the editor's post.
  def create_content(params)
    puts " - creating content ##{created_count}- #{params[:title]}"
    content = content_composer.compose_content(post: params, post_type_id: post_type_id)
    editor = content_editor(content).update(params)
    editor.post
  end

  def content_composer
    @composer ||= ::Service::Composition::Composer.new(instance_context)
  end

  def content_editor(content)
    ::ContentService::Editor.new(instance_context, content)
  end

  def instance_context
    @_context ||= ::Service::KapostContext.new(user, newsroom)
  end

  def newsroom
    @newsroom ||= ::Newsroom.by_sub(subdomain)
  end

  # Raises KeyError when SUBDOMAIN is not set.
  def subdomain
    ENV.fetch('SUBDOMAIN')
  end

  # Number of pages to import: 1 when a URL override is active, otherwise
  # COUNT (default 10).
  def count
    url_override ? 1 : ENV.fetch('COUNT', 10).to_i
  end

  def email
    ENV['EMAIL']
  end

  def user
    @user ||= email ? ::User.by_email(email) : newsroom.admins.first
  end

  def post_type_id
    @post_type_id ||= createable_post_type_id
  end

  # NOTE(review): get_data and memberships_url are not defined in this file and
  # nothing here calls users; confirm whether this is dead code.
  def users
    @users ||= get_data(memberships_url)
  end

  def content_types
    @content_types ||= instance_context.umbrella_post_types
  end

  # Id of the content type to create: the blog post type if present, otherwise
  # the first type with an HTML body. Raises when neither exists.
  def createable_post_type_id
    content_type = blog_content_type || html_content_type
    raise "No blog post or html content type found for #{subdomain}" unless content_type

    content_type.id
  end

  def blog_content_type
    content_types.detect { |ct| ct.display_name.downcase.include?('blog post') }
  end

  def html_content_type
    content_types.detect { |ct| ct.body_type == 'html' }
  end

  # Page title up to the first ' - ' separator; nil when the page has no title.
  def doc_title(doc)
    doc.title.to_s.split(' - ').first
  end

  # href of the page's canonical <link>, or nil when the page has none.
  def doc_url(doc)
    link = doc.at_css("link[rel='canonical']")
    link && link['href']
  end

  # Concatenated text of every paragraph inside #mw-content-text.
  def doc_content(doc)
    doc.css('#mw-content-text p').text
  end

  # First acceptable thumbnail URL, trying the wiki's selectors in order.
  def doc_thumbnail(doc)
    urls = wiki_data[:thumbnail_selectors].map do |selector|
      thumbnail_url_from_selector(doc, selector)
    end
    processed_thumbnail_urls(urls).first
  end

  # src attribute of the first node matching selector, or nil when nothing
  # matches or the node has no src.
  def thumbnail_url_from_selector(doc, selector)
    node = doc.at_css(selector)
    node && node['src']
  end

  # Drops nils and any URL containing 'Era-' or 'Eras-'.
  def processed_thumbnail_urls(thumbnail_urls)
    thumbnail_urls.compact.reject { |url| url =~ /Eras?-/ }
  end

  # Settings for the wiki being imported; raises KeyError if @page_type is not
  # a configured wiki.
  def wiki_data
    TYPE_MAPPINGS.fetch(@page_type)
  end

  def content_data(doc)
    {
      body: doc_content(doc),
      categories: Array(wiki_data[:category]),
      external_post_url: doc_url(doc),
      operation: :mark_as_published_and_completed,
      synced_to_gallery_date: Time.now,
      title: doc_title(doc)
    }
  end

  # Downloads and parses one page. Going through URI#open (rather than
  # Kernel#open) means a non-URL value can never be treated as a local path or
  # pipe command, and the block form closes the response body after parsing.
  def fetch_doc(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Imports `count` pages for the current @page_type, saving each with its thumbnail.
  def seed_content
    puts "> importing content from #{@page_type}"
    count.times do
      doc = fetch_doc(wiki_url)
      content = create_content(content_data(doc))
      content.thumbnail_url = doc_thumbnail(doc)
      content.save
    end
  end
end
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hackathon script for importing content into kapost