Created
November 2, 2011 16:43
-
-
Save alprut/1334161 to your computer and use it in GitHub Desktop.
A ruby script to get book information from AWS using barcode in preface images of books
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/ruby | |
| require 'rubygems' | |
| require 'amazon/aws' | |
| require 'amazon/aws/search' | |
| require 'csv' | |
| require 'fileutils' | |
# $KCODE selects the source/string encoding on Ruby 1.8 only. Ruby 1.9 and
# later ignore the assignment and print a warning, so only set it where it
# actually has an effect.
$KCODE = "UTF8" if RUBY_VERSION < "1.9"
| # This script reads the ISBN (as a string) from the barcode in each preface | |
| # image in the specified directory, looks up the book information from AWS | |
| # using that ISBN, creates a directory named after the title, stores the | |
| # image there after rotating it 90 degrees to the right, and saves the book | |
| # information (ASIN, EAN, ISBN, title, author, publisher, publication date) | |
| # to 'bookinfo.csv' in that directory as one comma-separated line. | |
| # | |
| # usage: bookdir.rb outdir preface_dir | |
| # Preparation | |
| # 1. Install zbarimg. You can get the source from http://zbar.sourceforge.net/ | |
| # 2. Get AWS ID and key from http://aws.amazon.com | |
| # 3. Configure /etc/amazonrc or ~/.amazonrc . If you want to put | |
| # the configuration file in another directory or give it another name, | |
| # use the following code. | |
| # | |
| # ENV['AMAZONRCDIR'] = '.' # the directory to put the configuration file | |
| # ENV['AMAZONRCFILE'] = 'amazonrc' # the configuration file name | |
| # This is an example of the configuration file. | |
| # Remove the leading # from each line in the real file. | |
| #[global] | |
| # key_id = 'your AWS ID' | |
| # secret_key_id = 'your AWS key' | |
| # locale = jp | |
| # cache = true | |
| # cache_dir = . | |
# Book metadata taken from the first item of an Amazon ItemLookup response.
#
# The title is normalized so it can be used as a directory name: everything
# from the first opening bracket on (e.g. the publisher name) is dropped,
# spaces become underscores, and the first bracketed number, if any, is
# appended as "_<number>".
class AmazonBook
  attr_reader :asin, :ean, :isbn
  # The image and detail-page readers are declared for callers but are not
  # populated by #initialize (the assignments are disabled below), so they
  # currently return nil. The same is true of :label.
  attr_reader :small_image_url, :medium_image_url, :large_image_url
  attr_reader :detail_page_url
  attr_reader :title, :publisher, :publication_date, :author
  attr_reader :label

  # page - a response object that answers
  #        item_lookup_response.items.item[0], whose item exposes #asin and
  #        #item_attributes (ean, isbn, publisher, publication_date, author,
  #        title).
  def initialize(page)
    item = page.item_lookup_response.items.item[0]
    attributes = item.item_attributes

    @asin = item.asin
    @ean = attributes.ean
    @isbn = attributes.isbn
    # Disabled in the original script:
    # @small_image_url = item.small_image.url
    # @medium_image = item.medium_image.url
    # @large_image = item.large_image.url
    # @detail_page_url = item.detail_page_url
    @publisher = attributes.publisher
    @publication_date = attributes.publication_date
    @author = attributes.author
    @title = normalize_title(attributes.title.to_s)
  end

  # Looks up a book by ISBN in the 'Books' search index using the 'Medium'
  # response group and wraps the response in an AmazonBook.
  #
  # isbn - the ISBN string to look up.
  #
  # Returns an AmazonBook. Errors raised by the AWS library propagate.
  def self.get_by_isbn(isbn)
    lookup = Amazon::AWS::ItemLookup.new('ISBN',
                                         { 'ItemId' => isbn,
                                           'SearchIndex' => 'Books' })
    response_group = Amazon::AWS::ResponseGroup.new('Medium')
    request = Amazon::AWS::Search::Request.new
    new(request.search(lookup, response_group))
  end

  # Writes one comma-separated line (asin, ean, isbn, title, author,
  # publisher, publication date) to +file+, replacing any existing content.
  #
  # The block form of File.open guarantees the handle is flushed and closed;
  # previously it was left open, so the data could stay buffered.
  #
  # NOTE(review): fields are joined verbatim, so a value containing a comma
  # or a quote is not escaped.
  def dump(file)
    fields = [@asin, @ean, @isbn, @title, @author, @publisher, @publication_date]
    File.open(file, "w") do |fp|
      fp.puts fields.map { |field| field.to_s }.join(",")
    end
  end

  private

  # Turns a raw title into a directory-friendly name.
  # eg. "title (3) (publisher name)" -> "title_3"
  def normalize_title(raw)
    # Leading part of the title, up to the first opening bracket.
    base = raw[/[^\((<〔<]*/]
    # First bracketed run of digits, treated as the volume number.
    volume = raw[/[\((<〔<](\d+)[\))>〕>]/, 1]
    suffix = volume ? "_" + volume : ""
    base.sub(/ $/, "").gsub(/ /, "_") + suffix
  end
end
# An image file on disk that can be rotated with ImageMagick's `convert`.
class Picture
  # path - path of the source image.
  def initialize(path)
    @path = path
  end

  # Writes a copy of the image rotated 90 degrees clockwise to +dst+.
  # Returns whatever `convert` printed on stdout.
  # Raises Errno::ENOENT if `convert` is not installed.
  def rotate_to_right(dst)
    rotate(90, dst)
  end

  # Writes a copy of the image rotated 90 degrees counter-clockwise to +dst+.
  # Returns whatever `convert` printed on stdout.
  # Raises Errno::ENOENT if `convert` is not installed.
  def rotate_to_left(dst)
    rotate(-90, dst)
  end

  private

  # Runs `convert -rotate <degrees> <source> <dst>`.
  #
  # The command is passed as an argv array so no shell is involved: paths
  # containing quotes, spaces or other shell metacharacters are handed to
  # `convert` unchanged instead of breaking (or injecting into) the command.
  def rotate(degrees, dst)
    IO.popen(["convert", "-rotate", degrees.to_s, @path, dst]) { |io| io.read }
  end
end
require 'fileutils'
require 'tmpdir'

# A preface image of a book that carries the book's barcode.
class BookPreface < Picture
  # Reads the ISBN from the barcode in the image using `zbarimg`.
  #
  # The image is scanned from a scratch copy rather than from its original
  # location (presumably so zbarimg gets a simple path -- the reason is not
  # stated in the script). The copy lives in a private temporary directory
  # that is removed when the scan finishes, instead of being left behind
  # under a predictable name in /tmp.
  #
  # Both the copy and the scan avoid the shell, so file names containing
  # quotes or other shell metacharacters are handled correctly.
  #
  # Returns the digits of the last "EAN-13:97..." line reported by zbarimg,
  # or nil when no such barcode is found.
  # Raises a SystemCallError (e.g. Errno::ENOENT) if the image cannot be
  # copied or `zbarimg` is not installed.
  def get_isbn
    Dir.mktmpdir("bookpreface") do |scratch_dir|
      scratch_copy = File.join(scratch_dir, File.basename(@path))
      FileUtils.cp(@path, scratch_copy)

      isbn = nil
      IO.popen(["zbarimg", scratch_copy]) do |io|
        io.each_line do |line|
          # Only EAN-13 codes starting with 97 are accepted; other barcodes
          # printed on the page are ignored.
          isbn = line.sub(/EAN-13:/, "").chomp if /EAN-13:97/ =~ line
        end
      end
      isbn
    end
  end
end
# Entry point: bookdir.rb outdir preface_dir
#
# For every *.jpg in preface_dir: read the ISBN from the barcode, look the
# book up, create outdir/<title>/, store the image there rotated 90 degrees
# to the right as preface.jpg, and write bookinfo.csv next to it.

# Usage and validation failures go to stderr with a non-zero exit status so
# that calling scripts can detect them.
abort "usage: bookdir.rb outdir preface_dir" if ARGV.length < 2

out_dir = ARGV[0]
preface_dir = ARGV[1]

abort preface_dir + " is not directory" unless File.directory?(preface_dir)

if File.exist?(out_dir)
  abort out_dir + " is not directory" unless File.directory?(out_dir)
else
  # mkdir_p so that a nested output path can be created in one go.
  FileUtils.mkdir_p(out_dir)
end

Dir.glob(preface_dir + "/*.jpg") do |path|
  puts "Trying to get ISBN from " + path
  preface = BookPreface.new(path)
  isbn = preface.get_isbn
  if isbn.nil? || isbn.empty?
    puts "Failed to get ISBN. Skipped"
    next
  end

  puts "Trying to get book information by ISBN: \"" + isbn + "\""
  begin
    bookinfo = AmazonBook.get_by_isbn(isbn)
  rescue StandardError => e
    # One failed lookup must not abort the whole batch.
    puts "Failed to get book information: " + e.message + ". Skipped"
    next
  end

  # The title becomes a single directory name: a "/" in it would otherwise
  # be taken as a path separator and make mkdir fail or escape out_dir.
  book_dir_name = bookinfo.title.gsub(/ /, "_").gsub("/", "_")
  # An empty title would collapse the target to out_dir itself; fall back
  # to the ISBN so every book still gets its own directory.
  book_dir_name = isbn if book_dir_name.empty?
  dirname = out_dir + "/" + book_dir_name
  FileUtils.mkdir(dirname) unless File.exist?(dirname)

  preface_to = dirname + "/" + "preface.jpg"
  if File.exist?(preface_to)
    puts "\"" + preface_to + "\" exists. Skipped"
    next
  end

  preface.rotate_to_right(preface_to)
  bookinfo.dump(dirname + "/" + "bookinfo.csv")
  puts "Done: \"" + preface_to + "\""
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment