-
-
Save smucode/1779815 to your computer and use it in GitHub Desktop.
Vital Ruby Advance Lab 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require("nokogiri") | |
| require("./url_fetcher") | |
require "uri"

# Collects the targets of the <a> elements on a page as absolute URL strings.
class HtmlParser
  # Fetches +url+ and resolves the href of every anchor against it.
  #
  # @param url [String] address of the page to scan; also used as the base
  #   when resolving relative hrefs
  # @return [Array<String>] resolved URLs; anchors whose href is missing or
  #   cannot be resolved are dropped
  def parse(url)
    html = UrlFetcher.new.fetch(url)
    # The accompanying spec expects the fetcher to return nil on failure;
    # with no document there are no links to report.
    return [] if html.nil?

    Nokogiri::HTML(html).css('a').map { |e| parse_url(url, get_href(e)) }.compact
  end

  # Reads the "href" attribute of an element.
  #
  # @param e [#attribute, nil] element to inspect
  # @return whatever +e.attribute("href")+ yields, or nil when +e+ does not
  #   respond to +attribute+ (for example when +e+ is nil)
  def get_href(e)
    return nil unless e.respond_to?(:attribute)

    e.attribute("href")
  end

  # Resolves +url+ against +base+.
  #
  # @param base [String, nil] URL the reference is relative to
  # @param url [String, nil] absolute or relative reference
  # @return [String, nil] the merged URL, or nil when either argument is nil
  #   or the pair cannot be parsed and merged
  def parse_url(base, url)
    return nil if base.nil? || url.nil?

    URI.parse(base).merge(URI.parse(url)).to_s
  rescue URI::Error, ArgumentError
    # URI::Error covers malformed URIs and un-mergeable (relative/relative)
    # pairs. ArgumentError is kept as well so that input with broken byte
    # sequences, which Ruby's string matching may reject with that error,
    # still yields nil instead of aborting the whole scan.
    nil
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require("./url_fetcher"); | |
| require("./html_parser"); | |
# Exercises UrlFetcher against live endpoints (network access required).
describe UrlFetcher do
  let(:fetcher) { described_class.new }

  # A reachable FTP resource is returned as its body text.
  it "fetches urls" do
    body = fetcher.fetch("ftp://ftp.powertech.no/test.txt")
    body.should eq("test.txt\n")
  end

  # A URL that cannot be fetched yields nil.
  it "returns nil on failures" do
    body = fetcher.fetch("http://foo.bar/")
    body.should eq(nil)
  end
end
# Exercises HtmlParser against live pages (network access required).
describe HtmlParser do
  let(:parser) { described_class.new }

  # A plain-text resource contains no anchors.
  it "parses html and returns empty array when no links exists" do
    links = parser.parse("ftp://ftp.powertech.no/test.txt")
    links.size.should eq(0)
  end

  # The page is expected to expose exactly three links, the first being sau.no.
  it "parses html and returns array when links exists" do
    links = parser.parse("http://ulv.no/")
    links.size.should eq(3)
    links.first.should eq("http://www.sau.no/")
  end

  # Anchors without a usable href must not show up as nil entries.
  it "should not return empty hrefs" do
    links = parser.parse("http://vg.no/")
    links.each { |link| link.should_not eq(nil) }
  end

  # Every returned entry should contain "http".
  it "should only return absolute paths" do
    links = parser.parse("http://vg.no/")
    links.each { |link| link.should include("http") }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment