smucode · February 9, 2012 12:55
diff --git a/html_parser.rb b/html_parser.rb
 require("uri")
 require("nokogiri")
 require("./url_fetcher")

 # Collects the link targets of a fetched document as absolute URL strings.
 class HtmlParser
   # Fetches +url+ and resolves the href of every <a> element against it.
   #
   # @param url [String] document address; also the base for relative links
   # @return [Array<String>] resolved link targets with unusable hrefs removed;
   #   empty when the fetcher hands back nil
   def parse(url)
     html = UrlFetcher.new.fetch(url)
     # The accompanying spec expects UrlFetcher#fetch to return nil on failure.
     return [] if html.nil?

     Nokogiri::HTML(html).css('a').map { |e| parse_url(url, get_href(e)) }.compact
   end

   # Reads the href attribute of an element.
   #
   # @param e [#attribute] element to inspect
   # @return [Object, nil] whatever +e.attribute("href")+ returns, or nil when
   #   the lookup raises
   def get_href(e)
     e.attribute("href")
   rescue StandardError
     nil
   end

   # Resolves +url+ against +base+.
   #
   # @param base [String] URL the link was found on
   # @param url [#to_s, nil] raw href value
   # @return [String, nil] the merged URL, or nil when the href is missing or
   #   the two cannot be parsed/merged as URIs
   def parse_url(base, url)
     return nil if url.nil?

     # Real-world markup pads href values with whitespace, which URI.parse
     # rejects outright; trim it so those links are not silently dropped.
     href = url.to_s.strip
     URI.parse(base).merge(URI.parse(href)).to_s
   rescue URI::Error, ArgumentError
     # URI::Error covers unparsable input and relative-onto-relative merges.
     # ArgumentError is kept as a precaution for malformed strings
     # (NOTE(review): e.g. invalid byte sequences — confirm).
     nil
   end
 end
diff --git a/html_parser_spec.rb b/html_parser_spec.rb
 require("./url_fetcher");
 require("./html_parser");

 # Examples for UrlFetcher#fetch: one URL expected to yield content and one
 # expected to yield nil.
 describe UrlFetcher do
   # Fresh fetcher per example.
   let(:fetcher) { UrlFetcher.new }

   it "fetches urls" do
     fetcher.fetch("ftp://ftp.powertech.no/test.txt").should eq("test.txt\n")
   end

   it "returns nil on failures" do
     fetcher.fetch("http://foo.bar/").should eq(nil)
   end
 end

 # Examples for HtmlParser#parse, driven by fixed remote URLs.
 describe HtmlParser do
   # Fresh parser per example.
   let(:parser) { HtmlParser.new }

   it "parses html and returns empty array when no links exists" do
     parser.parse("ftp://ftp.powertech.no/test.txt").size.should eq(0)
   end

   it "parses html and returns array when links exists" do
     links = parser.parse("http://ulv.no/")

     links.size.should eq(3)
     links[0].should eq("http://www.sau.no/")
   end

   it "should not return empty hrefs" do
     links = parser.parse("http://vg.no/")

     links.each { |link| link.should_not eq(nil) }
   end

   it "should only return absolute paths" do
     links = parser.parse("http://vg.no/")

     # "Absolute" is approximated here by the presence of "http" in the link.
     links.each { |link| link.should include("http") }
   end
 end
	require("nokogiri")
	require("./url_fetcher")

	# Collects the link targets of a fetched document as absolute URL strings.
	class HtmlParser
	  # Fetches +url+ and resolves the href of every <a> element against it.
	  #
	  # @param url [String] document address; also the base for relative links
	  # @return [Array<String>] resolved link targets with unusable hrefs removed;
	  #   empty when the fetcher hands back nil
	  def parse(url)
	    html = UrlFetcher.new.fetch(url)
	    # The accompanying spec expects UrlFetcher#fetch to return nil on failure.
	    return [] if html.nil?

	    Nokogiri::HTML(html).css('a').map { |e| parse_url(url, get_href(e)) }.compact
	  end

	  # Reads the href attribute of an element.
	  #
	  # @param e [#attribute] element to inspect
	  # @return [Object, nil] whatever +e.attribute("href")+ returns, or nil when
	  #   the lookup raises
	  def get_href(e)
	    e.attribute("href")
	  rescue StandardError
	    nil
	  end

	  # Resolves +url+ against +base+.
	  #
	  # @param base [String] URL the link was found on
	  # @param url [#to_s, nil] raw href value
	  # @return [String, nil] the merged URL, or nil when the href is missing or
	  #   the two cannot be parsed/merged as URIs
	  def parse_url(base, url)
	    return nil if url.nil?

	    # Real-world markup pads href values with whitespace, which URI.parse
	    # rejects outright; trim it so those links are not silently dropped.
	    href = url.to_s.strip
	    URI.parse(base).merge(URI.parse(href)).to_s
	  rescue URI::Error, ArgumentError
	    # URI::Error covers unparsable input and relative-onto-relative merges.
	    # ArgumentError is kept as a precaution for malformed strings
	    # (NOTE(review): e.g. invalid byte sequences — confirm).
	    nil
	  end
	end
	require("./url_fetcher");
	require("./html_parser");

	# Examples for UrlFetcher#fetch: one URL expected to yield content and one
	# expected to yield nil.
	describe UrlFetcher do
	  # Fresh fetcher per example.
	  let(:fetcher) { UrlFetcher.new }

	  it "fetches urls" do
	    fetcher.fetch("ftp://ftp.powertech.no/test.txt").should eq("test.txt\n")
	  end

	  it "returns nil on failures" do
	    fetcher.fetch("http://foo.bar/").should eq(nil)
	  end
	end

	# Examples for HtmlParser#parse, driven by fixed remote URLs.
	describe HtmlParser do
	  # Fresh parser per example.
	  let(:parser) { HtmlParser.new }

	  it "parses html and returns empty array when no links exists" do
	    parser.parse("ftp://ftp.powertech.no/test.txt").size.should eq(0)
	  end

	  it "parses html and returns array when links exists" do
	    links = parser.parse("http://ulv.no/")

	    links.size.should eq(3)
	    links[0].should eq("http://www.sau.no/")
	  end

	  it "should not return empty hrefs" do
	    links = parser.parse("http://vg.no/")

	    # Block parameters are delimited by plain pipes; the escaped form does not parse.
	    links.each { |link| link.should_not eq(nil) }
	  end

	  it "should only return absolute paths" do
	    links = parser.parse("http://vg.no/")

	    # "Absolute" is approximated here by the presence of "http" in the link.
	    links.each { |link| link.should include("http") }
	  end
	end