Created
March 5, 2017 23:32
-
-
Save yuanw/1751da11b2a315aeb53d7eccc8f406e2 to your computer and use it in GitHub Desktop.
HTML parsing example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: learn-html-conduit | |
| version: 0.1.0.0 | |
| -- synopsis: | |
| -- description: | |
| homepage: https://github.com/githubuser/learn-html-conduit#readme | |
| license: BSD3 | |
| license-file: LICENSE | |
| author: Author name here | |
| maintainer: example@example.com | |
| copyright: 2017 Author name here | |
| category: Web | |
| build-type: Simple | |
| extra-source-files: README.md | |
| cabal-version: >=1.10 | |
| library | |
| hs-source-dirs: src | |
| exposed-modules: Lib | |
| build-depends: base >= 4.7 && < 5 | |
| , bytestring | |
| , http-conduit | |
| , html-conduit | |
| , text | |
| , xml-conduit | |
| default-language: Haskell2010 | |
| executable learn-html-conduit-exe | |
| hs-source-dirs: app | |
| main-is: Main.hs | |
| ghc-options: -threaded -rtsopts -with-rtsopts=-N | |
| build-depends: base | |
| , bytestring | |
| , http-conduit | |
| , html-conduit | |
| , learn-html-conduit | |
| , text | |
| , xml-conduit | |
| default-language: Haskell2010 | |
| test-suite learn-html-conduit-test | |
| type: exitcode-stdio-1.0 | |
| hs-source-dirs: test | |
| main-is: Spec.hs | |
| build-depends: base | |
| , learn-html-conduit | |
| ghc-options: -threaded -rtsopts -with-rtsopts=-N | |
| default-language: Haskell2010 | |
| source-repository head | |
| type: git | |
| location: https://github.com/githubuser/learn-html-conduit |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE OverloadedStrings #-} | |
| module Main where | |
| import Network.HTTP.Conduit (simpleHttp) | |
| import qualified Data.Text as T | |
| import Text.HTML.DOM (parseLBS) | |
| import Text.XML.Cursor (Axis, Cursor, attributeIs, content, element, fromDocument, child, | |
| ($//), (&|), (&//), (>=>)) | |
| -- Bing results page for the query "school of haskell"; fetched and scraped by main | |
| url :: String | |
| url = "http://www.bing.com/search?q=school+of+haskell" | |
| -- Axis selecting the children of every <span> element whose class attribute | |
| -- is exactly "sb_count" | |
| findNodes :: Axis | |
| findNodes cursor = element "span" cursor >>= attributeIs "class" "sb_count" >>= child | |
| -- Join all text content found at the given cursor into a single Text | |
| extractData :: Cursor -> T.Text | |
| extractData node = T.concat (content node) | |
| -- Concatenate the extracted pieces and print them as one line on stdout | |
| processData :: [T.Text] -> IO () | |
| processData pieces = putStrLn (T.unpack (T.concat pieces)) | |
| -- Download the page at the given URL and return a cursor over the parsed | |
| -- HTML document. Nothing is caught here, so any exception raised by | |
| -- simpleHttp propagates to the caller. | |
| cursorFor :: String -> IO Cursor | |
| cursorFor u = fmap (fromDocument . parseLBS) (simpleHttp u) | |
| -- Fetch the search page, select the matching nodes and print their text | |
| main :: IO () | |
| main = cursorFor url >>= \root -> processData (root $// findNodes &| extractData) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This file was automatically generated by 'stack init' | |
| # | |
| # Some commonly used options have been documented as comments in this file. | |
| # For advanced use and comprehensive documentation of the format, please see: | |
| # http://docs.haskellstack.org/en/stable/yaml_configuration/ | |
| # Resolver to choose a 'specific' stackage snapshot or a compiler version. | |
| # A snapshot resolver dictates the compiler version and the set of packages | |
| # to be used for project dependencies. For example: | |
| # | |
| # resolver: lts-3.5 | |
| # resolver: nightly-2015-09-21 | |
| # resolver: ghc-7.10.2 | |
| # resolver: ghcjs-0.1.0_ghc-7.10.2 | |
| # resolver: | |
| # name: custom-snapshot | |
| # location: "./custom-snapshot.yaml" | |
| resolver: lts-8.3 | |
| # User packages to be built. | |
| # Various formats can be used as shown in the example below. | |
| # | |
| # packages: | |
| # - some-directory | |
| # - https://example.com/foo/bar/baz-0.0.2.tar.gz | |
| # - location: | |
| # git: https://github.com/commercialhaskell/stack.git | |
| # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a | |
| # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a | |
| # extra-dep: true | |
| # subdirs: | |
| # - auto-update | |
| # - wai | |
| # | |
| # A package marked 'extra-dep: true' will only be built if demanded by a | |
| # non-dependency (i.e. a user package), and its test suites and benchmarks | |
| # will not be run. This is useful for tweaking upstream packages. | |
| packages: | |
| - '.' | |
| # Dependency packages to be pulled from upstream that are not in the resolver | |
| # (e.g., acme-missiles-0.3) | |
| extra-deps: | |
| - http-conduit-2.2.3.1 | |
| - html-conduit-1.2.1.1 | |
| # Override default flag values for local packages and extra-deps | |
| flags: {} | |
| # Extra package databases containing global packages | |
| extra-package-dbs: [] | |
| # Control whether we use the GHC we find on the path | |
| # system-ghc: true | |
| # | |
| # Require a specific version of stack, using version ranges | |
| # require-stack-version: -any # Default | |
| # require-stack-version: ">=1.1" | |
| # | |
| # Override the architecture used by stack, especially useful on Windows | |
| # arch: i386 | |
| # arch: x86_64 | |
| # | |
| # Extra directories used by stack for building | |
| # extra-include-dirs: [/path/to/dir] | |
| # extra-lib-dirs: [/path/to/dir] | |
| # | |
| # Allow a newer minor version of GHC than the snapshot specifies | |
| # compiler-check: newer-minor |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment