Created
December 23, 2016 22:17
-
-
Save Jamil/e3c021b2bc5c5aa5a77797df4c72ba53 to your computer and use it in GitHub Desktop.
Bigram model generation and pseudorandom text generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import Data.Bits | |
| import Data.List | |
-- | Pair every word with the word that immediately follows it.
--
-- A list with fewer than two elements has no adjacent pairs, so the result
-- is empty for both @[]@ and a singleton list.
--
-- >>> bigrams ["a", "b", "c"]
-- [("a","b"),("b","c")]
bigrams :: [String] -> [(String, String)]
bigrams ws = zip ws (drop 1 ws)  -- 'drop 1' (unlike 'tail') is safe on []
-- | List, in order of occurrence, the second component of every pair whose
-- first component equals @predecessor@.
--
-- Duplicates are kept, so a follower that occurs several times appears
-- several times in the result.
successors :: String -> [(String, String)] -> [String]
successors predecessor bigrams =
  [follower | (leader, follower) <- bigrams, leader == predecessor]
-- | Build an association list from each distinct leading word to the list of
-- words that follow it.
--
-- Keys appear in order of first occurrence (duplicates removed with 'nub');
-- each value is whatever 'successors' returns for that key over @bgs@.
bigrammap :: [(String, String)] -> [(String, [String])]
bigrammap bgs =
  [ (leader, successors leader bgs)
  | leader <- nub (map fst bgs)
  ]
-- | Generate a (possibly infinite, lazily produced) chain of words.
--
-- Starting from word @x@, look it up in the table @lbs@. If it is absent or
-- has no followers, the chain ends. Otherwise @rand seed@ is computed once
-- and used twice: reduced modulo the follower count to pick the next word,
-- and passed on unchanged as the seed for the following step.
--
-- The starting word itself is not part of the output.
gen :: [(String, [String])] -> (Int -> Int) -> Int -> String -> [String]
gen lbs rand seed x = maybe [] continue (lookup x lbs)
  where
    continue [] = []
    continue followers = chosen : gen lbs rand nextSeed chosen
      where
        nextSeed = rand seed
        -- 'mod' by a positive length keeps the index within bounds.
        chosen = followers !! (nextSeed `mod` length followers)
-- | Scramble a seed with three xor-shift steps followed by a multiplication.
--
-- The seed is xor-ed with itself shifted right by 12, then left by 25, then
-- right by 27; the result is multiplied by 2685821657736338717 and and-ed
-- with @2 ^ 64 - 1@.
--
-- NOTE(review): the shift amounts and multiplier look like those of the
-- xorshift64* generator; confirm against the reference before relying on
-- its statistical properties, especially at signed types.
pseudorand :: (Bits a, Num a) => a -> a
pseudorand seed = (scrambled * 2685821657736338717) .&. (2 ^ 64 - 1)
  where
    -- One xor-shift round: combine a value with a shifted copy of itself.
    mixWith shift v = v `xor` shift v
    scrambled =
      mixWith (`shiftR` 27) (mixWith (`shiftL` 25) (mixWith (`shiftR` 12) seed))
-- | Sample corpus: a short word sequence used to build the demo model.
x :: [String]
x =
  [ "the", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"
  , "who", "jumps", "below", "the", "white", "rabbit"
  , "who", "jumps", "over", "the", "green", "turtle"
  , "who", "jumps", "above", "the", "lazy", "cat"
  , "who"
  ]
-- | Successor table for the sample corpus 'x'.
--
-- The word list must first be turned into adjacent pairs by 'bigrams'; only
-- then can 'bigrammap' group those pairs by their leading word. (Applying
-- the two in the opposite order does not type-check.)
grams :: [(String, [String])]
grams = bigrammap (bigrams x)
-- | Generate text from the sample model 'grams', starting after the given
-- word, using 'pseudorand' as the step function with initial seed 1.
genx :: String -> [String]
genx start = gen grams pseudorand 1 start
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment