Created
March 17, 2012 17:52
-
-
Save lucidjargon/2063465 to your computer and use it in GitHub Desktop.
Simple minhash in F#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Returns the third component of a 3-tuple.
let third (_, _, last) = last
/// Returns the first component of a 3-tuple.
let fst3 (first, _, _) = first
/// Walks `data` left to right, merging consecutive elements with `op` into
/// groups of `c` elements, and collects the `hash` of every completed group.
///
/// Returns a triple `(hashes, pending, pendingCount)`:
///   - `hashes`       : set of hashes of the groups that were completed,
///   - `pending`      : the group still being built when the input ran out
///                      (its hash is NOT added to `hashes`),
///   - `pendingCount` : how many elements `pending` currently holds.
///
/// `data` must contain at least one element, because `data.[0]` seeds the fold.
let split op c (data: 'a []) =
    // One fold step: either close the current group and start a new one with
    // `element`, or extend the current group by `element`.
    let step (hashes, group, filled) element =
        if filled = c then
            Set.add (hash group) hashes, element, 1
        else
            hashes, op group element, filled + 1

    // Everything after the first element; the first element is the seed group.
    let rest = data.[1..data.Length - 1]
    Array.fold step (Set.empty, data.[0], 1) rest
/// Returns the smallest element of `vset` (delegates to `Set.minElement`).
let minhash vset = Set.minElement vset
/// Moves the smallest elements of `s` into `l`, one per step, and returns the
/// resulting set.
///
///   - `ci` : current step counter (callers start it at 0),
///   - `k`  : step limit; recursion stops once `ci >= k`,
///   - `l`  : accumulator set that receives the minima,
///   - `s`  : set the minima are drawn from.
///
/// Stops early when `s` runs out of elements, so at most `k - ci` elements
/// are added to `l`.
let rec minhashes ci k l s =
    // Set.isEmpty is a constant-time check; Set.count would walk the whole
    // set on every recursion step just to compare the result with zero.
    if ci >= k || Set.isEmpty s then
        l
    else
        let smallest = Set.minElement s
        minhashes (ci + 1) k (Set.add smallest l) (Set.remove smallest s)
/// Converts a string into an array of one-character strings.
let charArr (s: string) =
    Array.map (fun (ch: char) -> string ch) (s.ToCharArray())
// Sample documents to compare.
let arr =
    [ "The cat in the hat"
      "The fat cat wears many hats"
      "race car"
      "fast car"
      "race horse" ]

// For each document: split into one-character strings, concatenate them in
// groups of 2, hash each completed group, and keep up to 5 of the smallest
// hashes.
let sass =
    arr
    |> List.map (fun text ->
        text
        |> charArr
        |> split (+) 2
        |> fst3
        |> minhashes 0 5 Set.empty)

// NOTE(review): `rank` is not defined in the visible part of this file;
// presumably it yields triples whose third item is a score — confirm.
sass |> rank (List.toArray arr) 0 [] |> List.sortBy third
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
("C:\Users\sir.deenicus\Downloads\Ch13.5-ConditionalRandomFields.pdf",
"C:\Users\sir.deenicus\Downloads\ftml_book.pdf", 6);
("C:\Users\sir.deenicus\Downloads\AGI-book(7-Mar-2012).pdf",
"C:\Users\sir.deenicus\Downloads\ftml_book.pdf", 6);
("C:\Users\sir.deenicus\Downloads\10.1.1.64.3559.pdf",
"C:\Users\sir.deenicus\Downloads\ftml_book.pdf", 6);
("C:\Users\sir.deenicus\Downloads\10.1.1.116.4959.pdf",
"C:\Users\sir.deenicus\Downloads\ftml_book.pdf", 6);
("C:\Users\sir.deenicus\Downloads\ftml_book.pdf",
"C:\Users\sir.deenicus\Downloads\weighted majority.pdf", 5);
("C:\Users\sir.deenicus\Downloads\ftml_book.pdf",
"C:\Users\sir.deenicus\Downloads\networks-book.pdf", 5);
("C:\Users\sir.deenicus\Downloads\ftml.pdf",
"C:\Users\sir.deenicus\Downloads\networks-book.pdf", 5);
("C:\Users\sir.deenicus\Downloads\ftml.pdf",
"C:\Users\sir.deenicus\Downloads\ftml_book.pdf", 5);