Skip to content

Instantly share code, notes, and snippets.

@paucus
Created March 13, 2017 19:11
Show Gist options
  • Select an option

  • Save paucus/2a1662b31a73f704e2533ad2e77bc9e0 to your computer and use it in GitHub Desktop.

Select an option

Save paucus/2a1662b31a73f704e2533ad2e77bc9e0 to your computer and use it in GitHub Desktop.
-module(index).
-export([index_file/1]).
%% Opens the file called Name for reading and returns its lines as a list of
%% strings, in file order. Crashes with badmatch if the file cannot be opened.
readlines(Name) ->
    Opened = file:open(Name, [read]),
    {ok, Device} = Opened,
    extract_content(Device, []).
%% Reads every remaining line from the open io device File and returns
%% Partial followed by those lines, in file order. The device is closed
%% before returning.
%%
%% Only a trailing newline is stripped from each line. A final line that does
%% not end in a newline is therefore kept intact instead of losing its last
%% character.
%%
%% Raises error({read_error, Reason}) if io:get_line/2 reports an error; the
%% device is closed first.
-spec extract_content(file:io_device(), [string()]) -> [string()].
extract_content(File, Partial) ->
    %% Accumulate in reverse so each line is added in constant time.
    collect_lines(File, lists:reverse(Partial)).

%% Tail-recursive reader; RevAcc holds the lines seen so far, newest first.
collect_lines(File, RevAcc) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            lists:reverse(RevAcc);
        {error, Reason} ->
            file:close(File),
            erlang:error({read_error, Reason});
        Line ->
            collect_lines(File, [strip_newline(Line) | RevAcc])
    end.

%% Drops a single trailing "\n" if present; otherwise returns Line unchanged.
strip_newline(Line) ->
    case lists:reverse(Line) of
        [$\n | RevRest] -> lists:reverse(RevRest);
        _ -> Line
    end.
%% Prints each element of the given list on its own line using the "~s"
%% format directive, then returns ok.
show_file_contents(Lines) ->
    lists:foreach(fun(Text) -> io:format("~s~n", [Text]) end, Lines).
%% Returns the input string with every full stop, comma, semicolon, colon,
%% tab, newline, single quote and double quote removed. All other characters
%% are kept in their original order.
nopunct(Text) ->
    [Char || Char <- Text, not lists:member(Char, ".,;:\t\n\'\"")].
%% Adds to Y each element of the first list that is not already in the
%% result built so far. New elements are pushed onto the front, so they
%% appear in reverse order of first occurrence, ahead of the original Y.
dedup(Items, Y) ->
    Keep = fun(Item, Seen) ->
        case lists:member(Item, Seen) of
            true -> Seen;
            false -> [Item | Seen]
        end
    end,
    lists:foldl(Keep, Y, Items).
%% Collapses a list of line numbers into inclusive {From, To} ranges, one per
%% run of consecutive numbers (each exactly one more than the previous).
%%
%%   Lines - the line numbers to compress
%%   Y     - ranges already produced; the new ranges are appended after them
%%   Z     - first line number of the run that is open when the call starts
%%           (callers pass the head of Lines)
%%
%% Returns Y followed by the ranges found in Lines. The final range starts at
%% the beginning of its run, so [1,2,3] yields [{1,3}], not [{3,3}].
-spec make_index([integer()], [{integer(), integer()}], integer()) ->
    [{integer(), integer()}].
make_index([], Y, _) ->
    Y;
make_index(Lines, Y, Z) ->
    %% Build the new ranges once and append a single time, rather than
    %% appending to Y on every closed run.
    Y ++ collect_ranges(Lines, Z).

%% Walks the line numbers, closing the run that started at Start whenever the
%% next number is not the direct successor of the current one.
collect_ranges([Last], Start) ->
    [{Start, Last}];
collect_ranges([Current, Next | Rest], Start) when Next =:= Current + 1 ->
    collect_ranges([Next | Rest], Start);
collect_ranges([Current, Next | Rest], Start) ->
    [{Start, Current} | collect_ranges([Next | Rest], Next)].
%% Merges one {Word, LineNumbers} entry into a list of such entries.
%% If an entry whose key compares equal (==) to Word exists, the new line
%% numbers are appended to that entry's list and the rest of the list is left
%% unchanged. Otherwise the new entry is added at the end.
combine_indices({_, _} = Entry, []) ->
    [Entry];
combine_indices({Word, NewLines}, [{Known, KnownLines} | Rest]) when Word == Known ->
    [{Known, KnownLines ++ NewLines} | Rest];
combine_indices({_, _} = Entry, [{_, _} = Other | Rest]) ->
    [Other | combine_indices(Entry, Rest)].
%% Merges a list of {Word, LineNumbers} entries into an existing index.
%% When either argument is empty the other one is returned as is; otherwise
%% each entry is folded into the index, left to right, with combine_indices/2.
index_words([], Index) ->
    Index;
index_words(Entries, []) ->
    Entries;
index_words(Entries, Index) ->
    lists:foldl(fun combine_indices/2, Index, Entries).
%% Builds a word index from a list of text lines.
%%
%%   Lines - the lines to index, in order
%%   Y     - the index built so far, a list of {Word, LineNumbers} entries
%%   N     - the number given to the first line; it goes up by one per line
%%
%% Each line has its punctuation removed with nopunct/1, is split on spaces,
%% and has repeated words dropped with dedup/2. Every remaining word is then
%% recorded against the current line number via index_words/2.
create_word_index(Lines, Y, N) ->
    Step = fun(Line, {Index, LineNo}) ->
        Unique = dedup(string:tokens(nopunct(Line), " "), []),
        Tagged = [{Word, [LineNo]} || Word <- Unique],
        {index_words(Tagged, Index), LineNo + 1}
    end,
    {Result, _NextLineNo} = lists:foldl(Step, {Y, N}, Lines),
    Result.
%% Converts every {Word, LineNumbers} entry into {Word, Ranges}, where Ranges
%% is what make_index/3 returns for the line numbers, starting from an empty
%% range list with the first line number as the initial run start.
index(Entries) ->
    [
        begin
            {Word, LineNos} = Entry,
            {Word, make_index(LineNos, [], hd(LineNos))}
        end
     || Entry <- Entries
    ].
%% Reads the file called Name and returns its word index: a list of
%% {Word, Ranges} entries, with line numbers counted from 1.
%%
%% Usage:
%%   index:index_file("./gettysburg-address.txt").
index_file(Name) ->
    index(create_word_index(readlines(Name), [], 1)).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment