Skip to content

Instantly share code, notes, and snippets.

@dom111
Last active August 14, 2025 21:12
Show Gist options
  • Select an option

  • Save dom111/31e3f1d2200c2552551bde4ec85ebdb6 to your computer and use it in GitHub Desktop.

Select an option

Save dom111/31e3f1d2200c2552551bde4ec85ebdb6 to your computer and use it in GitHub Desktop.
/**
 * Builds (or extends) the frequency table used to generate Markov chains.
 *
 * The input is cut into tokens, wrapped between the sentinels `__BEGIN__` and
 * `__END__`, and for every token the function records how often it follows
 * each of the preceding contexts of length 1..context_levels. A context key is
 * the preceding tokens joined with `-_-`.
 *
 * @param {string} input - Text to learn from.
 * @param {?(string|RegExp)} [separator=null] - Passed to `input.split()` to
 *   produce tokens; `null` means the input is iterated directly (for a string,
 *   one token per code point).
 * @param {?(string|RegExp)} [block_separator=null] - When not `null`, the input
 *   is first split into independent blocks, each learned separately into the
 *   same table.
 * @param {number} [context_levels=5] - Longest context, in tokens, to record.
 * @param {Object<string, Object<string, number>>} [tokens={}] - Table to
 *   extend; it is mutated in place and returned.
 * @returns {{tokens: Object, context_levels: number, separator: string}}
 *   The table together with the settings needed to generate from it.
 */
function build_markov_chain_lookup(input, separator = null, block_separator = null, context_levels = 5, tokens = {}) {
  if (block_separator !== null) {
    for (const block of input.split(block_separator)) {
      build_markov_chain_lookup(block, separator, null, context_levels, tokens);
    }

    return {
      tokens,
      context_levels,
      separator: separator ?? '',
    };
  }

  // Own-property checks only: the `in` operator would also match inherited
  // members such as "constructor" or "toString" when they occur as tokens.
  const has_own = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

  // defineProperty always creates an own data property, even for the key
  // "__proto__", where a plain assignment would change the prototype instead.
  const define_own = (object, key, value) => {
    Object.defineProperty(object, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });

    return value;
  };

  // Build the token list directly instead of `push(...tokens)`, which passes
  // every token as a call argument and overflows the stack on large inputs.
  const set = separator === null ? [...input] : input.split(separator);
  set.push('__END__');

  const previous_tokens = ['__BEGIN__'];

  for (const current of set) {
    // Never ask for more context than exists yet; otherwise slice(-j) returns
    // the same short history for several j and the pair is counted repeatedly.
    const deepest = Math.min(context_levels, previous_tokens.length);

    for (let j = deepest; j > 0; j--) {
      const previous = previous_tokens.slice(-j).join('-_-');
      const followers = has_own(tokens, previous)
        ? tokens[previous]
        : define_own(tokens, previous, {});

      if (has_own(followers, current)) {
        followers[current]++;
      } else {
        define_own(followers, current, 1);
      }
    }

    previous_tokens.push(current);
  }

  return {
    tokens,
    context_levels,
    separator: separator ?? '',
  };
}
/**
 * Returns a new array holding the elements of `array` in random order.
 *
 * The input is left untouched: the shuffle runs on a copy, using the
 * Fisher-Yates algorithm (one swap per element, linear time).
 *
 * @param {Array} array - Elements to shuffle.
 * @returns {Array} A shuffled copy of `array`.
 */
function shuffle_array(array) {
  const shuffled = [...array];

  // Walk backwards, swapping each slot with a random slot at or before it.
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }

  return shuffled;
}
/**
 * Picks one key of `token_data` at random, weighted by its count.
 *
 * @param {Object<string, number>} token_data - Map of candidate token to the
 *   number of times it was observed.
 * @returns {?string} The chosen token, or `null` when no candidate is
 *   selected (for example when `token_data` is empty).
 */
function choose_next_token(token_data) {
  const candidates = Object.entries(token_data);
  const total = candidates.reduce((sum, [, count]) => sum + count, 0);

  // target is an integer in [0, total); each candidate owns `count` of those
  // values, so the first one that drives the remainder below zero wins.
  let target = Math.floor(Math.random() * total);

  for (const [token, count] of candidates) {
    target -= count;

    // Strictly below zero: `< 1` would hand the first candidate one extra
    // slot and take one away from the last.
    if (target < 0) {
      // Returning here (rather than testing truthiness later) keeps an
      // empty-string token a valid choice.
      return token;
    }
  }

  return null;
}
/**
 * Generates `n` random chains from a lookup table.
 *
 * Each chain starts at the `__BEGIN__` sentinel. At every step the longest
 * recorded context (up to `context_levels` tokens, joined with `-_-`) is looked
 * up in `tokens` and the next token is drawn with `choose_next_token`. A chain
 * ends when `__END__` is drawn, or when no recorded context matches or no
 * token can be drawn, so generation always terminates on a dead end.
 *
 * @param {{tokens: Object, context_levels: number, separator: string}} lookup
 *   Table and settings, in the shape returned by `build_markov_chain_lookup`.
 * @param {number} [n=1] - Number of chains to generate.
 * @returns {string[]} The generated chains, tokens joined with `separator`.
 */
function build_markov_chain(lookup, n = 1) {
  const { tokens, context_levels, separator } = lookup;
  const chains = [];

  for (let i = 0; i < n; i++) {
    const output = ['__BEGIN__'];

    for (;;) {
      let next = null;

      // Try the longest context first and fall back to shorter ones.
      for (let j = context_levels; j > 0; j--) {
        const previous = output.slice(-j).join('-_-');

        // Own properties only: a context such as "constructor" must not
        // resolve to a member inherited from Object.prototype.
        const token_data = Object.prototype.hasOwnProperty.call(tokens, previous)
          ? tokens[previous]
          : undefined;

        if (token_data) {
          next = choose_next_token(token_data);
          break;
        }
      }

      // A dead end (no context matched, or nothing could be drawn) ends the
      // chain instead of repeating the last token forever.
      if (next === null || next === undefined || next === '__END__') {
        break;
      }

      output.push(next);
    }

    // Drop the leading __BEGIN__ sentinel.
    chains.push(output.slice(1).join(separator));
  }

  return chains;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment