thetarnav · May 22, 2024 14:12
diff --git a/bitap_search.js b/bitap_search.js
 const MAX_UNICODE = 1114111
 let bit_array = new Int32Array(MAX_UNICODE).fill(~0)

 /**
 * This algorithm determines if a given short string exists in a larger string with k errors.
 * It's useful for search-as-you-type features.
 * 
 * @param {string} text 
 * @param {string} pattern 
 * @returns -1 when not found, otherwise the index of the first occurrence of the pattern in the text
 */
 function bitap_search(text, pattern) {
 	let len = pattern.length
 	let R = ~1

 	if (len === 0 || len > text.length) {
 		return -1
 	}
 	
 	bit_array.fill(~0) // reset

 	for (let i = 0; i < len; i++) {
 		bit_array[pattern.charCodeAt(i)] &= ~(1 << i)
 	}

 	for (let i = 0; i < text.length; i++) {
 		R |= bit_array[text.charCodeAt(i)]
 		R <<= 1
 		if ((R & (1 << len)) === 0) {
 			return i - len + 1
 		}
 	}

 	return -1
 }
diff --git a/fuzzy_match.js b/fuzzy_match.js
 /*

 The JavaScript implementation of version v0.2.0 of the __fts_fuzzy_match__ algorithm by Forrest Smith,
 updated by nrgwsth, to include the "exhaustive search" functionality
 described in the 2017 update to the _Reverse Engineering Sublime Text’s Fuzzy Match_ article.

 Copied from https://github.com/tajmone/fuzzy-search/blob/master/fts_fuzzy_match/0.2.0/js

 */

 const BONUS_SEQUENTIAL   = 15 // bonus for adjacent matches
 const BONUS_SEPARATOR    = 30 // bonus if match occurs after a separator
 const BONUS_CAMEL        = 30 // bonus if match is uppercase and prev is lower
 const BONUS_FIRST_LETTER = 15 // bonus if the first letter is matched

 const PENALTY_LEADING_LETTER     = -5  // penalty applied for every letter in str before the first match
 const PENALTY_MAX_LEADING_LETTER = -15 // maximum penalty for leading letters
 const PENALTY_UNMATCHED_LETTER   = -1

 /**
 * Returns true if each character in pattern is found sequentially within str
 * 
 * @param   {string}  pattern
 * @param   {string}  str
 * @returns {boolean} */
 export function fuzzy_match_simple(pattern, str) {
 	let pattern_idx = 0
 	let str_idx     = 0

 	while (pattern_idx !== pattern.length && str_idx !== str.length) {
 		const pattern_char = pattern.charAt(pattern_idx).toLowerCase()
 		const str_char     = str    .charAt(str_idx)    .toLowerCase()

 		if (pattern_char === str_char) {
 			pattern_idx += 1
 		}
 		str_idx += 1
 	}

 	return pattern.length !== 0 && str.length !== 0 && pattern_idx === pattern.length
 }

 /**
 * Does a fuzzy search to find pattern inside a string.
 * 
 * @param   {string} pattern    pattern to search for
 * @param   {string} str        string which is being searched
 * @returns {[boolean, number]} a boolean which tells if pattern was
 *                              found or not and a search score
 */
 export function fuzzy_match(pattern, str) {
 	return _fuzzy_match(
 		pattern,
 		str,
 		0,      /* pattern_cur_index */
 		0,      /* str_curr_index */
 		null,   /* src_matces */
 		[],     /* matches */
 		256,    /* max_matches */
 		0,      /* next_match */
 		0,      /* recursion_count */
 		10,     /* recursion_limit */
 	);
 }

 /**
 * @param   {string           } pattern 
 * @param   {string           } str 
 * @param   {number           } pattern_cur_index 
 * @param   {number           } str_curr_index 
 * @param   {number[] | null  } src_matces 
 * @param   {number[]         } matches
 * @param   {number           } max_matches 
 * @param   {number           } next_match 
 * @param   {number           } recursion_count 
 * @param   {number           } recursion_limit 
 * @returns {[boolean, number]} */
 function _fuzzy_match(
 	pattern,
 	str,
 	pattern_cur_index,
 	str_curr_index,
 	src_matces,
 	matches,
 	max_matches,
 	next_match,
 	recursion_count,
 	recursion_limit,
 ) {
 	let out_score = 0

 	// Return if recursion limit is reached.
 	if (++recursion_count >= recursion_limit) {
 		return [false, out_score]
 	}

 	// Return if we reached ends of strings.
 	if (pattern_cur_index === pattern.length || str_curr_index === str.length) {
 		return [false, out_score]
 	}

 	// Recursion params
 	let recursive_match = false
 	let best_recursive_matches = []
 	let best_recursive_score = 0

 	// Loop through pattern and str looking for a match.
 	let first_match = true
 	while (pattern_cur_index < pattern.length && str_curr_index < str.length) {
 		// Match found.
 		if (
 			pattern[pattern_cur_index].toLowerCase() === str[str_curr_index].toLowerCase()
 		) {
 			if (next_match >= max_matches) {
 				return [false, out_score]
 			}

 			if (first_match && src_matces) {
 				matches = [...src_matces]
 				first_match = false
 			}

 			let recursive_matches = []
 			const [matched, recursiveScore] = _fuzzy_match(
 				pattern,
 				str,
 				pattern_cur_index,
 				str_curr_index + 1,
 				matches,
 				recursive_matches,
 				max_matches,
 				next_match,
 				recursion_count,
 				recursion_limit
 			)

 			if (matched) {
 				// Pick best recursive score.
 				if (!recursive_match || recursiveScore > best_recursive_score) {
 					best_recursive_matches = [...recursive_matches]
 					best_recursive_score = recursiveScore
 				}
 				recursive_match = true
 			}

 			matches[next_match++] = str_curr_index
 			++pattern_cur_index
 		}
 		++str_curr_index
 	}

 	const matched = pattern_cur_index === pattern.length

 	if (matched) {
 		out_score = 100

 		// Apply leading letter penalty
 		let penalty = PENALTY_LEADING_LETTER * matches[0]
 		penalty = penalty < PENALTY_MAX_LEADING_LETTER
 			? PENALTY_MAX_LEADING_LETTER
 			: penalty
 		out_score += penalty

 		// Apply unmatched penalty
 		out_score += PENALTY_UNMATCHED_LETTER * (str.length - next_match)

 		// Apply ordering bonuses
 		for (let i = 0; i < next_match; i++) {
 			const curr_idx = matches[i]

 			if (i > 0) {
 				const prev_idx = matches[i - 1]
 				if (curr_idx == prev_idx + 1) {
 					out_score += BONUS_SEQUENTIAL
 				}
 			}

 			// Check for bonuses based on neighbor character value.
 			if (curr_idx > 0) {
 				// Camel case
 				const neighbor = str[curr_idx - 1]
 				const curr = str[curr_idx]
 				if (
 					neighbor === neighbor.toLowerCase() &&
 					curr === curr.toUpperCase()
 				) {
 					out_score += BONUS_CAMEL
 				}
 				// is neighbor separator
 				if (neighbor == "_" || neighbor == " ") {
 					out_score += BONUS_SEPARATOR
 				}
 			} else {
 				// First letter
 				out_score += BONUS_FIRST_LETTER
 			}
 		}

 		// Return best result
 		if (recursive_match && (!matched || best_recursive_score > out_score)) {
 			// Recursive score is better than "this"
 			matches = [...best_recursive_matches]
 			out_score = best_recursive_score
 			return [true, out_score]
 		} else if (matched) {
 			// "this" score is better than recursive
 			return [true, out_score]
 		} else {
 			return [false, out_score]
 		}
 	}
 	return [false, out_score]
 }
diff --git a/levenshtein_distance.js b/levenshtein_distance.js
 /**
 * calculates the minimum number of single-character edits \
 * (insertions, deletions, or substitutions)               \
 * required to change one word into another.               \
 * It's a good choice for spell checking or auto-correct features.
 * 
 * @param {string} a
 * @param {string} b
 * @returns {number} the Levenshtein distance between the two strings
 * - 0 if the strings are equal
 * - a positive integer if the strings are different
 */
 function levenshtein_distance(a, b) {
 	let mat = new Array(b.length + 1)

 	for (let i = 0; i <= b.length; i += 1) {
 		mat[i] = new Array(a.length + 1)
 		mat[i][0] = i
 	}

 	for (let j = 0; j <= a.length; j += 1) {
 		mat[0][j] = j
 	}

 	for (let i = 1; i <= b.length; i += 1) {
 		for (let j = 1; j <= a.length; j += 1) {
 			if (b[i-1] == a[j-1]) {
 				mat[i][j] = mat[i-1][j-1]
 			} else {
 				mat[i][j] = Math.min(
 					mat[i-1][j-1] + 1, // substitution
 					mat[i  ][j-1] + 1, // insertion
 					mat[i-1][j  ] + 1, // deletion
 				)
 			}
 		}
 	}

 	return mat[b.length][a.length]
 }

 /**
 * @param {string  } query 
 * @param {string[]} strings 
 * @param {number  } max_results 
 * @returns indexes array, pointing to the strings array
 */
 export function search_by_levenshtein_distance(query, strings, max_results) {
 	/** @type {number[]} */ let indexes = new Array(max_results)
 	/** @type {number[]} */ let scores  = new Array(max_results)
 	let len = 0

 	strings_loop: for (let str_idx = 0; str_idx < strings.length; str_idx += 1) {
 		let string = strings[str_idx]
 		let score = levenshtein_distance(string, query)

 		for (let i = 0; i < len; i += 1) {
 			if (score < scores[i]) {
 				// shift to the right
 				for (let j = len; j > i; j -= 1) {
 					indexes[j] = indexes[j-1]
 					scores [j] = scores [j-1]
 				}

 				scores [i] = score
 				indexes[i] = str_idx
 				continue strings_loop
 			}
 		}

 		if (len < max_results) {
 			scores [len] = score
 			indexes[len] = str_idx
 			len += 1
 		}
 	}

 	if (len < max_results) {
 		indexes = indexes.slice(0, len)
 	}

 	return indexes
 }
	const MAX_UNICODE = 1114111
	let bit_array = new Int32Array(MAX_UNICODE).fill(~0)

	/**
	* This algorithm determines if a given short string exists in a larger string with k errors.
	* It's useful for search-as-you-type features.
	*
	* @param {string} text
	* @param {string} pattern
	* @returns -1 when not found, otherwise the index of the first occurrence of the pattern in the text
	*/
	function bitap_search(text, pattern) {
	let len = pattern.length
	let R = ~1

	if (len === 0 \|\| len > text.length) {
	return -1
	}

	bit_array.fill(~0) // reset

	for (let i = 0; i < len; i++) {
	bit_array[pattern.charCodeAt(i)] &= ~(1 << i)
	}

	for (let i = 0; i < text.length; i++) {
	R \|= bit_array[text.charCodeAt(i)]
	R <<= 1
	if ((R & (1 << len)) === 0) {
	return i - len + 1
	}
	}

	return -1
	}
	/*

	The JavaScript implementation of version v0.2.0 of the __fts_fuzzy_match__ algorithm by Forrest Smith,
	updated by nrgwsth, to include the "exhaustive search" functionality
	described in the 2017 update to the _Reverse Engineering Sublime Text’s Fuzzy Match_ article.

	Copied from https://github.com/tajmone/fuzzy-search/blob/master/fts_fuzzy_match/0.2.0/js

	*/

	const BONUS_SEQUENTIAL = 15 // bonus for adjacent matches
	const BONUS_SEPARATOR = 30 // bonus if match occurs after a separator
	const BONUS_CAMEL = 30 // bonus if match is uppercase and prev is lower
	const BONUS_FIRST_LETTER = 15 // bonus if the first letter is matched

	const PENALTY_LEADING_LETTER = -5 // penalty applied for every letter in str before the first match
	const PENALTY_MAX_LEADING_LETTER = -15 // maximum penalty for leading letters
	const PENALTY_UNMATCHED_LETTER = -1

	/**
	* Returns true if each character in pattern is found sequentially within str
	*
	* @param {string} pattern
	* @param {string} str
	* @returns {boolean} */
	export function fuzzy_match_simple(pattern, str) {
	let pattern_idx = 0
	let str_idx = 0

	while (pattern_idx !== pattern.length && str_idx !== str.length) {
	const pattern_char = pattern.charAt(pattern_idx).toLowerCase()
	const str_char = str .charAt(str_idx) .toLowerCase()

	if (pattern_char === str_char) {
	pattern_idx += 1
	}
	str_idx += 1
	}

	return pattern.length !== 0 && str.length !== 0 && pattern_idx === pattern.length
	}

	/**
	* Does a fuzzy search to find pattern inside a string.
	*
	* @param {string} pattern pattern to search for
	* @param {string} str string which is being searched
	* @returns {[boolean, number]} a boolean which tells if pattern was
	* found or not and a search score
	*/
	export function fuzzy_match(pattern, str) {
	return _fuzzy_match(
	pattern,
	str,
	0, /* pattern_cur_index */
	0, /* str_curr_index */
	null, /* src_matces */
	[], /* matches */
	256, /* max_matches */
	0, /* next_match */
	0, /* recursion_count */
	10, /* recursion_limit */
	);
	}

	/**
	* @param {string } pattern
	* @param {string } str
	* @param {number } pattern_cur_index
	* @param {number } str_curr_index
	* @param {number[] \| null } src_matces
	* @param {number[] } matches
	* @param {number } max_matches
	* @param {number } next_match
	* @param {number } recursion_count
	* @param {number } recursion_limit
	* @returns {[boolean, number]} */
	function _fuzzy_match(
	pattern,
	str,
	pattern_cur_index,
	str_curr_index,
	src_matces,
	matches,
	max_matches,
	next_match,
	recursion_count,
	recursion_limit,
	) {
	let out_score = 0

	// Return if recursion limit is reached.
	if (++recursion_count >= recursion_limit) {
	return [false, out_score]
	}

	// Return if we reached ends of strings.
	if (pattern_cur_index === pattern.length \|\| str_curr_index === str.length) {
	return [false, out_score]
	}

	// Recursion params
	let recursive_match = false
	let best_recursive_matches = []
	let best_recursive_score = 0

	// Loop through pattern and str looking for a match.
	let first_match = true
	while (pattern_cur_index < pattern.length && str_curr_index < str.length) {
	// Match found.
	if (
	pattern[pattern_cur_index].toLowerCase() === str[str_curr_index].toLowerCase()
	) {
	if (next_match >= max_matches) {
	return [false, out_score]
	}

	if (first_match && src_matces) {
	matches = [...src_matces]
	first_match = false
	}

	let recursive_matches = []
	const [matched, recursiveScore] = _fuzzy_match(
	pattern,
	str,
	pattern_cur_index,
	str_curr_index + 1,
	matches,
	recursive_matches,
	max_matches,
	next_match,
	recursion_count,
	recursion_limit
	)

	if (matched) {
	// Pick best recursive score.
	if (!recursive_match \|\| recursiveScore > best_recursive_score) {
	best_recursive_matches = [...recursive_matches]
	best_recursive_score = recursiveScore
	}
	recursive_match = true
	}

	matches[next_match++] = str_curr_index
	++pattern_cur_index
	}
	++str_curr_index
	}

	const matched = pattern_cur_index === pattern.length

	if (matched) {
	out_score = 100

	// Apply leading letter penalty
	let penalty = PENALTY_LEADING_LETTER * matches[0]
	penalty = penalty < PENALTY_MAX_LEADING_LETTER
	? PENALTY_MAX_LEADING_LETTER
	: penalty
	out_score += penalty

	// Apply unmatched penalty
	out_score += PENALTY_UNMATCHED_LETTER * (str.length - next_match)

	// Apply ordering bonuses
	for (let i = 0; i < next_match; i++) {
	const curr_idx = matches[i]

	if (i > 0) {
	const prev_idx = matches[i - 1]
	if (curr_idx == prev_idx + 1) {
	out_score += BONUS_SEQUENTIAL
	}
	}

	// Check for bonuses based on neighbor character value.
	if (curr_idx > 0) {
	// Camel case
	const neighbor = str[curr_idx - 1]
	const curr = str[curr_idx]
	if (
	neighbor === neighbor.toLowerCase() &&
	curr === curr.toUpperCase()
	) {
	out_score += BONUS_CAMEL
	}
	// is neighbor separator
	if (neighbor == "_" \|\| neighbor == " ") {
	out_score += BONUS_SEPARATOR
	}
	} else {
	// First letter
	out_score += BONUS_FIRST_LETTER
	}
	}

	// Return best result
	if (recursive_match && (!matched \|\| best_recursive_score > out_score)) {
	// Recursive score is better than "this"
	matches = [...best_recursive_matches]
	out_score = best_recursive_score
	return [true, out_score]
	} else if (matched) {
	// "this" score is better than recursive
	return [true, out_score]
	} else {
	return [false, out_score]
	}
	}
	return [false, out_score]
	}
	/**
	* calculates the minimum number of single-character edits \
	* (insertions, deletions, or substitutions) \
	* required to change one word into another. \
	* It's a good choice for spell checking or auto-correct features.
	*
	* @param {string} a
	* @param {string} b
	* @returns {number} the Levenshtein distance between the two strings
	* - 0 if the strings are equal
	* - a positive integer if the strings are different
	*/
	function levenshtein_distance(a, b) {
	let mat = new Array(b.length + 1)

	for (let i = 0; i <= b.length; i += 1) {
	mat[i] = new Array(a.length + 1)
	mat[i][0] = i
	}

	for (let j = 0; j <= a.length; j += 1) {
	mat[0][j] = j
	}

	for (let i = 1; i <= b.length; i += 1) {
	for (let j = 1; j <= a.length; j += 1) {
	if (b[i-1] == a[j-1]) {
	mat[i][j] = mat[i-1][j-1]
	} else {
	mat[i][j] = Math.min(
	mat[i-1][j-1] + 1, // substitution
	mat[i ][j-1] + 1, // insertion
	mat[i-1][j ] + 1, // deletion
	)
	}
	}
	}

	return mat[b.length][a.length]
	}

	/**
	* @param {string } query
	* @param {string[]} strings
	* @param {number } max_results
	* @returns indexes array, pointing to the strings array
	*/
	export function search_by_levenshtein_distance(query, strings, max_results) {
	/** @type {number[]} */ let indexes = new Array(max_results)
	/** @type {number[]} */ let scores = new Array(max_results)
	let len = 0

	strings_loop: for (let str_idx = 0; str_idx < strings.length; str_idx += 1) {
	let string = strings[str_idx]
	let score = levenshtein_distance(string, query)

	for (let i = 0; i < len; i += 1) {
	if (score < scores[i]) {
	// shift to the right
	for (let j = len; j > i; j -= 1) {
	indexes[j] = indexes[j-1]
	scores [j] = scores [j-1]
	}

	scores [i] = score
	indexes[i] = str_idx
	continue strings_loop
	}
	}

	if (len < max_results) {
	scores [len] = score
	indexes[len] = str_idx
	len += 1
	}
	}

	if (len < max_results) {
	indexes = indexes.slice(0, len)
	}

	return indexes
	}