Created
July 18, 2020 19:33
-
-
Save demdxx/f06af44747361b66f0eb51728d5f2132 to your computer and use it in GitHub Desktop.
Cosine Similarity implementation in MySQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- for calculation of norm vector -- | |
| DELIMITER $$ | |
| CREATE FUNCTION vector_norm( vector JSON ) | |
| RETURNS DOUBLE | |
| READS SQL DATA | |
| DETERMINISTIC | |
| BEGIN | |
| DECLARE array_length INTEGER(11); | |
| DECLARE retval DOUBLE(19,2); | |
| DECLARE cell_value DOUBLE(19,2); | |
| DECLARE idx INT(11); | |
| SELECT json_length( vector ) INTO array_length; | |
| SET retval = 0.0; | |
| SET idx = 0; | |
| WHILE idx < array_length DO | |
| SELECT json_extract( vector, concat( '$[', idx, ']' ) ) INTO cell_value; | |
| SET retval = retval + POWER(cell_value, 2); | |
| SET idx = idx + 1; | |
| END WHILE; | |
| RETURN SQRT(retval); | |
| END$$ | |
| DELIMITER ; | |
| -- dot product implementation -- | |
| DELIMITER $$ | |
| CREATE FUNCTION dot_product( vector1 JSON, vector2 JSON ) | |
| RETURNS DOUBLE | |
| READS SQL DATA | |
| DETERMINISTIC | |
| BEGIN | |
| DECLARE array_length INTEGER(11); | |
| DECLARE retval DOUBLE(19,2); | |
| DECLARE cell_value1 DOUBLE(19,2); | |
| DECLARE cell_value2 DOUBLE(19,2); | |
| DECLARE idx INT(11); | |
| SELECT json_length( vector1 ) INTO array_length; | |
| SET retval = 0.0; | |
| SET idx = 0; | |
| WHILE idx < array_length DO | |
| SELECT json_extract( vector1, concat( '$[', idx, ']' ) ) INTO cell_value1; | |
| SELECT json_extract( vector2, concat( '$[', idx, ']' ) ) INTO cell_value2; | |
| SET retval = retval + cell_value1 * cell_value2; | |
| SET idx = idx + 1; | |
| END WHILE; | |
| RETURN retval; | |
| END$$ | |
| DELIMITER ; | |
| -- cosine similarity calculation -- | |
| DELIMITER $$ | |
| CREATE FUNCTION cosine_similarity( vector1 JSON, vector2 JSON ) | |
| RETURNS DOUBLE | |
| READS SQL DATA | |
| DETERMINISTIC | |
| BEGIN | |
| DECLARE retval DOUBLE(19,2); | |
| SELECT dot_product(vector1, vector2) / (vector_norm(vector1) * vector_norm(vector2)) INTO retval; | |
| RETURN retval; | |
| END$$ | |
| DELIMITER ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you ! We are looking for this feature in production for mysql. what do you suggest ?