Created
December 30, 2016 22:22
-
-
Save appendjeff/5e3c3450baa6192724144a5179d6dc45 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from random import sample, choice | |
| from collections import namedtuple | |
| def build_set(X): | |
| """Given a list of elements, returns a random set of such elements.""" | |
| return set(sample(X, choice(X))) | |
| def build_sets(n, x): | |
| """Builds a list of n sets where each set contains a subset of [1 ... x]""" | |
| X = xrange(x) | |
| return [build_set(X) for i in range(n)] | |
| def get_closest_neighbor(set_index, other_sets): | |
| """Return the index of the closest neighbor.""" | |
| return get_k_closest_neighbors(set_index, other_sets, 1)[0] | |
| def get_k_closest_neighbors(set_index, other_sets, k): | |
| """ | |
| Given a set (index) and a list of sets, return a list of | |
| sets (index) that have the most common elements. | |
| The returning list will be in ascending order; sorted by the length | |
| of the union. | |
| :param set_index - <int> Index of the set to be compared against others. | |
| :param other_sets - <list> The population of sets to be compared. | |
| :param k - <int> Number of neighboring sets to return. | |
| """ | |
| CandNeighbor = namedtuple('CandNeighbor', ['set_index', 'len_of_common_elements']) | |
| # might want to use different data structure -- max heap? heapsort? | |
| closest_neighbors = [CandNeighbor(None, 0) for i in range(k)] | |
| _set = other_sets[set_index] | |
| for other_set_index, other_set in enumerate(other_sets): | |
| if set_index == other_set_index: | |
| continue | |
| # update the closest_neighbors | |
| intersection_len = len(_set.intersection(other_set)) | |
| update_neighbors = False | |
| cn_i = k | |
| while cn_i > 0: | |
| cn_i -= 1 | |
| if intersection_len > closest_neighbors[cn_i].len_of_common_elements: | |
| update_neighbors = True | |
| continue | |
| else: | |
| cn_i += 1 | |
| break | |
| if update_neighbors: | |
| closest_neighbors.insert(cn_i, | |
| CandNeighbor(other_set_index,intersection_len)) | |
| closest_neighbors.pop() | |
| return closest_neighbors | |
| def test_get_k_closest_neighbors(): | |
| sets = [ | |
| set([4, 5, 10]), | |
| set([4, 6, 10]), | |
| set(), | |
| set([1, 10, 12, 50]) | |
| ] | |
| closest_neighbors = get_k_closest_neighbors(0, sets, 1) | |
| assert closest_neighbors[0].set_index == 1 | |
| closest_neighbors = get_k_closest_neighbors(0, sets, 2) | |
| assert closest_neighbors[0].set_index == 1 | |
| if __name__ == '__main__': | |
| test_get_k_closest_neighbors() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment