Skip to content

Instantly share code, notes, and snippets.

@appendjeff
Created December 30, 2016 22:22
Show Gist options
  • Select an option

  • Save appendjeff/5e3c3450baa6192724144a5179d6dc45 to your computer and use it in GitHub Desktop.

Select an option

Save appendjeff/5e3c3450baa6192724144a5179d6dc45 to your computer and use it in GitHub Desktop.
from random import sample, choice
from collections import namedtuple
def build_set(X):
"""Given a list of elements, returns a random set of such elements."""
return set(sample(X, choice(X)))
def build_sets(n, x):
"""Builds a list of n sets where each set contains a subset of [1 ... x]"""
X = xrange(x)
return [build_set(X) for i in range(n)]
def get_closest_neighbor(set_index, other_sets):
"""Return the index of the closest neighbor."""
return get_k_closest_neighbors(set_index, other_sets, 1)[0]
def get_k_closest_neighbors(set_index, other_sets, k):
"""
Given a set (index) and a list of sets, return a list of
sets (index) that have the most common elements.
The returning list will be in ascending order; sorted by the length
of the union.
:param set_index - <int> Index of the set to be compared against others.
:param other_sets - <list> The population of sets to be compared.
:param k - <int> Number of neighboring sets to return.
"""
CandNeighbor = namedtuple('CandNeighbor', ['set_index', 'len_of_common_elements'])
# might want to use different data structure -- max heap? heapsort?
closest_neighbors = [CandNeighbor(None, 0) for i in range(k)]
_set = other_sets[set_index]
for other_set_index, other_set in enumerate(other_sets):
if set_index == other_set_index:
continue
# update the closest_neighbors
intersection_len = len(_set.intersection(other_set))
update_neighbors = False
cn_i = k
while cn_i > 0:
cn_i -= 1
if intersection_len > closest_neighbors[cn_i].len_of_common_elements:
update_neighbors = True
continue
else:
cn_i += 1
break
if update_neighbors:
closest_neighbors.insert(cn_i,
CandNeighbor(other_set_index,intersection_len))
closest_neighbors.pop()
return closest_neighbors
def test_get_k_closest_neighbors():
sets = [
set([4, 5, 10]),
set([4, 6, 10]),
set(),
set([1, 10, 12, 50])
]
closest_neighbors = get_k_closest_neighbors(0, sets, 1)
assert closest_neighbors[0].set_index == 1
closest_neighbors = get_k_closest_neighbors(0, sets, 2)
assert closest_neighbors[0].set_index == 1
if __name__ == '__main__':
test_get_k_closest_neighbors()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment