appendjeff · December 30, 2016 22:22
diff --git a/neighbor_sets.py b/neighbor_sets.py
 from random import sample, choice
 from collections import namedtuple


 def build_set(X):
    """Given a list of elements, returns a random set of such elements."""
    return set(sample(X, choice(X)))


 def build_sets(n, x):
    """Builds a list of n sets where each set contains a subset of [1 ... x]"""
    X = xrange(x)
    return [build_set(X) for i in range(n)]


 def get_closest_neighbor(set_index, other_sets):
    """Return the index of the closest neighbor."""
    return get_k_closest_neighbors(set_index, other_sets, 1)[0]


 def get_k_closest_neighbors(set_index, other_sets, k):
    """
    Given a set (index) and a list of sets, return a list of
    sets (index) that have the most common elements.

    The returning list will be in ascending order; sorted by the length
        of the union.

    :param set_index - <int> Index of the set to be compared against others.

    :param other_sets - <list> The population of sets to be compared.

    :param k - <int> Number of neighboring sets to return.
    """
    CandNeighbor = namedtuple('CandNeighbor', ['set_index', 'len_of_common_elements'])
    # might want to use different data structure -- max heap? heapsort?
    closest_neighbors = [CandNeighbor(None, 0) for i in range(k)]

    _set = other_sets[set_index]
    for other_set_index, other_set in enumerate(other_sets):
        if set_index == other_set_index:
            continue

        # update the closest_neighbors
        intersection_len = len(_set.intersection(other_set))
        update_neighbors = False
        cn_i  = k
        while cn_i > 0:
            cn_i -= 1
            if intersection_len > closest_neighbors[cn_i].len_of_common_elements:
                update_neighbors = True
                continue
            else:
                cn_i += 1
                break
        if update_neighbors:
            closest_neighbors.insert(cn_i,
                    CandNeighbor(other_set_index,intersection_len))
            closest_neighbors.pop()
    return closest_neighbors


 def test_get_k_closest_neighbors():
    sets = [
            set([4, 5, 10]),
            set([4, 6, 10]),
            set(),
            set([1, 10, 12, 50])
            ]
    closest_neighbors = get_k_closest_neighbors(0, sets, 1)
    assert closest_neighbors[0].set_index == 1

    closest_neighbors = get_k_closest_neighbors(0, sets, 2)
    assert closest_neighbors[0].set_index == 1


 if __name__ == '__main__':
    test_get_k_closest_neighbors()
	from random import sample, choice
	from collections import namedtuple


	def build_set(X):
	"""Given a list of elements, returns a random set of such elements."""
	return set(sample(X, choice(X)))


	def build_sets(n, x):
	"""Builds a list of n sets where each set contains a subset of [1 ... x]"""
	X = xrange(x)
	return [build_set(X) for i in range(n)]


	def get_closest_neighbor(set_index, other_sets):
	"""Return the index of the closest neighbor."""
	return get_k_closest_neighbors(set_index, other_sets, 1)[0]


	def get_k_closest_neighbors(set_index, other_sets, k):
	"""
	Given a set (index) and a list of sets, return a list of
	sets (index) that have the most common elements.

	The returning list will be in ascending order; sorted by the length
	of the union.

	:param set_index - <int> Index of the set to be compared against others.

	:param other_sets - <list> The population of sets to be compared.

	:param k - <int> Number of neighboring sets to return.
	"""
	CandNeighbor = namedtuple('CandNeighbor', ['set_index', 'len_of_common_elements'])
	# might want to use different data structure -- max heap? heapsort?
	closest_neighbors = [CandNeighbor(None, 0) for i in range(k)]

	_set = other_sets[set_index]
	for other_set_index, other_set in enumerate(other_sets):
	if set_index == other_set_index:
	continue

	# update the closest_neighbors
	intersection_len = len(_set.intersection(other_set))
	update_neighbors = False
	cn_i = k
	while cn_i > 0:
	cn_i -= 1
	if intersection_len > closest_neighbors[cn_i].len_of_common_elements:
	update_neighbors = True
	continue
	else:
	cn_i += 1
	break
	if update_neighbors:
	closest_neighbors.insert(cn_i,
	CandNeighbor(other_set_index,intersection_len))
	closest_neighbors.pop()
	return closest_neighbors


	def test_get_k_closest_neighbors():
	sets = [
	set([4, 5, 10]),
	set([4, 6, 10]),
	set(),
	set([1, 10, 12, 50])
	]
	closest_neighbors = get_k_closest_neighbors(0, sets, 1)
	assert closest_neighbors[0].set_index == 1

	closest_neighbors = get_k_closest_neighbors(0, sets, 2)
	assert closest_neighbors[0].set_index == 1


	if __name__ == '__main__':
	test_get_k_closest_neighbors()
No results found