Skip to content

Instantly share code, notes, and snippets.

@khoaoaoaoa
Created March 2, 2024 07:54
Show Gist options
  • Select an option

  • Save khoaoaoaoa/514552b90f2d1ffdfeb4baaf0c6e730f to your computer and use it in GitHub Desktop.

Select an option

Save khoaoaoaoa/514552b90f2d1ffdfeb4baaf0c6e730f to your computer and use it in GitHub Desktop.
import csv
import sys
def main():
    """Identify whose DNA a sequence is by comparing STR run lengths.

    Reads two command-line arguments: the path of a CSV database whose
    first column is the person's name and whose remaining columns are STR
    names, and the path of a text file containing the DNA sequence.
    Prints the name returned by dna_validation (or "No match").

    Returns:
        1 if the number of command-line arguments is wrong, otherwise None.
    """
    # Check for command-line usage
    if len(sys.argv) != 3:
        print("Invalid number of arguments")
        return 1

    # Read database file into a variable
    with open(sys.argv[1], "r", newline="") as file:
        reader = csv.DictReader(file)
        dna_dtb = list(reader)
        # Take the STR names from the header rather than from the first row,
        # so a database with a header but no profiles does not crash.
        str_checksequence = list(reader.fieldnames or [])[1:]

    # Read DNA sequence file into a variable
    with open(sys.argv[2], "r") as file:
        dna = file.read()

    # Find longest match of each STR in DNA sequence
    str_counts = {
        marker: longest_match(dna, marker) for marker in str_checksequence
    }

    # Check database for matching profiles
    print(dna_validation(dna_dtb, str_counts, str_checksequence))
    return
def _same_count(recorded, measured):
    """Return True when both values denote the same integer count."""
    try:
        return int(recorded) == int(measured)
    except (TypeError, ValueError):
        # A malformed or missing count can never match a measured run length
        return False


def dna_validation(dna_dtb, subject_strcounts, str_checksequence):
    """Return the name of the first profile whose STR counts all match.

    Args:
        dna_dtb: iterable of profile mappings; each has a "name" key and one
            entry per STR. Counts may be strings (as produced by
            csv.DictReader) or integers.
        subject_strcounts: mapping of STR name to the count measured in the
            subject's DNA.
        str_checksequence: the STR names to compare.

    Returns:
        The "name" of the first matching profile, or "No match".
    """
    for profile in dna_dtb:
        # Compare numerically: CSV values arrive as text while measured
        # counts are ints, and "4" != 4 would reject every profile.
        if all(
            _same_count(profile[marker], subject_strcounts[marker])
            for marker in str_checksequence
        ):
            return profile["name"]
    return "No match"
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A run is a series of back-to-back, non-overlapping copies of
    subsequence. An empty subsequence has no meaningful run, so 0 is
    returned for it.
    """
    subsequence_length = len(subsequence)

    # An empty pattern equals every empty slice and never advances the
    # window, so the scan below would never terminate.
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # Try every position in sequence as the potential start of a run
    for i in range(len(sequence)):
        # Count consecutive copies of subsequence beginning at position i
        count = 0
        while True:
            # Window that the next copy would have to occupy
            start = i + count * subsequence_length
            end = start + subsequence_length
            if sequence[start:end] != subsequence:
                break
            count += 1

        # Keep the best run seen so far
        longest_run = max(longest_run, count)

    # After checking for runs at each character in sequence, return longest run found
    return longest_run
# Run only when executed as a script (not on import), and pass main()'s
# return value to the shell as the exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment