Created
March 2, 2024 07:54
-
-
Save khoaoaoaoa/514552b90f2d1ffdfeb4baaf0c6e730f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| import sys | |
def main():
    """Identify which database profile matches a DNA sequence and print it.

    Reads two command-line arguments: ``sys.argv[1]`` is the path to a CSV
    database and ``sys.argv[2]`` is the path to a text file containing the
    DNA sequence. The first CSV column is skipped and every remaining column
    header is treated as an STR to look for in the sequence.

    Returns:
        1 when the number of command-line arguments is wrong, otherwise None.
    """
    # Check for command-line usage
    if len(sys.argv) != 3:
        print("Invalid number of arguments")
        return 1

    # Read database file into a variable
    with open(sys.argv[1], "r") as file:
        reader = csv.DictReader(file)
        dna_dtb = list(reader)
        # Take the STR names from the header rather than from the first data
        # row, so a database with a header but no profiles does not crash.
        # `fieldnames` is None for a completely empty file.
        str_checksequence = list(reader.fieldnames or [])[1:]

    # Read DNA sequence file into a variable
    with open(sys.argv[2], "r") as file:
        sequence = file.read()

    # Find longest match of each STR in DNA sequence
    str_counts = {
        str_name: longest_match(sequence, str_name)
        for str_name in str_checksequence
    }

    # Check database for matching profiles
    print(dna_validation(dna_dtb, str_counts, str_checksequence))
    return
def dna_validation(dna_dtb, subject_strcounts, str_checksequence):
    """Return the name of the first profile whose STR counts all match.

    Args:
        dna_dtb: Iterable of profile mappings; each must have a ``"name"``
            key plus one entry per STR. Counts read from a CSV arrive as
            strings.
        subject_strcounts: Mapping of STR name to the count measured in the
            subject's sequence.
        str_checksequence: The STR names to compare.

    Returns:
        The matching profile's ``"name"`` value, or ``"No match"`` when no
        profile agrees on every STR.
    """
    for profile in dna_dtb:
        try:
            # Compare as integers: the database stores counts as text while
            # the measured counts are ints, so a raw comparison never matches.
            is_valid = all(
                int(profile[str_name]) == int(subject_strcounts[str_name])
                for str_name in str_checksequence
            )
        except (TypeError, ValueError):
            # A missing or non-numeric count cannot equal a measured count.
            is_valid = False
        if is_valid:
            return profile["name"]
    return "No match"
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is the number of back-to-back repetitions of ``subsequence``
    starting at some position in ``sequence``.

    Args:
        sequence: The text to search.
        subsequence: The pattern whose consecutive repeats are counted.

    Returns:
        The largest number of consecutive repeats found, or 0 when there are
        none or when ``subsequence`` is empty.
    """
    subsequence_length = len(subsequence)
    sequence_length = len(sequence)

    # An empty pattern matches at every position without advancing the
    # window, so the inner loop below would never terminate.
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # Check each character in sequence for most consecutive runs of subsequence
    for i in range(sequence_length):
        # Count of consecutive runs starting at position i
        count = 0

        # Slide a window one pattern-length at a time for as long as each
        # window still equals the pattern
        while True:
            start = i + count * subsequence_length
            end = start + subsequence_length

            if sequence[start:end] == subsequence:
                count += 1
            else:
                break

        # Update most consecutive matches found
        longest_run = max(longest_run, count)

    # After checking for runs at each character in sequence, return longest run found
    return longest_run
if __name__ == "__main__":
    # Pass main()'s return value (1 on bad usage, None otherwise) to the
    # shell as the exit status, and avoid running the script on import.
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment