Created
June 10, 2014 21:57
-
-
Save stepjue/97742fefb4536166a60d to your computer and use it in GitHub Desktop.
Solution to Rosalind's "grph" problem (http://rosalind.info/problems/grph/). Reads DNA strings from a FASTA file and prints the adjacency list of their 3-overlap graph.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def prefix(l, k):
    """Return the leading ``k`` items of the sequence ``l``.

    Plain slicing semantics apply: if ``l`` holds fewer than ``k`` items
    the whole sequence is returned.
    """
    head = l[:k]
    return head
def suffix(l, k):
    """Return the trailing ``k`` items of the sequence ``l``.

    If ``l`` holds fewer than ``k`` items the whole sequence is returned.
    A ``k`` of zero yields an empty sequence of the same type as ``l``.
    """
    if k == 0:
        # ``l[-0:]`` is ``l[0:]`` -- the entire sequence -- so the zero-length
        # suffix has to be produced explicitly.
        return l[:0]
    return l[-k:]
def parse_fasta(file):
    """Parse a FASTA file into a dict mapping header lines to sequences.

    Args:
        file: Path of the FASTA file to read.

    Returns:
        A dict whose keys are the header lines (right-stripped, leading
        ``'>'`` included) and whose values are the concatenation of the
        right-stripped sequence lines that follow each header.  A header
        that appears more than once keeps only its last sequence.

    Raises:
        ValueError: If non-blank sequence text appears before the first
            header line.
    """
    pieces = {}
    current = None
    with open(file) as f:
        # Iterate the file directly instead of loading every line up front.
        for raw in f:
            line = raw.rstrip()
            if not line:
                # Blank lines carry no sequence data; skipping them also
                # keeps a leading blank line from being treated as data
                # that belongs to a header which does not exist yet.
                continue
            if line.startswith('>'):
                current = line
                pieces[current] = []
            elif current is None:
                raise ValueError(
                    "sequence data found before the first FASTA header: %r"
                    % line
                )
            else:
                pieces[current].append(line)
    # Join once per record rather than growing a string line by line.
    return {header: ''.join(parts) for header, parts in pieces.items()}
def overlap(fasta, k):
    """Print the directed edges of the ``k``-overlap graph of ``fasta``.

    ``fasta`` maps record names to sequences.  For every ordered pair of
    records whose sequences differ, an edge is printed when the last ``k``
    items of the first sequence equal the first ``k`` items of the second.
    Each edge is printed as the two names, each with its first character
    dropped, separated by a single space.

    Records are compared by sequence, not by name, so two records that
    carry the same sequence are never linked to each other.
    """
    for tail_name, tail_seq in fasta.items():
        for head_name, head_seq in fasta.items():
            if tail_seq == head_seq:
                # Identical sequences never form an edge (rules out loops).
                continue
            if suffix(tail_seq, k) != prefix(head_seq, k):
                continue
            print(tail_name[1:] + " " + head_name[1:])
# Script body: load the Rosalind dataset from the working directory and
# print the adjacency list of its 3-overlap graph.
fasta = parse_fasta(file='rosalind_grph.txt')
overlap(fasta=fasta, k=3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment