Instantly share code, notes, and snippets.
Created
December 17, 2018 18:55
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
-
Save scienceystuff/d5783a5367f9edc9c5e33742f3acfec4 to your computer and use it in GitHub Desktop.
Samtools tags in a Python script; this will be updated as needed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# standard tags for samtools
# format is {name : (type, description)}
# where type is one of: character (A), general-array (B), real-number (f), hexadecimal-array (H), integer (i), string (Z), reserved (X)
# example usage:
# samtools_tags['BC']
# # outputs ('string', 'Barcode sequence identifying the sample')
# samtools_tags_examples['BC']
# # outputs ('string', 'ATCACG')
# Lookup from a one-letter tag type code to its human-readable type name.
# The tag tables in this file index into it (e.g. sm_type['Z']) so every
# entry shares the same spelling of each type name.
# 'X' is the code this file uses for tags it lists as reserved.
sm_type = {
    'i': 'integer',
    'A': 'character',
    'B': 'general-array',
    'f': 'real-number',
    'H': 'hexadecimal-array',
    'Z': 'string',
    'X': 'reserved',
}
# Standard two-character tags, keyed by tag name.
# Each value is a (type name, description) tuple, where the type name is
# taken from sm_type. Keys ending in '?' ('X?', 'Y?', 'Z?') stand for a
# whole family of user-reserved tags rather than one literal tag name.
samtools_tags = {
    'AM': (sm_type['i'], 'The smallest template-independent mapping quality in the template'),
    'AS': (sm_type['i'], 'Alignment score generated by aligner'),
    'BC': (sm_type['Z'], 'Barcode sequence identifying the sample'),
    'BQ': (sm_type['Z'], 'Offset to Base Alignment Quality BAQ'),
    'BZ': (sm_type['Z'], 'Phred quality of the unique molecular barcode bases in the OX tag'),
    'CB': (sm_type['Z'], 'Cell identifier'),
    'CC': (sm_type['Z'], 'Reference name of the next hit'),
    'CG': (sm_type['B'], 'CIGAR in BAM binary encoding iff it consists of > 65535 operators, BAM only'),
    'CM': (sm_type['i'], 'Edit distance between the color sequence and the color reference, see also NM'),
    'CO': (sm_type['Z'], 'Free-text comments'),
    'CP': (sm_type['i'], 'Leftmost coordinates of the next hit'),
    'CQ': (sm_type['Z'], 'Color read base qualities'),
    'CR': (sm_type['Z'], 'Uncorrected cellular barcode sequences'),
    'CS': (sm_type['Z'], 'Color read sequence'),
    'CT': (sm_type['Z'], 'Complete read annotation tag, used for consensus annotation dummy features'),
    'CY': (sm_type['Z'], 'Phred quality of the cellular barcode sequence in the CR tag'),
    'E2': (sm_type['Z'], 'The 2nd most likely base calls'),
    'FI': (sm_type['i'], 'The index of segment in the template'),
    'FS': (sm_type['Z'], 'Segment suffix'),
    'FZ': (sm_type['B'], 'Flow signal intensity'),
    'GC': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'GQ': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'GS': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'H0': (sm_type['i'], 'Number of perfect hits'),
    'H1': (sm_type['i'], 'Number of 1-difference hits, see also NM'),
    'H2': (sm_type['i'], 'Number of 2-difference hits'),
    'HI': (sm_type['i'], 'Query hit index'),
    'IH': (sm_type['i'], 'Query hit total count'),
    'LB': (sm_type['Z'], 'Library'),
    'MC': (sm_type['Z'], 'CIGAR string for mate or next segment'),
    'MD': (sm_type['Z'], 'String for mismatching positions'),
    'MF': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'MI': (sm_type['Z'], 'Molecular identifier and a string that uniquely identifies the molecule from which the record was derived'),
    'MQ': (sm_type['i'], 'Mapping quality of the mate or next segment'),
    'NH': (sm_type['i'], 'Number of reported alignments that contain the query in the current record'),
    'NM': (sm_type['i'], 'Edit distance to the reference'),
    'OC': (sm_type['Z'], 'Original CIGAR'),
    'OP': (sm_type['i'], 'Original mapping position'),
    'OQ': (sm_type['Z'], 'Original base quality'),
    'OX': (sm_type['Z'], 'Original unique molecular barcode bases'),
    'PG': (sm_type['Z'], 'Program'),
    'PQ': (sm_type['i'], 'Phred likelihood of the template'),
    'PT': (sm_type['Z'], 'Read annotations for parts of the padded read sequence'),
    'PU': (sm_type['Z'], 'Platform unit'),
    'Q2': (sm_type['Z'], 'Phred quality of the mate or next segment sequence in the R2 tag'),
    'QT': (sm_type['Z'], 'Phred quality of the sample barcode sequence in the BC tag'),
    'QX': (sm_type['Z'], 'Quality score of the unique molecular identifier in the RX tag'),
    'R2': (sm_type['Z'], 'Sequence of the mate or next segment in the template'),
    'RG': (sm_type['Z'], 'Read group'),
    'RT': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'RX': (sm_type['Z'], 'Sequence bases of the unique molecular identifier that may be corrected'),
    'S2': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'SA': (sm_type['Z'], 'Other canonical alignments in a chimeric alignment'),
    'SM': (sm_type['i'], 'Template-independent mapping quality'),
    'SQ': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'TC': (sm_type['i'], 'The number of segments in the template'),
    'U2': (sm_type['Z'], 'Phred probability of the 2nd call being wrong conditional on the best being wrong'),
    'UQ': (sm_type['i'], 'Phred likelihood of the segment that is conditional on the mapping being correct'),
    'X?': (sm_type['X'], 'Reserved for end users, second value may be [0-9,Aa-Zz]'),
    'Y?': (sm_type['X'], 'Reserved for end users, second value may be [0-9,Aa-Zz]'),
    'Z?': (sm_type['X'], 'Reserved for end users, second value may be [0-9,Aa-Zz]'),
}
# Example values for the standard tags, keyed by tag name.
# Each value is a (type name, example) tuple, where the type name is taken
# from sm_type. 'NA' marks a tag for which no example is recorded; tags
# listed as reserved for backwards compatibility carry that notice in
# place of an example.
samtools_tags_examples = {
    'AM': (sm_type['i'], 'NA'),
    'AS': (sm_type['i'], 'NA'),
    'BC': (sm_type['Z'], 'ATCACG'),
    'BQ': (sm_type['Z'], 'NA'),
    'BZ': (sm_type['Z'], 'NA'),
    'CB': (sm_type['Z'], 'NA'),
    'CC': (sm_type['Z'], 'NA'),
    'CG': (sm_type['B'], 'NA'),
    'CM': (sm_type['i'], 'NA'),
    'CO': (sm_type['Z'], 'The cake is a lie'),
    'CP': (sm_type['i'], 'NA'),
    'CQ': (sm_type['Z'], 'NA'),
    'CR': (sm_type['Z'], 'NA'),
    'CS': (sm_type['Z'], 'NA'),
    'CT': (sm_type['Z'], 'NA'),
    'CY': (sm_type['Z'], 'NA'),
    'E2': (sm_type['Z'], 'NA'),
    'FI': (sm_type['i'], 'NA'),
    'FS': (sm_type['Z'], 'NA'),
    'FZ': (sm_type['B'], 'NA'),
    'GC': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'GQ': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'GS': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'H0': (sm_type['i'], 'NA'),
    'H1': (sm_type['i'], 'NA'),
    'H2': (sm_type['i'], 'NA'),
    'HI': (sm_type['i'], 'NA'),
    'IH': (sm_type['i'], 'NA'),
    'LB': (sm_type['Z'], 'NA'),
    'MC': (sm_type['Z'], 'NA'),
    'MD': (sm_type['Z'], 'NA'),
    'MF': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'MI': (sm_type['Z'], 'NA'),
    'MQ': (sm_type['i'], 'NA'),
    'NH': (sm_type['i'], 'NA'),
    'NM': (sm_type['i'], 'NA'),
    'OC': (sm_type['Z'], 'NA'),
    'OP': (sm_type['i'], 'NA'),
    'OQ': (sm_type['Z'], 'NA'),
    'OX': (sm_type['Z'], 'NA'),
    'PG': (sm_type['Z'], 'Illumina'),
    'PQ': (sm_type['i'], 'NA'),
    'PT': (sm_type['Z'], 'NA'),
    'PU': (sm_type['Z'], 'Nextera'),
    'Q2': (sm_type['Z'], 'NA'),
    'QT': (sm_type['Z'], 'NA'),
    'QX': (sm_type['Z'], 'NA'),
    'R2': (sm_type['Z'], 'NA'),
    'RG': (sm_type['Z'], 'RG1'),
    'RT': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'RX': (sm_type['Z'], 'NA'),
    'S2': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'SA': (sm_type['Z'], 'NA'),
    'SM': (sm_type['i'], 'NA'),
    'SQ': (sm_type['X'], 'Reserved for backwards compatibility reasons'),
    'TC': (sm_type['i'], 'NA'),
    'U2': (sm_type['Z'], 'NA'),
    'UQ': (sm_type['i'], 'NA'),
    'X?': (sm_type['X'], 'Xi'),
    'Y?': (sm_type['X'], 'Yi'),
    'Z?': (sm_type['X'], 'Zi'),
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment