Last active
April 28, 2020 19:44
-
-
Save NSLog0/f21735765ea6405fecc575f9e1c2df9b to your computer and use it in GitHub Desktop.
DNA Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from Bio.Seq import Seq\n", | |
| "from Bio.Alphabet import generic_dna, generic_rna\n", | |
| "from Bio import SeqIO" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "record = SeqIO.parse(\"./coronavirus.fasta\", \"fasta\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "virus_dna = list(record)[0].seq\n", | |
| "virus_rna = virus_dna.transcribe()\n", | |
| "protein = virus_rna.translate()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA', SingleLetterAlphabet())" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "virus_dna" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QDTSNSSIFCRLLTVSSVLQPIISTSRFRPGVTER*DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS*RWHLWLSRS*KRRFAST*TALCVHQTFGCSNCTSWSCYG*AGSRTRRHSVRS*W*DTWCPCPSCGRNTSGLPQGSSS*ER**RSWWP*LRRRSKVI*LRRRAWH*SL*RFSRKLEH*T*QWCYP*THA*A*RRGIHSLCR*QLLWP*WLPS*VH*RPSSTCW*SFMHFVRTTGLY*H*EGCILLP*T*A*NCLVHGTF*KEL*IADTF*N*IGKEI*HLQWGMSKFCISLKFHNQDYSTKG*KEKA*WLYG*NSICLSSCVTK*MQPNVPFNSHEV*SLW*NFMADGRFC*SHLRILWH*EFD*RRCHYLWLLTPKCCC*NLLSSMSQFRSRT*A*SCRIP**IWLENHSS*GWSHYCLWRLCVLLCWLP*QVCLLGSTC*R*HRL*PYRCCWRRFRRS**QPS*NTPKRESQHQYCW*L*T**RDRHYFGIFFCFHKCFCGNCERFGL*SIQTNC*ILW*F*SYKRKS*KRCLEYW*TEINTESSLCICIRGCSCCTINFLPHS*NCSKFCACFTEGRYNNTRWNFTVFTETH*CYDVHI*FGY*QSSCNGLHYRWCCSVDFAVAN*HLWHCL*KTQTRP*LA*REV*GRCRVS*RRLGNC*IYLNLCL*NCRWTNCHLCKGN*GECSDIL*ACK*IFGFVC*LYHYWWS*T*SLEFR*NICHALKGIVQKVC*IQRRNWPTHASKSPKRNYLLRGRNTSHRSVNRGSCLENW*FTTIRTTY**SC*SSIGWYTSLY*RAYVARNQRHRKVLCPCT*YDGNKQYLHTQRRCTNKGYFW**HCDRSARLQECEYHF*T**KD**ST**EVLCLYS*TRYRSK*VRLCCGRCCHKNFATSI*ITYTTGH*FR*VEYGYILLI**VW*V*IGFTYVLFFLPSR*G*RRR*L*RRRV*AINSI*VWY*R*LPR*TFGIWCHFCCSST*RRARRRLVR***STNCWSTRRQ*GQSDNYYSNNC*GSTSIRDGTYTSCSDY*SE*F*WLFKTY*QCIH*KCRHCGRS*KGKTNSGC*CSQCLP*TWRRCCRSLK*GY*QCHAS*I**LHSY*WTT*SGW*LCFKRTQSC*TLSSCCRPKC*QR*RHSTS*ECL*KF*SARSSTCTIIISWYFWC*PYTFFKSLCRYCSHKCLLSCL**KSL*QTCFKLFGNEE*KAS*TKDR*DS*RGS*AIYN*K*TFS*TEKTR**ENQSLC*RSYNNSGRN*VPHRKLVTLY*H*WQSSSRFCHSC**H*HHFLKERCSIYSG*CCSRGCFNCCGYTY*KGWWHY*NASESFEKSANRQLYNHLPGSGFKWLHCRGGKDSA*KV*KCLLHSTIYYL**EARNSWNCFLEFARNACTCRRNTQINACLCGN*SHSFNYTA*I*GY*NTRGCG*LWC*ILLLHQ*NNCSVTYQHT*RSK*NSCYNATWLCNTWLKFGRSCSVYEISQSASYSFCFFT*CCYSV*WLSYFFF*NT*RTFY*NHLTCWFL*RLVLFWTIYTTRYRIS*ER**KCILH**SYHIPPRW*SYHL*QS*DTSFFERSEDY*GVYNSRQH*PPHASCGHVNDIWTTVWSNLFGWS*CY*NKTS*FT*R*NILCFT***HSTC*GF*VLPHN*S*FSG*VHVSIKSH*KVEIPTS*WFNFY*MGR*QLLSCHCIVNTPTNRVEV*STCSTRCLLQSKGW*SC*LLCTYLSLL**DSR*VR*C*RNNELLVSTCQFRFLQKSLERGV*NLWTTADNP*GCRSCYVHGHTFL*TI*ERCSDTLYVW*TSYKISSTTGVTFCYDVSTTCSV*T*AWYIYLC**VHW*LPVWSL*TYNF*RNFVLHRRCFTYKVLRIQRSYYGCFLQRKQLHNNHKTSYL*IGWCCLYRN*P*VGQLL*ERQFLFHRATN*SCTKPTISKRKLR*F*VCM**YQIC**FKPVNWL*ETCFKRA*SYIFP*LKW*CGGY*L*TLHTLF*ERS*IVT*TYCLAC*QCN**SHV*TKYLVYTLSLEHKTS*NIKFV*CTEVRGRAGNG*SCLRRSKTSL*RSSGKSYHTERRS*V*CENYRSCRRHYT*TSK**FKNYRRGWPHRSNGCLCRQF*SYY*ET**II*SIRFENPCYSWFSCC**CPLGYYS*LC*AFS*QSC*YNY*HSYTVFKPCLY*LYALFLYFIATIVYFY*KYKF*N*SIYADYYSKEYC*ECR*ILSRGFI*LFEVT*FF*TDKYYNLVFTIKCLPRFFNLLNRCFRCFNV*FRHAFLLYWLQRRLFELY*CHYCNLLYWFYTL*CLS*WFRFFRHLSFFRNYTNYHFIF*MGFNCFWLSCRVVFGIYSFH*VFLCTWIGCNHAIVFQLFCSTFY**FLAYVVNN*SCTNGPDFSYG*NVHLLCIILLCMEKLCACCRRL*FINLYDVLQT**SNKSRMYNYC*WC*KVLLCLC*WR*RLLQTTQLELC*L*YILCW*YIY***SCERLVTTV*KTNKSY*PVFLHR**CYSEEWFHPSLL**SWSKDL*KTFSLSFC*LRQPES**H*RFIAY*CYSF*W*IKM*RIICKISVCLLQSAYVSTYTVTRSGISV*CW**CGSCS*NV*CLR*YVFINF*RTNGKTQNTSCNCRS*TCKECVLRQCLIYFYFSSSARVC*FRCRN*RCC*MS*IVTSI*HRSYWR*L**LYAHL*QS*KHDTP*PWCLY*L*CASY*CAGSKKSQHCFDMER*RFHVIV*TTTKTNT*CC*KE*LTF*VDMCNY*TSC*CCNNKDST*GW*NC**LVEAVN*SYTCVPFCCCYFLFNNTCSCHV*TY*LFK*NHRIQGY*WWCHS*HSIYRYLFC*QTC*F*HMV*PAWW*LY**QSLPIDCCSHNKRSGFCRAWFAWHDITHN*W*LFAFLT*SF*CSW*HLLHTIKTYRVH*LCNISLCFGC*MYNF*RCFW*ASTILL*YQCTRRFCCL*KFTP*HTLCAHGWLYYSIS*HLP*RFC*SGNNF*F*VL*ARHL*KIRSWCLCIY*W*MGT*Q*LLQIFTRSFLWCRCCKFTY*YVYTTNSTYWCFGHISIYSSWWYCSYRSNMPCLLFYEV*KSFW*IQSCSCL*YFTIPYVIHCTLFNTSLLILTWCLFCYLLVLDILSY**CFFFSTYSVDGYVHTFSTFLDNNCLYHLYFHKAFLLVL**LPKETCSL*WCFL*YF*RSCAVHLFVK*RNVSKVA**CAITSYAI**ILSSL**VQVF*WSNGYN*LQRSCLLSSRKGSQ*LQ*LRF*CSLPTTTNLYHLSCFAEWF*KNGIPIW*S*GLYGTSNLWYNYT*RSLA**RSLLSKTCDLHL*RHA*P*L*RFTHS*V*S*FLGTGW*CSTQGYWTFYAKLCT*A*G*YSQS*DT*V*VCSHSTRTDFFSVSLLQWFTIWCLPMCYEAQFHY*GFIP*WFMW*CWF*HRL*LCLFLLHAPYGITNWSSCWHRLRR*LLWTFC*QANSTSSWYGHNYYS*CFSLVVRCCYKWRQVVSQSIYHNS**L*PCGYEVQL*TSNTRPC*HTRTSFCSNWNCRFRYVCFIKRITAKWYEWTYHIG*CFIRR*IYTF*CC*TMLRCYFPKCSEKNNQGYTPLVVTHNFDFTFSFSPEYSMVFVLFFV*KCLFTFCYGYYCYVCFCNDVCQT*ACISLFVFVTFSCHCSLF*YGLYAC*LGDAYYDMVGYG*Y*FVWF*AKRLCYVCISCSVTNPYDSKNCV**WC*ESVDTYECLDTRL*SLLW*CFRSSHFHVGSYNLCYF*LLRCSYNCHVFGQRYCFYVC*VLPYFLHNW*YTSVYNASLLFLRLFLYLLLWPLLFTQPLL*TDSWCL*LLSFYTGV*IYEFTGTTPTQE*HRCLQTQH*IVGCWWQTLYQSSHCTV*NVRCKVHISSLTLSFATTQSRIII*IVGSMCPVTQ*HSLS*RYY*SL*KNGFTTFCFAFHAGCCRHKQAL*RNAGQQGNLTSYSLRV*FPSIICSFCYCSRSL*AGCC*W*F*SCS*KVEEVFECG*I*I*P*CSHAT*VGKDG*SSYDPNV*TG*I*GQEGKSY*CYADNAFHYA*KVG**CTQQHYQQCKRWLCSLEHNTSYNSSQTNGCHTRL*HI*KYV*WYNIYLCISIVGNPTGCRCR**NCST**N*YGQFT*FSMASYCNSFKGQFCCQITE**A*SCCTTTDVLCCRYYTNCLH**QCVSLLQHNKGR*VCTCTVIRFTGFEMG*IP*E*WNWYYLYRTGTTL*VCYRHT*RS*SEVFILY*RIKQPK*RYGTW*FSCHSTSTSW*CNRSACQFNCIIFLCFCCRCC*SLQRLSS*WGTTNH*LC*DVVYTHWYWSGNNSYTGSQYGSRILWWCIVLSVLPLPHRSSKS*RIL*LKR*VCTNTYNLC**PCGFYT*KHSLYRLRYVERLWL*L*STPRTHASVS*CTIVFKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN*TNNVCFSCFIATSL*SVC*SYNQNSITPCIH*FFHTWCLLP*QSFQILSFTFNSGLVLTFLFQCYLVPCYTCLWDQWY*EV**PCPTI**WCLFCFH*EV*HNKRLDFWYYFRFEDPVPTYC**RY*CCY*SL*ISIL**SIFGCLLPQKQQKLDGK*VQSLF*CE*LHF*ICLSAFSYGP*RKTG*FQKS*GICV*EY*WLF*NIF*AHAY*FSA*SPSGFFGFRTIGRFANRY*HH*VSNFTCFT*KLFDSW*FFFRLDSWCCSLLCGLSST*DFSIKI**KWNHYRCCRLCT*PSLRNKVYVEILHCRKRNLSNF*L*SPTNRIYC*IS*YYKLVPFW*SF*RHQICICLCLEQEENQQLCC*LFCPI*FRIIFHF*VLWSVSY*IK*SLLY*CLCRFICN*R**SQTNRSRANWKDC*L*L*ITR*FYRLRYSLEF*QS*F*GWW*L*LPV*IV*EV*SQTF*ERYFN*NLSGR*HTL*WC*RF*LLLSFTIIWFPTH*WCWLPTIQSSSTFF*TSTCTSNCLWT*KVY*FG*KQMCQFQLQWFNRHRCSY*V*QKVSAFPTIWQRHC*HY*CCP*STDT*DS*HYTMFFWWCQCYNTRNKYF*PGCCSLSGC*LHRSPCCYSCRSTYSYLACLFYRF*CFSNTCRLFNRG*TCQQLI*V*HTHWCRYMR*LSDSD*FSSAGT*CS*SIHHCLHYVTWCRKFSCLL**LYCHTHKFYY*CYHRNSTSVYDQDISRLYNVHLW*FN*MQQSFVAIWQFLYTIKPCFNWNSC*TRQKHPRSFCTSQTNLQNTTN*RFWWF*FFTNITRSIKTKQEVIY*RSTFQQSDTCRCWLHQTIW*LPW*YCC*RPHLCTKV*RPYCFATFAHR*NDCSIHFCTVSGYNHFWLDLWCRCCITNTICYANGL*V*WYWSYTECSL*EPKIDCQPI**CYWQNSRLTFFHSKCTWKTSRCGQPKCTSFKHAC*TT*LQFWCNFKCFK*YPFTS*QS*G*SAN**VDHRQTSKFADICDSTIN*SCRNQSFC*SCCY*NVRVCTWTIKKS*FLWKGLSSYVLPSVSTSWCSLLACDLCPCTRKELHNCSCHLS*WKSTLSS*RCLCFKWHTLVCNTKEFL*TTNHYYRQHICVW*L*CCNRNCQQHSL*SFAT*IRLIQGGVR*IF*ESYITRC*FR*HLWH*CFSCKHSKRN*PPQ*GCQEFK*ISHRSPRTWKV*AVYKMAMVHLARFYSWLDCHSNGDNYALLYDQLL*LSQGLLFLWILLQI**RRL*ASAQRSQITLHINELMDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL*AQADEYELMYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV*TN*ILY*FFCLEL*F*PWQIPTVLLPLKSLKSSLNNGT***VSYSLHGFVFYNLPMPTGIGFCI*LS*FSSGCYGQ*L*LVLCLLLFTE*IGSPVELLSQWLVL*A*CGSATSLLLSDCLRVRVPCGHSIQKLTFFSTCHSMALF*PDRF*KVNS*SEL*SFVDIFVLLDTI*DAVTSRTCLKKSLLLHHERFLITNWELRSV*QVTQVLLHTVATGLATIN*TQTIPVAVTILLCLYSK*QQMFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID*TNMKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE*LNFH*LTSICAF*PFCYSLF*LCLLSFGSHLNCKIIMKLVTPKRT*NFLFS*ESSQL*LHFTKNVVYSHVLNINHM*LMTRVLFTSILNGILE*ELENQHL*LNCAWMRLVLNHPFSTSISVIIQFPVYLLQLIARNLNWVVL*CVVRSMKTF*SIMTFVLF*ISSKRTN*NV**WTPKSAKCTPHYVWWTLRFNWQ*PEWRTQWGAIKTTSAPRFTQ*YCVLVHRSHSTWQGRP*IPSRTRRSN*HQ*QSR*PNWLLPKSYQTNSWW*R*NERSQSKMVFLLPRNWARSWTSLWC*QRRHHMGCN*GSLEYTKRSHWHPQSC*QCCNRATTSSRNNIAKRLLRRREQRRQSSLFSFLIT*SQQFKKFNSRQQ*GNFSC*NGWQWR*CCSCFAAA*QIEPA*EQNVW*RPTTTRPNCH*EICC*GF*EASAKTYCH*SIQCNTSFRQTWSRTNPRKFWGPGTNQTRN*LQTLAANCTICPQRFSVLRNVAHWHGSHTFGNVVDLHRCHQIG*QRSKFQRSSHFAE*AY*RIQNIPTNRA*KGQKEEG**NSSLTAETEETANCDSSSCCRFG*FLQTIATIHEQC*LNSGLNSCRPHKADGLYKRFRFSVYDI*STLVQNEFS*LHSTSRCS*L*SHIAIFNQCVTLGRT*KSHHIFTEATRSTIECTVNNARESCLYGRALMCKINFSSAIPM*F**LLRRMTKKKKKKKKKK'" | |
| ] | |
| }, | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "str(protein)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "total_a = virus_dna.count('A')\n", | |
| "total_c = virus_dna.count('C')\n", | |
| "total_g = virus_dna.count('G')\n", | |
| "total_t = virus_dna.count('T')\n", | |
| "dna_total = len(str(first_simple.seq))\n", | |
| "cg_percentage = float(total_c + total_g) / dna_total" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "8954" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "total_a" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5492" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "total_c" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5863" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "total_g" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "9594" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "total_t" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "29903" | |
| ] | |
| }, | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dna_total" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "37.97277865097147" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "cg_percentage * 100" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python [conda env:dna_env] *", | |
| "language": "python", | |
| "name": "conda-env-dna_env-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment