Skip to content

Instantly share code, notes, and snippets.

@cplaisier
Created September 3, 2020 00:19
Show Gist options
  • Select an option

  • Save cplaisier/fc4d34d8f72b98eeb9653e64f2fcbc91 to your computer and use it in GitHub Desktop.

Select an option

Save cplaisier/fc4d34d8f72b98eeb9653e64f2fcbc91 to your computer and use it in GitHub Desktop.
import re
import pandas as pd
def compareMiRNANames(a, b):
    """Return 1 if two miRNA names are considered equivalent, else 0.

    Two names match when they are identical, or when the longer name is
    the shorter name followed by, in order:

    * one optional lowercase letter other than 'p',
    * an optional '-' plus one digit,
    * an arm suffix: an optional '-5p' normally, or a mandatory '-3p'
      when the shorter name itself already ends in '-3p'.

    The comparison is case-sensitive. When both names have the same
    length (and differ), ``b`` is used as the shorter name.

    Args:
        a: First miRNA name.
        b: Second miRNA name.

    Returns:
        1 when the names match, 0 otherwise.
    """
    if a == b:
        return 1

    # The shorter name is the stem; the other one is tested against it.
    if len(a) < len(b):
        stem, candidate = a, b
    else:
        stem, candidate = b, a

    # Raw strings keep '\d' a regex escape instead of an invalid string
    # escape sequence.
    if stem.endswith('-3p'):
        suffix = r'[a-oq-z]?(-\d)?-3p$'
    else:
        suffix = r'[a-oq-z]?(-\d)?(-5p)?$'

    # Escape the stem so characters such as '*' or '.' inside a name are
    # matched literally rather than interpreted as regex operators.
    if re.match(re.escape(stem) + suffix, candidate):
        return 1
    return 0
def miRNAInDict(miRNA, dict1):
    """Collect the values of every entry in dict1 whose key matches miRNA.

    Args:
        miRNA: miRNA name to look up.
        dict1: Mapping from miRNA name to the value to return for it.

    Returns:
        List of values from dict1, in the mapping's iteration order, for
        all keys that compareMiRNANames reports as matching miRNA. The
        list is empty when no key matches.
    """
    # Take values from dict1 itself rather than the module-level miRNAIDs
    # table, so the function is correct for whatever mapping is passed in.
    return [
        value
        for name, value in dict1.items()
        if compareMiRNANames(miRNA, name)
    ]
# Load in miRNA information
# Each non-blank line of 'hsa.mature.fa' is split on single spaces; the first
# field is used as the miRNA name and the second as its ID.
miRNAs = []  # every parsed line, as its list of fields
miRNAIDs = {}  # lower-cased name (with '-5p' removed) -> ID
rev_miRNAIDs = {}  # ID -> lower-cased name (with '-5p' removed)
# The with-block closes the file even if a line fails to parse.
with open('hsa.mature.fa', 'r') as inFile:
    for line in inFile:
        tmp = line.strip().split(' ')
        if tmp == ['']:
            # Blank line (e.g. a trailing newline at end of file): there
            # are no fields to record, so skip it instead of failing on
            # tmp[1] below.
            continue
        # Removes every '-5p' occurrence in the name field.
        tmp[0] = tmp[0].replace('-5p', '')
        miRNAs.append(tmp)
        name = tmp[0].lower()
        miRNAIDs[name] = tmp[1]
        rev_miRNAIDs[tmp[1]] = name

# For each index label of the table (its first CSV column), normalise the
# label ('_' -> '-', lower-case) and collect the IDs of all matching names.
m1 = pd.read_csv('GBM_miRNA_oldIDs.csv', index_col=0, header=0)
converted = {
    i: miRNAInDict(i.replace('_', '-').lower(), miRNAIDs)
    for i in m1.index
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment