Last active
September 13, 2018 19:46
-
-
Save ssstrike/40a3c4bd503d577c62e536757da42820 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import pandas as pd | |
| from scipy.stats import pearsonr | |
| # Make a Biotapestry CSV file | |
| def biotapestry(filename, data, regions): | |
| writeMe = [] | |
| writeMe.append('"# Model Commands",,,,,,,,,,') | |
| writeMe.append('"# Command Type","Model Name","Parent Model",,,,,,,,') | |
| writeMe.append('"model","root",,,,,,,,,') | |
| for i in data: | |
| writeMe.append('"model","'+i+'","root",,,,,,,,') | |
| writeMe.append(',,,,,,,,,,') | |
| writeMe.append('"# Region Commands",,,,,,,,,,') | |
| writeMe.append('"# Command Type","Model Name","Region Name","Region Abbreviation",,,,,,,') | |
| for i in data: | |
| writeMe.append('"region","'+i+'","A","A",,,,,,,') | |
| #for j in regions: | |
| # writeMe.append('"region","'+i+'","Cluster-'+j+'","'+j+'",,,,,,,') | |
| writeMe.append(',,,,,,,,,,') | |
| writeMe.append('"# Standard Interactions",,,,,,,,,,') | |
| writeMe.append('"# Command Type","Model Name","Source Type","Source Name","Target Type","Target Name","Sign","Source Region Abbrev","Target Region Abbrev",,') | |
| for i in data: | |
| for j in data[i]: | |
| # j[0] = node1 type, j[1] = node1 name, j[2] = node2 type, j[3] = node2 name, j[4] = interaction sign (positive, negative, neutral), j[5] = node1 region, j[6] = node2 region | |
| writeMe.append('"general","'+i+'","'+j[0]+'","'+j[1]+'","'+j[2]+'","'+j[3]+'","'+j[4]+'","'+j[5]+'","'+j[6]+'",,') | |
| outFile = open(filename,'w') | |
| outFile.write('\n'.join(writeMe)) | |
| outFile.close() | |
| # Make an MDraw space delimited file | |
| def mdraw(filename, data): | |
| node = 0 | |
| nodeIds = {} | |
| writeMe1 = [] | |
| for region in data: | |
| for j in data[region]: | |
| if not j[1] in nodeIds: | |
| node += 1 | |
| nodeIds[j[1]] = node | |
| if not j[3] in nodeIds: | |
| node += 1 | |
| nodeIds[j[3]] = node | |
| int1 = 0 | |
| if j[4]=='positive': | |
| int1 = 1 | |
| if j[4]=='negative': | |
| int1 = 2 | |
| tmp1 = str(nodeIds[j[1]])+'\t'+str(nodeIds[j[3]])+'\t'+'1'+'\t'+'1'+'\t'+str(int1) #+' 1' # | |
| if not tmp1 in writeMe1 and not j[1]==j[3]: | |
| writeMe1.append(tmp1) | |
| outFile = open(filename+'_mdraw.txt','w') | |
| outFile.write('\n'.join(writeMe1)) | |
| outFile.close() | |
| outFile = open(filename+'_mdraw_LIST.txt','w') | |
| outFile.write('\n'.join([str(i)+' '+str(nodeIds[i]) for i in nodeIds])) | |
| outFile.close() | |
| # Load up TF target genes | |
| #inputTfs = [] | |
| #with open('../correlated_tf_regulators_CHIR_curve_R_0.8.csv','r') as inFile: | |
| # inFile.readline() | |
| # while 1: | |
| # inLine = inFile.readline() | |
| # if not inLine: | |
| # break | |
| # splitUp = inLine.strip().split(',') | |
| # inputTfs += splitUp[1].split(';')+splitUp[2].split(';')+splitUp[3].split(';') | |
| #inputTfs = list(set(inputTfs)) | |
| #print 'inputTfs', len(inputTfs) | |
| inputTfs = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269', '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764', '23528', '201516'] | |
| # Load TF->target gene dictionary | |
| with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as inFile: | |
| data = json.load(inFile) | |
| # Load up id to motif thesaurus | |
| id2Motif = {} | |
| with open('id_conversion/humanTFs_All.CSV','r') as inFile: | |
| header = inFile.readline().strip().split(',') | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.strip().split(',') | |
| if not split[2] in id2Motif: | |
| id2Motif[split[2]] = [] | |
| id2Motif[split[2]].append(split[0]) | |
| # To translate gene ids later | |
| symbol2entrez = {} | |
| with open('gene2entrezId.CSV','r') as inFile: | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.strip().split(',') | |
| symbol2entrez[split[1]] = split[0] | |
| # TF family expansion | |
| family2Id = {} | |
| id2Family = {} | |
| with open('id_conversion/tfFamilies.CSV','r') as inFile: | |
| header = inFile.readline() | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.split(',') | |
| split[2] = split[2].replace(' ',',').strip().split(',') | |
| family2Id[split[0]] = split[2] | |
| for splitId in split[2]: | |
| id2Family[splitId] = split[0] | |
| # Load up exprssion data | |
| #df = pd.read_table('../../gexp_cMonkey_norm.tsv', header=0) | |
| #df = pd.read_csv('../../chir_gradient_EntrezID.csv', header=0, index_col=0) | |
| df = pd.read_csv('tfExp.csv', header=0, index_col=0) | |
| # Create a TFreg -> TFtarg dictionary | |
| # TFreg -> TFtarg | |
| # Type I Family Expansion = expand to all possible TFtargs via all motifs from family members | |
| # Type II Family Expansion = If no motif for TFreg then expand to family members | |
| print ' Building network' | |
| tfCascade = {} | |
| corTfCascade = {} | |
| output = {'CHIR_curve':[]} | |
| famExpType = 2 | |
| for TFreg in inputTfs: | |
| motifs = [] | |
| if TFreg in id2Motif: | |
| motifs += id2Motif[TFreg] | |
| if TFreg in id2Family: | |
| if famExpType==1 or (famExpType==2 and len(motifs)==0): | |
| for TFregExp in family2Id[id2Family[TFreg]]: | |
| if (not TFregExp==TFreg) and TFregExp in id2Motif: | |
| motifs += id2Motif[TFregExp] | |
| # Iterate through motifs | |
| for motif in motifs: | |
| if motif in data: | |
| for geneTarg in data[motif]: | |
| if geneTarg in inputTfs: | |
| if not TFreg in tfCascade: | |
| tfCascade[TFreg] = [] | |
| if not geneTarg in tfCascade[TFreg]: | |
| tfCascade[TFreg].append(geneTarg) | |
| if int(TFreg) in list(df.index) and int(geneTarg) in list(df.index): #new stuff | |
| r1 = pearsonr(df.loc[int(TFreg)],df.loc[int(geneTarg)]) | |
| if abs(r1[0])>0.4 and r1[1]<=0.05: | |
| print 'in',TFreg, geneTarg, r1 | |
| if not TFreg in corTfCascade: | |
| corTfCascade[TFreg] = [] | |
| if not geneTarg in corTfCascade[TFreg]: | |
| corTfCascade[TFreg].append(geneTarg) | |
| if r1[0]>0 and TFreg in symbol2entrez and geneTarg in symbol2entrez: | |
| output['CHIR_curve'].append(['gene',symbol2entrez[TFreg],'gene',symbol2entrez[geneTarg],'positive','A','A']) | |
| if r1[0]<0 and TFreg in symbol2entrez and geneTarg in symbol2entrez: | |
| output['CHIR_curve'].append(['gene',symbol2entrez[TFreg],'gene',symbol2entrez[geneTarg],'negative','A','A']) | |
| #tfCascade = {'2002':['1874'],'1874':['1053']} FOR TESTING | |
| #subGeneNetwork = {} | |
| def down(key, tfCascade): | |
| """Function to identify all downstream targets of TF | |
| key. | |
| Args: | |
| key: starting node label. | |
| tfCascade: the full gene network as dict. | |
| Returns: | |
| A list of TF targets of key. | |
| """ | |
| if key in tfCascade: | |
| print 'down', tfCascade[key] | |
| return tfCascade[key] | |
| else: | |
| return [] | |
| def up(key,tfCascade): | |
| """Function to identify all upstream regulators of TF | |
| key. | |
| Args: | |
| key: starting node label. | |
| tfCascade: the full gene network as dict. | |
| Returns: | |
| A list of TF regulators of key. | |
| """ | |
| outList = [] | |
| for TFreg in tfCascade: | |
| if key in tfCascade[TFreg]: | |
| outList.append(TFreg) | |
| print 'up', outList | |
| return outList | |
| def subNetwork2(key,hops,tfCascade): | |
| """Function to grab out subnetwork from tfCascade given | |
| a specific starting node (key) and for a given number of | |
| hops. | |
| Args: | |
| key: starting node label. | |
| hops: number of node jumps away from starting node. | |
| tfCascade: the full gene network as dict. | |
| Returns: | |
| A dict of the subnetwork where the keys are TFregs | |
| and values are TFtargs. | |
| """ | |
| mainList = [key] | |
| count = 0 | |
| while count < hops: | |
| for g in mainList: | |
| tempList = [] | |
| tempList += down(g,tfCascade) | |
| tempList += up(g,tfCascade) | |
| temp2List = [] | |
| for x in tempList: | |
| if not x in mainList: | |
| temp2List.append(x) | |
| mainList = temp2List | |
| count += 1 | |
| print mainList | |
| subNetwork = {} | |
| for TFreg in mainList: | |
| if not TFreg in subNetwork: | |
| subNetwork[TFreg] = [] | |
| if TFreg in tfCascade: | |
| for TFtarg in tfCascade[TFreg]: | |
| if TFtarg in mainList: | |
| subNetwork[TFreg].append(TFtarg) | |
| return subNetwork | |
| #subGeneNetwork = subNetwork2(sPoint,hop,tfCascade) | |
| # Create .sif file | |
| interact = 'r2t' | |
| writeMe = [] | |
| for key in tfCascade: | |
| for gene in tfCascade[key]: | |
| if key in symbol2entrez: | |
| key1 = symbol2entrez[key] | |
| else: | |
| key1 = key | |
| if gene in symbol2entrez: | |
| gene1 = symbol2entrez[gene] | |
| else: | |
| gene1 = gene | |
| writeMe.append(key1+'\t'+interact+'\t'+gene1) | |
| with open('gene2GeneNetwork.sif','w') as outFile: | |
| outFile.write('\n'.join(writeMe)) | |
| # Create .sif file | |
| interact = 'r2t' | |
| writeMe = [] | |
| for key in corTfCascade: | |
| for gene in corTfCascade[key]: | |
| if key in symbol2entrez: | |
| key1 = symbol2entrez[key] | |
| else: | |
| key1 = key | |
| if gene in symbol2entrez: | |
| gene1 = symbol2entrez[gene] | |
| else: | |
| gene1 = gene | |
| writeMe.append(key1+'\t'+interact+'\t'+gene1) | |
| with open('gene2GeneNetwork_correlated.sif','w') as outFile: | |
| outFile.write('\n'.join(writeMe)) | |
| biotapestry('biotapestry_CHIR_curve.csv', output, ['CHIR_curve']) | |
| mdraw('mdraw_CHIR_curve', output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment