-
-
Save cplaisier/8df86b3471ad3fb035df03b943086d33 to your computer and use it in GitHub Desktop.
Updated version.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Jun 18 21:14:50 2018 | |
| @author: Fuzzy | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| import networkx as nx | |
| import matplotlib.pyplot as plt | |
| from scipy.stats import pearsonr | |
| ### Load sub-networks ### | |
| # Load up FANMOD subgraph enumeration results | |
| inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT','r') | |
| # Get rid of headers | |
| while 1: | |
| line = inFile.readline() | |
| if line.strip()=='Result overview:': | |
| inFile.readline() | |
| inFile.readline() | |
| inFile.readline() | |
| inFile.readline() | |
| break | |
| subnetworks = [] | |
| filter_pv = 0.05 | |
| filter_z = 2 | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| line2 = inFile.readline().strip() # Get rid of second adjacency matrix line | |
| line3 = inFile.readline().strip() # Get rid of third adjacency matrix line | |
| #line4 = inFile.readline().strip() # Get rid of fourth adjacency matrix line | |
| #print line4 | |
| inFile.readline() # Get rid of blank line | |
| splitUp = [i for i in line.strip().split(' ') if i] | |
| id = splitUp[1]+line2+line3 #+line4 | |
| if float(splitUp[6]) <= filter_pv and float(splitUp[5]) >= filter_z: | |
| subnetworks.append(id) | |
| inFile.close() | |
| # Create dictionary to convert | |
| id2gene = {} | |
| inFile = open('mdraw_CHIR_curve_mdraw_LIST.txt','r') | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(' ') | |
| id2gene[splitUp[1]] = splitUp[0] | |
| inFile.close() | |
| # Load up network motifs | |
| networkMotifs = {} | |
| inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT.dump','r') | |
| inFile.readline() # Get rid of header | |
| inFile.readline() # Get rid of header | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| #numMotif = int(splitUp.pop(0).replace('2','1'), 2) | |
| motif = splitUp.pop(0) | |
| if motif in subnetworks: | |
| if not motif in networkMotifs: | |
| networkMotifs[motif] = [] | |
| networkMotifs[motif].append([id2gene[i] for i in splitUp]) | |
| inFile.close() | |
| #Up not needed? | |
| # Load up genesets and netMatrices | |
| data = {} | |
| consistent = {} | |
| gene2probe = {} | |
| subsets = ['all'] | |
| rsq = 0.9 | |
| for subset in subsets: | |
| inFile = open('results_'+subset+'.csv','r') | |
| inFile.readline() # Get rid of header | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| if not splitUp[2] in data: | |
| data[splitUp[2]] = {} | |
| consistent[splitUp[2]] = {} | |
| if not splitUp[3] in data[splitUp[2]]: | |
| data[splitUp[2]][splitUp[3]] = {} | |
| consistent[splitUp[2]][splitUp[3]] = {} | |
| data[splitUp[2]][splitUp[3]][subset] = splitUp | |
| if not (splitUp[6]=='Inconsistent' or splitUp[9]=='Inconsistent' or splitUp[12]=='Inconsistent') and (splitUp[6]=='NA' or float(splitUp[6])>=rsq) and (splitUp[9]=='NA' or float(splitUp[9])>=rsq) and (splitUp[12]=='NA' or float(splitUp[12])>=rsq): | |
| consistent[splitUp[2]][splitUp[3]][subset] = 'Yes' | |
| else: | |
| consistent[splitUp[2]][splitUp[3]][subset] = 'No' | |
| inFile.close() | |
| # Read in Biotapesty file | |
| inEdges = {} | |
| inFile = open('biotapestry_CHIR_curve.csv','r') | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| if splitUp[0]=='"# Standard Interactions"': | |
| inFile.readline() # Remove header | |
| break | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| node1 = splitUp[3].strip('"') | |
| node2 = splitUp[5].strip('"') | |
| for i in node2.split(';'): | |
| if not i in inEdges: | |
| inEdges[i] = {} | |
| if not node1 in inEdges[i]: | |
| inEdges[i][node1] = splitUp[6].strip('"') | |
| inFile.close() | |
| # To translate gene ids later | |
| symbol2entrez = {} | |
| entrez2symbol = {} | |
| with open('gene2entrezId.csv','r') as inFile: | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.strip().split(',') | |
| entrez2symbol[split[1]] = split[0] | |
| symbol2entrez[split[0]] = split[1] | |
| # Gather data | |
| geneSets = [] | |
| #probeSets = [] | |
| ids = [] | |
| netMatrices = [] | |
| consistencies = [] | |
| networks = [] | |
| for netMotif in data: | |
| for instance in data[netMotif]: | |
| # Only plot if significant amount of variance explained | |
| if len([i for i in subsets if consistent[netMotif][instance][i]=='Yes']) > 0: | |
| genes = data[netMotif][instance]['all'][3].split(';') | |
| if len(set(genes))==len(genes): | |
| geneSets.append(genes) | |
| ids.append('id'+data[netMotif][instance]['all'][1]+' '+data[netMotif][instance]['all'][2]) | |
| netMatrices.append([i for i in data[netMotif][instance]['all'][2]]) | |
| tmp = {'all':[data[netMotif][instance]['all'][i] for i in [6,9,12]]} | |
| for i in tmp: | |
| for j in range(0,len(tmp[i])): | |
| if tmp[i][j]=='Inconsistent': | |
| tmp[i][j] = '-0.25' | |
| consistencies.append(tmp) | |
| tmp = dict(zip(genes,[dict(zip(genes,[0 for gene in genes])) for gene in genes])) | |
| for gene1 in genes: | |
| if gene1 in inEdges: | |
| for gene2 in inEdges[gene1]: | |
| if gene2 in genes: | |
| if inEdges[gene1][gene2]=='positive': | |
| tmp[gene2][gene1] = 1 | |
| elif inEdges[gene1][gene2]=='negative': | |
| tmp[gene2][gene1] = 2 | |
| networks.append(','.join([','.join([str(tmp[gene2][gene1]) for gene1 in genes]) for gene2 in genes])) | |
| #Pull in Expression Data for plotting | |
| df = pd.read_csv('tfExp.csv', header=0, index_col=0)#.transpose() | |
| #move code section | |
| lindict = {} | |
| for a in data.keys(): | |
| for b in range(len(data[a])): | |
| lindict[data[a].items()[b][0]]=data[a].items()[b][1]['all'] | |
| nodeColors = ['k','r','g'] | |
| for fModId in data: | |
| for tmp in data[fModId]: | |
| tf3s = tmp.split(';') | |
| nodes = dict(zip(tf3s,nodeColors)) | |
| if len([i for i in tf3s if int(symbol2entrez[i]) in df.index])==len(tf3s): | |
| gl1 = df.loc[[int(symbol2entrez[str(tf3s[0])]),int(symbol2entrez[str(tf3s[1])]),int(symbol2entrez[str(tf3s[2])])]] | |
| #sort lists for graphs, minimum, by row | |
| cols1 = gl1.columns[gl1.mean().argsort()] | |
| gl2 = gl1[cols1] | |
| G = nx.DiGraph() | |
| for tfo in tf3s: | |
| if tfo in inEdges: | |
| for tfi in inEdges[tfo]: | |
| if tfi in tf3s: | |
| if inEdges[tfo][tfi]=='positive': | |
| G.add_edge(tfi,tfo,color='g') | |
| else: | |
| G.add_edge(tfi,tfo,color='r') | |
| #G = nx.DiGraph(tempNet) | |
| node_colors = [nodes[i] for i in list(G.nodes)] | |
| edges = G.edges() | |
| edge_colors = [G[u][v]['color'] for u,v in edges] | |
| #create figure with three subplots | |
| fig = plt.figure(figsize=(12,12)) | |
| grid = plt.GridSpec(2, 2, wspace=0.4, hspace=0.3) | |
| #Network Figure | |
| plt.subplot(grid[1,0]) | |
| nx.draw(G, pos=nx.circular_layout(G),label_pos=3,with_labels=False,node_size=500,node_color=node_colors,edge_color=edge_colors,width=3,arrowsize=25,font_color='w') | |
| #createing label offset on network figure | |
| label_ratio = 0.27 | |
| pos_labels = {} | |
| #For each node in the Graph | |
| pos = nx.circular_layout(G) | |
| p=0 | |
| for aNode in G.nodes(): | |
| #Get the node's position from the layout | |
| x,y = pos[aNode] | |
| #Set Offset | |
| if p==0: | |
| pos_labels[aNode] = (x-label_ratio, y) | |
| else: | |
| pos_labels[aNode] = (x+label_ratio, y) | |
| p+=1 | |
| nx.draw_networkx_labels(G,pos=pos_labels,fontsize=3) | |
| #Gene Plot Figure | |
| plt.subplot(grid[0,0:]) | |
| plt.plot(gl2.loc[int(symbol2entrez[str(tf3s[0])])].tolist(),nodes[tf3s[0]],gl2.loc[int(symbol2entrez[str(tf3s[1])])].tolist(),nodes[tf3s[1]],gl2.loc[int(symbol2entrez[str(tf3s[2])])].tolist(),nodes[tf3s[2]]) | |
| plt.tight_layout() | |
| #Bar Graph Figure | |
| ax = plt.subplot(grid[1,1]) | |
| str3 = str(tf3s[0])+';'+str(tf3s[1])+';'+str(tf3s[2]) | |
| fulllist = data[fModId][';'.join(tf3s)]['all'] | |
| linlist = [float(fulllist[6]),float(fulllist[9]),float(fulllist[12])] | |
| index = np.arange(len(tf3s)) | |
| # graph1 = plt.bar(index[0], linlist[0],align='center',color = nodeColors[0]) | |
| # graph2 = plt.bar(index[1], linlist[1],align='center',color = nodeColors[1]) | |
| # graph3 = plt.bar(index[2], linlist[2],align='center',color = nodeColors[2]) | |
| plt.xticks(index, tf3s) | |
| plt.ylim((0,1)) | |
| plt.ylabel('R^2') | |
| plt.xlabel('Gene') | |
| g1, g2, g3 = plt.bar(index, linlist) | |
| g1.set_facecolor(nodes[tf3s[0]]) | |
| g2.set_facecolor(nodes[tf3s[1]]) | |
| g3.set_facecolor(nodes[tf3s[2]]) | |
| plt.show() | |
| break | |
| break | |
| #temp=data[data.keys()[0]].items()[0][1]['all'][6] | |
| #plotly code | |
| #import plotly.plotly as py | |
| #import plotly.tools as tls | |
| #mpl_fig = plt.figure() | |
| #pl_fig = plt.figure() | |
| #ax = mpl_fig.add_subplot(111) | |
| # | |
| #N = 5 | |
| #ind = np.arange(N) | |
| #width = 0.3 | |
| #vals = [1,2,3,4,5] | |
| #barlist = ax.bar(ind, vals, width) | |
| # | |
| #barlist[0].set_color('g') | |
| #barlist[1].set_color('b') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment