cplaisier · June 26, 2018 18:04
diff --git a/const.py b/const.py
 # -*- coding: utf-8 -*-
 """
 Created on Mon Jun 18 21:14:50 2018

 @author: Fuzzy
 """

 import numpy as np
 import pandas as pd
 import networkx as nx
 import matplotlib.pyplot as plt
 from scipy.stats import pearsonr



 ### Load sub-networks ###
 # Load up FANMOD subgraph enumeration results
 inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT','r')
 # Get rid of headers
 while 1:
    line = inFile.readline()
    if line.strip()=='Result overview:':
        inFile.readline()
        inFile.readline()
        inFile.readline()
        inFile.readline()
        break
 subnetworks = []
 filter_pv = 0.05
 filter_z = 2
 while 1:
    line = inFile.readline()
    if not line:
        break
    line2 = inFile.readline().strip() # Get rid of second adjacency matrix line 
    line3 = inFile.readline().strip() # Get rid of third adjacency matrix line
    #line4 = inFile.readline().strip() # Get rid of fourth adjacency matrix line
    #print line4
    inFile.readline() # Get rid of blank line
    splitUp = [i for i in line.strip().split(' ') if i]
    id = splitUp[1]+line2+line3 #+line4
    if float(splitUp[6]) <= filter_pv and float(splitUp[5]) >= filter_z:
        subnetworks.append(id)
 inFile.close()

 # Create dictionary to convert
 id2gene = {}
 inFile = open('mdraw_CHIR_curve_mdraw_LIST.txt','r')
 while 1:
    line = inFile.readline()
    if not line:
        break
    splitUp = line.strip().split(' ')
    id2gene[splitUp[1]] = splitUp[0]
 inFile.close()

 # Load up network motifs
 networkMotifs = {}
 inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT.dump','r')
 inFile.readline() # Get rid of header
 inFile.readline() # Get rid of header
 while 1:
    line = inFile.readline()
    if not line:
        break
    splitUp = line.strip().split(',')
    #numMotif = int(splitUp.pop(0).replace('2','1'), 2)
    motif = splitUp.pop(0)
    if motif in subnetworks:
        if not motif in networkMotifs:
            networkMotifs[motif]  = []
        networkMotifs[motif].append([id2gene[i] for i in splitUp])
 inFile.close()
 #Up not needed?

 # Load up genesets and netMatrices
 data = {}
 consistent = {}
 gene2probe = {}
 subsets = ['all']
 rsq = 0.9
 for subset in subsets:
    inFile = open('results_'+subset+'.csv','r')
    inFile.readline() # Get rid of header
    while 1:
        line = inFile.readline()
        if not line:
            break
        splitUp = line.strip().split(',')
        if not splitUp[2] in data:
            data[splitUp[2]] = {}
            consistent[splitUp[2]] = {}
        if not splitUp[3] in data[splitUp[2]]:
            data[splitUp[2]][splitUp[3]] = {}
            consistent[splitUp[2]][splitUp[3]] = {}
        data[splitUp[2]][splitUp[3]][subset] = splitUp
        if not (splitUp[6]=='Inconsistent' or splitUp[9]=='Inconsistent' or splitUp[12]=='Inconsistent') and (splitUp[6]=='NA' or float(splitUp[6])>=rsq) and (splitUp[9]=='NA' or float(splitUp[9])>=rsq) and (splitUp[12]=='NA' or float(splitUp[12])>=rsq):
            consistent[splitUp[2]][splitUp[3]][subset] = 'Yes'
        else:
            consistent[splitUp[2]][splitUp[3]][subset] = 'No'
    inFile.close()

 # Read in Biotapesty file
 inEdges = {}
 inFile = open('biotapestry_CHIR_curve.csv','r')
 while 1:
    line = inFile.readline()
    if not line:
        break
    splitUp = line.strip().split(',')
    if splitUp[0]=='"# Standard Interactions"':
        inFile.readline() # Remove header
        break
 while 1:
    line = inFile.readline()
    if not line:
        break
    splitUp = line.strip().split(',')
    node1 = splitUp[3].strip('"')
    node2 = splitUp[5].strip('"')
    for i in node2.split(';'):
        if not i in inEdges:
            inEdges[i] = {}
        if not node1 in inEdges[i]:
            inEdges[i][node1] = splitUp[6].strip('"')
 inFile.close()

 # To translate gene ids later
 symbol2entrez = {}
 entrez2symbol = {}
 with open('gene2entrezId.csv','r') as inFile:
    while 1:
        inLine = inFile.readline()
        if not inLine:
            break
        split = inLine.strip().split(',')
        entrez2symbol[split[1]] = split[0]
        symbol2entrez[split[0]] = split[1]

 # Gather data
 geneSets = []
 #probeSets = []
 ids = []
 netMatrices = []
 consistencies = []
 networks = []
 for netMotif in data:
    for instance in data[netMotif]:
        # Only plot if significant amount of variance explained
        if len([i for i in subsets if consistent[netMotif][instance][i]=='Yes']) > 0:
            genes = data[netMotif][instance]['all'][3].split(';')
            if len(set(genes))==len(genes):
                geneSets.append(genes)
                
                ids.append('id'+data[netMotif][instance]['all'][1]+' '+data[netMotif][instance]['all'][2])
                netMatrices.append([i for i in data[netMotif][instance]['all'][2]])
                tmp = {'all':[data[netMotif][instance]['all'][i] for i in [6,9,12]]}
                for i in tmp:
                    for j in range(0,len(tmp[i])):
                        if tmp[i][j]=='Inconsistent':
                            tmp[i][j] = '-0.25'
                consistencies.append(tmp)
                tmp = dict(zip(genes,[dict(zip(genes,[0 for gene in genes])) for gene in genes]))
                for gene1 in genes:
                    if gene1 in inEdges:
                        for gene2 in inEdges[gene1]:
                            if gene2 in genes:
                                if inEdges[gene1][gene2]=='positive':
                                    tmp[gene2][gene1] = 1
                                elif inEdges[gene1][gene2]=='negative':
                                    tmp[gene2][gene1] = 2
                networks.append(','.join([','.join([str(tmp[gene2][gene1]) for gene1 in genes]) for gene2 in genes]))

 #Pull in Expression Data for plotting     
 df = pd.read_csv('tfExp.csv', header=0, index_col=0)#.transpose()  

 #move code section
 lindict = {}
 for a in data.keys():
    for b in range(len(data[a])):
        lindict[data[a].items()[b][0]]=data[a].items()[b][1]['all']
             
 nodeColors = ['k','r','g']
 for fModId in data:
    for tmp in data[fModId]:
        tf3s = tmp.split(';')
        nodes = dict(zip(tf3s,nodeColors))
        if len([i for i in tf3s if int(symbol2entrez[i]) in df.index])==len(tf3s):
            gl1 = df.loc[[int(symbol2entrez[str(tf3s[0])]),int(symbol2entrez[str(tf3s[1])]),int(symbol2entrez[str(tf3s[2])])]]
            #sort lists for graphs, minimum, by row
            cols1 = gl1.columns[gl1.mean().argsort()]
            gl2 = gl1[cols1]
            G = nx.DiGraph()
            for tfo in tf3s:
                if tfo in inEdges:
                    for tfi in inEdges[tfo]:
                        if tfi in tf3s:
                            if inEdges[tfo][tfi]=='positive':
                                G.add_edge(tfi,tfo,color='g')
                            else:
                                G.add_edge(tfi,tfo,color='r')
            #G = nx.DiGraph(tempNet)
            node_colors = [nodes[i] for i in list(G.nodes)]
            edges = G.edges()
            edge_colors = [G[u][v]['color'] for u,v in edges]
            #create figure with three subplots
            fig = plt.figure(figsize=(12,12))
            grid = plt.GridSpec(2, 2, wspace=0.4, hspace=0.3)
            #Network Figure
            plt.subplot(grid[1,0])
            nx.draw(G, pos=nx.circular_layout(G),label_pos=3,with_labels=False,node_size=500,node_color=node_colors,edge_color=edge_colors,width=3,arrowsize=25,font_color='w')
            #createing label offset on network figure
            label_ratio = 0.27
            pos_labels = {} 
            #For each node in the Graph
            pos = nx.circular_layout(G)
            p=0
            for aNode in G.nodes():
                #Get the node's position from the layout
                x,y = pos[aNode]
                #Set Offset
                if p==0:
                    pos_labels[aNode] = (x-label_ratio, y)
                else:
                    pos_labels[aNode] = (x+label_ratio, y)
                p+=1
            nx.draw_networkx_labels(G,pos=pos_labels,fontsize=3)
            #Gene Plot Figure
            plt.subplot(grid[0,0:])
            plt.plot(gl2.loc[int(symbol2entrez[str(tf3s[0])])].tolist(),nodes[tf3s[0]],gl2.loc[int(symbol2entrez[str(tf3s[1])])].tolist(),nodes[tf3s[1]],gl2.loc[int(symbol2entrez[str(tf3s[2])])].tolist(),nodes[tf3s[2]])
            plt.tight_layout()
            #Bar Graph Figure
            ax = plt.subplot(grid[1,1])
            str3 = str(tf3s[0])+';'+str(tf3s[1])+';'+str(tf3s[2])
            fulllist = data[fModId][';'.join(tf3s)]['all']
            linlist = [float(fulllist[6]),float(fulllist[9]),float(fulllist[12])]
            index = np.arange(len(tf3s))
 #            graph1 = plt.bar(index[0], linlist[0],align='center',color = nodeColors[0])
 #            graph2 = plt.bar(index[1], linlist[1],align='center',color = nodeColors[1])
 #            graph3 = plt.bar(index[2], linlist[2],align='center',color = nodeColors[2])
            plt.xticks(index, tf3s)
            plt.ylim((0,1))
            plt.ylabel('R^2')
            plt.xlabel('Gene')
            g1, g2, g3 = plt.bar(index, linlist)
            g1.set_facecolor(nodes[tf3s[0]])
            g2.set_facecolor(nodes[tf3s[1]])
            g3.set_facecolor(nodes[tf3s[2]])
            plt.show()
        break
    break
    
    

 #temp=data[data.keys()[0]].items()[0][1]['all'][6]

 #plotly code
 #import plotly.plotly as py
 #import plotly.tools as tls
 #mpl_fig = plt.figure()
 #pl_fig = plt.figure()
 #ax = mpl_fig.add_subplot(111)
 #
 #N = 5
 #ind = np.arange(N)
 #width = 0.3
 #vals = [1,2,3,4,5]
 #barlist = ax.bar(ind, vals, width)
 #
 #barlist[0].set_color('g')
 #barlist[1].set_color('b')
	# -- coding: utf-8 --
	"""
	Created on Mon Jun 18 21:14:50 2018

	@author: Fuzzy
	"""

	import numpy as np
	import pandas as pd
	import networkx as nx
	import matplotlib.pyplot as plt
	from scipy.stats import pearsonr



	### Load sub-networks ###
	# Load up FANMOD subgraph enumeration results
	inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT','r')
	# Get rid of headers
	while 1:
	line = inFile.readline()
	if line.strip()=='Result overview:':
	inFile.readline()
	inFile.readline()
	inFile.readline()
	inFile.readline()
	break
	subnetworks = []
	filter_pv = 0.05
	filter_z = 2
	while 1:
	line = inFile.readline()
	if not line:
	break
	line2 = inFile.readline().strip() # Get rid of second adjacency matrix line
	line3 = inFile.readline().strip() # Get rid of third adjacency matrix line
	#line4 = inFile.readline().strip() # Get rid of fourth adjacency matrix line
	#print line4
	inFile.readline() # Get rid of blank line
	splitUp = [i for i in line.strip().split(' ') if i]
	id = splitUp[1]+line2+line3 #+line4
	if float(splitUp[6]) <= filter_pv and float(splitUp[5]) >= filter_z:
	subnetworks.append(id)
	inFile.close()

	# Create dictionary to convert
	id2gene = {}
	inFile = open('mdraw_CHIR_curve_mdraw_LIST.txt','r')
	while 1:
	line = inFile.readline()
	if not line:
	break
	splitUp = line.strip().split(' ')
	id2gene[splitUp[1]] = splitUp[0]
	inFile.close()

	# Load up network motifs
	networkMotifs = {}
	inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT.dump','r')
	inFile.readline() # Get rid of header
	inFile.readline() # Get rid of header
	while 1:
	line = inFile.readline()
	if not line:
	break
	splitUp = line.strip().split(',')
	#numMotif = int(splitUp.pop(0).replace('2','1'), 2)
	motif = splitUp.pop(0)
	if motif in subnetworks:
	if not motif in networkMotifs:
	networkMotifs[motif] = []
	networkMotifs[motif].append([id2gene[i] for i in splitUp])
	inFile.close()
	#Up not needed?

	# Load up genesets and netMatrices
	data = {}
	consistent = {}
	gene2probe = {}
	subsets = ['all']
	rsq = 0.9
	for subset in subsets:
	inFile = open('results_'+subset+'.csv','r')
	inFile.readline() # Get rid of header
	while 1:
	line = inFile.readline()
	if not line:
	break
	splitUp = line.strip().split(',')
	if not splitUp[2] in data:
	data[splitUp[2]] = {}
	consistent[splitUp[2]] = {}
	if not splitUp[3] in data[splitUp[2]]:
	data[splitUp[2]][splitUp[3]] = {}
	consistent[splitUp[2]][splitUp[3]] = {}
	data[splitUp[2]][splitUp[3]][subset] = splitUp
	if not (splitUp[6]=='Inconsistent' or splitUp[9]=='Inconsistent' or splitUp[12]=='Inconsistent') and (splitUp[6]=='NA' or float(splitUp[6])>=rsq) and (splitUp[9]=='NA' or float(splitUp[9])>=rsq) and (splitUp[12]=='NA' or float(splitUp[12])>=rsq):
	consistent[splitUp[2]][splitUp[3]][subset] = 'Yes'
	else:
	consistent[splitUp[2]][splitUp[3]][subset] = 'No'
	inFile.close()

	# Read in Biotapesty file
	inEdges = {}
	inFile = open('biotapestry_CHIR_curve.csv','r')
	while 1:
	line = inFile.readline()
	if not line:
	break
	splitUp = line.strip().split(',')
	if splitUp[0]=='"# Standard Interactions"':
	inFile.readline() # Remove header
	break
	while 1:
	line = inFile.readline()
	if not line:
	break
	splitUp = line.strip().split(',')
	node1 = splitUp[3].strip('"')
	node2 = splitUp[5].strip('"')
	for i in node2.split(';'):
	if not i in inEdges:
	inEdges[i] = {}
	if not node1 in inEdges[i]:
	inEdges[i][node1] = splitUp[6].strip('"')
	inFile.close()

	# To translate gene ids later
	symbol2entrez = {}
	entrez2symbol = {}
	with open('gene2entrezId.csv','r') as inFile:
	while 1:
	inLine = inFile.readline()
	if not inLine:
	break
	split = inLine.strip().split(',')
	entrez2symbol[split[1]] = split[0]
	symbol2entrez[split[0]] = split[1]

	# Gather data
	geneSets = []
	#probeSets = []
	ids = []
	netMatrices = []
	consistencies = []
	networks = []
	for netMotif in data:
	for instance in data[netMotif]:
	# Only plot if significant amount of variance explained
	if len([i for i in subsets if consistent[netMotif][instance][i]=='Yes']) > 0:
	genes = data[netMotif][instance]['all'][3].split(';')
	if len(set(genes))==len(genes):
	geneSets.append(genes)

	ids.append('id'+data[netMotif][instance]['all'][1]+' '+data[netMotif][instance]['all'][2])
	netMatrices.append([i for i in data[netMotif][instance]['all'][2]])
	tmp = {'all':[data[netMotif][instance]['all'][i] for i in [6,9,12]]}
	for i in tmp:
	for j in range(0,len(tmp[i])):
	if tmp[i][j]=='Inconsistent':
	tmp[i][j] = '-0.25'
	consistencies.append(tmp)
	tmp = dict(zip(genes,[dict(zip(genes,[0 for gene in genes])) for gene in genes]))
	for gene1 in genes:
	if gene1 in inEdges:
	for gene2 in inEdges[gene1]:
	if gene2 in genes:
	if inEdges[gene1][gene2]=='positive':
	tmp[gene2][gene1] = 1
	elif inEdges[gene1][gene2]=='negative':
	tmp[gene2][gene1] = 2
	networks.append(','.join([','.join([str(tmp[gene2][gene1]) for gene1 in genes]) for gene2 in genes]))

	#Pull in Expression Data for plotting
	df = pd.read_csv('tfExp.csv', header=0, index_col=0)#.transpose()

	#move code section
	lindict = {}
	for a in data.keys():
	for b in range(len(data[a])):
	lindict[data[a].items()[b][0]]=data[a].items()[b][1]['all']

	nodeColors = ['k','r','g']
	for fModId in data:
	for tmp in data[fModId]:
	tf3s = tmp.split(';')
	nodes = dict(zip(tf3s,nodeColors))
	if len([i for i in tf3s if int(symbol2entrez[i]) in df.index])==len(tf3s):
	gl1 = df.loc[[int(symbol2entrez[str(tf3s[0])]),int(symbol2entrez[str(tf3s[1])]),int(symbol2entrez[str(tf3s[2])])]]
	#sort lists for graphs, minimum, by row
	cols1 = gl1.columns[gl1.mean().argsort()]
	gl2 = gl1[cols1]
	G = nx.DiGraph()
	for tfo in tf3s:
	if tfo in inEdges:
	for tfi in inEdges[tfo]:
	if tfi in tf3s:
	if inEdges[tfo][tfi]=='positive':
	G.add_edge(tfi,tfo,color='g')
	else:
	G.add_edge(tfi,tfo,color='r')
	#G = nx.DiGraph(tempNet)
	node_colors = [nodes[i] for i in list(G.nodes)]
	edges = G.edges()
	edge_colors = [G[u][v]['color'] for u,v in edges]
	#create figure with three subplots
	fig = plt.figure(figsize=(12,12))
	grid = plt.GridSpec(2, 2, wspace=0.4, hspace=0.3)
	#Network Figure
	plt.subplot(grid[1,0])
	nx.draw(G, pos=nx.circular_layout(G),label_pos=3,with_labels=False,node_size=500,node_color=node_colors,edge_color=edge_colors,width=3,arrowsize=25,font_color='w')
	#createing label offset on network figure
	label_ratio = 0.27
	pos_labels = {}
	#For each node in the Graph
	pos = nx.circular_layout(G)
	p=0
	for aNode in G.nodes():
	#Get the node's position from the layout
	x,y = pos[aNode]
	#Set Offset
	if p==0:
	pos_labels[aNode] = (x-label_ratio, y)
	else:
	pos_labels[aNode] = (x+label_ratio, y)
	p+=1
	nx.draw_networkx_labels(G,pos=pos_labels,fontsize=3)
	#Gene Plot Figure
	plt.subplot(grid[0,0:])
	plt.plot(gl2.loc[int(symbol2entrez[str(tf3s[0])])].tolist(),nodes[tf3s[0]],gl2.loc[int(symbol2entrez[str(tf3s[1])])].tolist(),nodes[tf3s[1]],gl2.loc[int(symbol2entrez[str(tf3s[2])])].tolist(),nodes[tf3s[2]])
	plt.tight_layout()
	#Bar Graph Figure
	ax = plt.subplot(grid[1,1])
	str3 = str(tf3s[0])+';'+str(tf3s[1])+';'+str(tf3s[2])
	fulllist = data[fModId][';'.join(tf3s)]['all']
	linlist = [float(fulllist[6]),float(fulllist[9]),float(fulllist[12])]
	index = np.arange(len(tf3s))
	# graph1 = plt.bar(index[0], linlist[0],align='center',color = nodeColors[0])
	# graph2 = plt.bar(index[1], linlist[1],align='center',color = nodeColors[1])
	# graph3 = plt.bar(index[2], linlist[2],align='center',color = nodeColors[2])
	plt.xticks(index, tf3s)
	plt.ylim((0,1))
	plt.ylabel('R^2')
	plt.xlabel('Gene')
	g1, g2, g3 = plt.bar(index, linlist)
	g1.set_facecolor(nodes[tf3s[0]])
	g2.set_facecolor(nodes[tf3s[1]])
	g3.set_facecolor(nodes[tf3s[2]])
	plt.show()
	break
	break



	#temp=data[data.keys()[0]].items()[0][1]['all'][6]

	#plotly code
	#import plotly.plotly as py
	#import plotly.tools as tls
	#mpl_fig = plt.figure()
	#pl_fig = plt.figure()
	#ax = mpl_fig.add_subplot(111)
	#
	#N = 5
	#ind = np.arange(N)
	#width = 0.3
	#vals = [1,2,3,4,5]
	#barlist = ax.bar(ind, vals, width)
	#
	#barlist[0].set_color('g')
	#barlist[1].set_color('b')
No results found