Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Last active May 22, 2018 15:39
Show Gist options
  • Select an option

  • Save ssstrike/3bec39f584998aca8a3472ff840d5d47 to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/3bec39f584998aca8a3472ff840d5d47 to your computer and use it in GitHub Desktop.
input csv of gene expression, creates histogram for p and r values.
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 07 21:10:33 2018
@author: Fuzzy
"""
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from scipy import stats
#open json file for geneNetwork
import json
with open('geneNetork74.json', 'r') as f:
geneNetwork = json.load(f)
#creates dataframe and removes rows Unnamed 615-638 (they are empty)
df = pd.read_csv('tfExp.csv',header=0,index_col=0)
#ALL
allrList = []
allpList = []
allcorrectPList = []
alllogpList = []
allkey1List = [] #regulator list
allkey2List = [] #target list
#iterate through all ids.
for i in range(len(df.index.values)):
row1 = df.loc[df.index.values[i]]
#iterate through all ids for second comparison.
for j in range(i+1, len(df.index.values)):
row2 = df.loc[df.index.values[j]]
#person r function
p = stats.pearsonr(row1,row2)
allkey1List.append(df.index.values[i])
allkey2List.append(df.index.values[j])
allrList.append(p[0])
allpList.append(p[1])
#logarithmic transformation.
for x in allpList:
if not x==0:
alllogpList.append(-math.log10(x))
else:
alllogpList.append(302)
#FDR calculating the corrected pList.
size = len(allpList)
rankValue = dict(zip(np.argsort(allpList),range(len(allpList))))
allcorrectPList = allpList
for i in range(len(allpList)):
allcorrectPList[i] = allpList[i]*(float(len(allpList))/(rankValue[i]+1))
print stats.skew(allrList,nan_policy='omit')
print stats.skewtest(allrList,nan_policy='omit')
#lists for various p and r values and their associated TF gene
rList = []
pList = []
correctPList = []
logpList = []
key1List = []#regulator list
key2List = []#target list
#iterate through all ids.
for i in range(len(df.index.values)):
if(str(df.index.values[i]) in geneNetwork): #filter out unwanted IDs
row1 = df.loc[df.index.values[i]]
#iterate through all ids for second comparison.
for j in range(i+1, len(df.index.values)):
if (str(df.index.values[j]) in geneNetwork[str(df.index.values[i])]):#filter again
row2 = df.loc[df.index.values[j]]
#person r function
p = stats.pearsonr(row1,row2)
key1List.append(df.index.values[i])
key2List.append(df.index.values[j])
rList.append(p[0])
pList.append(p[1])
#logarithmic transformation.
for x in pList:
logpList.append(-math.log10(x))
#FDR calculating the corrected pList.
size = len(pList)
rankValue = dict(zip(np.argsort(pList),range(len(pList))))
correctPList = pList
for i in range(len(pList)):
correctPList[i] = pList[i]*(float(len(pList))/(rankValue[i]+1))
print stats.skew(rList)
print stats.skewtest(rList)
#create and show histogram of r values from -1 to 1
plt.hist(rList,bins='auto',range=(-1,1),density=True,alpha=0.5,label='Filtered')
plt.hist([i for i in allrList if not math.isnan(i)],bins='auto',range=(-1,1),density=True,alpha=0.5,label='All')
plt.title("r hist with auto bins")
plt.show()
#create and show histogram of p values
plt.hist(logpList,bins='auto') #range=(0,1))
plt.title("p-value hist with auto bins")
plt.show()
plt.hist([-math.log10(i) for i in correctPList],bins='auto') #range=(0,1))
plt.title("p-value hist with auto bins")
plt.show()
#plot size of list according to rvalue
numList = []
xList = []
x=1
while x>0.3:
numList.append(len([i for i in rList if abs(i)>=x]))
xList.append(x)
x-=0.01
plt.plot(xList,numList)
#pull out corrolative interations
r2List = []
indexList = []
nameList = []
r2List = [i for i in rList if abs(i)>=0.5]# and rList.index(i)<0.05]
for r in r2List:
try:
indexList.append(rList.index(r))
except ValueError:
indexList.append(rList.index((-r)))
tf2tfLink = {}
tf2tfNet = {}
for position in indexList:
reg = key1List[position]
targ = key2List[position]
tf2tfNet[reg] = []
tf2tfNet[reg].append(targ)
if not reg in tf2tfLink:
tf2tfLink[reg] = []
if not targ in tf2tfLink[reg]:
tf2tfLink[reg].append(targ)
#form picture using networkx
G = nx.from_dict_of_lists(tf2tfNet)
#G = nx.Graph(tf2tfNet)
nx.draw_shell(G,with_labels=True,node_size=600)#, pos=nx.circular_layout(G)
plt.show()
#for translation
name2Entrez = {}
with open('gene2entrezId.CSV','r') as inFile:
while 1:
inLine = inFile.readline()
if not inLine:
break
split = inLine.strip().split(',')
name2Entrez[split[1]] = split[0]
''' error
#write subNetwork .sif file
writeSub = []
interact = 'r2t'
for key in tf2tfNet:
for gene in tf2tfNet[key]:
if key in name2Entrez:
key1 = name2Entrez[key]
else:
key1 = key
if gene in name2Entrez:
gene1 = name2Entrez[gene]
else:
gene1 = gene
writeSub.append(key1+'\t'+interact+'\t'+gene1)
with open('tf2tfNet.sif','w') as outFile:
outFile.write('\n'.join(writeSub))
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment