Skip to content

Instantly share code, notes, and snippets.

@mkcook
Created November 14, 2018 06:01
Show Gist options
  • Select an option

  • Save mkcook/6326fe1ea37cb7ab13e6e66a5c80fff3 to your computer and use it in GitHub Desktop.

Select an option

Save mkcook/6326fe1ea37cb7ab13e6e66a5c80fff3 to your computer and use it in GitHub Desktop.
Practice Computational Task
# Import libraries up front
import json
# Candidate TF regulators: Entrez Gene IDs taken from Table S13 of
# Plaisier et al., Cell Systems 2016.
# Entrez ID background: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3013746/
# NOTE: this name shadows the builtin input(); it is kept because the rest of
# the script refers to the list by this name.
input = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
    '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
    '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
    '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
    '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
    '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
    '7707', '7764', '23528', '201516',
]
# Load the TF regulator -> TF target gene relationships from JSON.
# Recipe reference:
# https://www.safaribooksonline.com/library/view/python-cookbook-3rd/9781449357337/ch06s02.html
# The file is read as UTF-8 explicitly so decoding does not depend on the
# platform's default locale encoding.
with open('C:/Users/mcook/OneDrive/Documents/ASU/Research/TF_Network/TF_Network/tfbsDb_plus_and_minus_5000_entrez.json', 'r', encoding='utf-8') as f:
    tfbsDb = json.load(f)

# Preview the data. Keys are Motif IDs
# (http://jaspar.genereg.net/search?q=Homo%20sapiens&collection=CORE&tax_group=vertebrates);
# the values under each Motif ID are Entrez IDs.
_motif_ids = list(tfbsDb)  # materialise the keys once for both previews
print(_motif_ids[0:5])
if _motif_ids:
    # Guarded so an empty database does not raise IndexError on the preview
    print(tfbsDb[_motif_ids[0]][0:5])
# Read the humanTFs table and build lookups in both directions between
# Entrez Gene IDs and motif names.
id2motif = {}  # key = Entrez ID, value = list of motif names for that ID
motif2id = {}  # key = motif name, value = Entrez ID
with open('C:/Users/mcook/OneDrive/Documents/ASU/Research/TF_Network/TF_Network/id_conversion/humanTFs_All.CSV','r') as inFile:
    # The first line is the column header: strip() removes the trailing
    # newline and split(',') breaks the line into columns.
    header = inFile.readline().strip().split(',')
    print (header)
    # Iterating the file object yields the remaining lines one at a time.
    for inLine in inFile:
        fields = inLine.strip().split(',')
        # Skip blank or truncated rows; indexing column 2 would otherwise
        # raise IndexError (e.g. on a trailing empty line).
        if len(fields) < 3:
            continue
        # Column 0 holds the motif name and column 2 the Entrez ID
        # (NOTE(review): the meaning of column 1 is not used here - confirm
        # against the header printed above).
        motif_name, entrez_id = fields[0], fields[2]
        # One Entrez ID can have several motifs, so collect them in a list.
        id2motif.setdefault(entrez_id, []).append(motif_name)
        # Each motif name maps to a single Entrez ID; if a motif name is
        # repeated in the file, the last row wins.
        motif2id[motif_name] = entrez_id
## To build a TF regulator to TF target gene network (constrained to TFs within the input list).
## This will require mapping from:
## 1. Input list of potential TF regulator Entrez Gene IDs (input)
## 2. List of Motif IDs for an Entrez Gene ID in the input list (id2motif)
## 3. TF target genes that are Entrez Gene IDs that are the values under a specific Motif ID in tfbsDb
## 4. Restrict TF target genes to only those in the input list
## 5. Add new entry to tfNetwork dictionary that has as the key the TF regulator and the values all the TF target genes
def _build_tf_network(tf_ids, id2motif, tfbsDb):
    """Return a dict mapping each TF regulator to its target TFs.

    Args:
        tf_ids: iterable of Entrez Gene IDs (strings) that are both the
            candidate regulators and the only allowed targets.
        id2motif: dict mapping an Entrez Gene ID to a list of motif names.
        tfbsDb: dict mapping a motif name to a list of target Entrez Gene IDs.

    Returns:
        A dict whose keys are the IDs from ``tf_ids`` that have at least one
        motif in ``id2motif``. Each value is a flat list of the targets found
        under any of that regulator's motifs, restricted to ``tf_ids``, with
        duplicates removed and first-seen order preserved. A regulator whose
        motifs are all missing from ``tfbsDb`` gets an empty list.

    Neither ``id2motif`` nor ``tfbsDb`` is modified.
    """
    tf_ids = list(tf_ids)
    allowed = set(tf_ids)  # built once so each membership test is O(1)
    network = {}
    for regulator in tf_ids:
        motifs = id2motif.get(regulator)
        if not motifs:
            # Some IDs have no motif at all; they get no entry.
            continue
        targets = []
        seen = set()
        for motif in motifs:
            # Some motifs are absent from tfbsDb; treat them as having no
            # targets instead of touching the lists stored in tfbsDb.
            for target in tfbsDb.get(motif, ()):
                if target in allowed and target not in seen:
                    seen.add(target)
                    targets.append(target)
        # Key by the regulator itself, once, after all motifs are merged.
        network[regulator] = targets
    return network


tfNetwork = _build_tf_network(input, id2motif, tfbsDb)
## testing
# Spot-check a single regulator. A bare `tfNetwork['430']` shows nothing when
# run as a script and raises KeyError if that ID has no entry, so print the
# value and fall back to None when it is absent.
print(tfNetwork.get('430'))
# Dump every regulator with its targets, then the number of regulators.
for regulator, targets in tfNetwork.items():
    print(regulator, targets)
print(len(tfNetwork))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment