Created
September 22, 2016 01:54
-
-
Save steveandroulakis/b94b0dea225e623f42f9c5f0da9d8bbb to your computer and use it in GitHub Desktop.
Really rough BAM processing pipeline: symlinks each source .bam file into a destination directory and indexes it with samtools.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| import sys, getopt | |
| import glob | |
| import os | |
| import ntpath | |
| from time import strftime | |
# Start-of-run banner: title, current local timestamp, a rule, then a blank gap.
print('\n'.join([
    '******bam_pipe******',
    strftime("%Y-%m-%d %H:%M:%S"),
    '********************',
    '\n',
]))
def sub_index_bam(bam_file):
    """Run ``samtools index`` on ``bam_file`` and echo the tool's output.

    Everything samtools writes to stdout, then stderr, is printed line by
    line. A failure to start samtools or a non-zero exit status is reported
    on stdout rather than raised.

    Args:
        bam_file: Path of the BAM file to index.

    Returns:
        The exit status of samtools, or 127 if samtools could not be started.
    """
    import subprocess

    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters reach samtools unchanged.
    command = ['samtools', 'index', bam_file]
    try:
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as start_error:
        print('ERROR could not run samtools: %s' % start_error)
        return 127

    # communicate() drains both pipes while waiting; calling wait() first and
    # reading afterwards can deadlock once a pipe buffer fills up.
    std_out, std_err = process.communicate()
    for line in std_out.splitlines():
        print(line)
    for line in std_err.splitlines():
        print(line)

    if process.returncode != 0:
        print('ERROR samtools index exited with status %s for %s'
              % (process.returncode, bam_file))
    return process.returncode
| #n = 10 | |
| #source = '/mnt/ceph/mbp/servers/bioinformatics-platform/home/steve/projects/aspree_bam_pipe/source' | |
| #dest = '/mnt/ceph/mbp/servers/bioinformatics-platform/home/steve/projects/aspree_bam_pipe/dest' | |
def bam_pipe(source, dest, n):
    """Link BAM files from ``source`` into ``dest`` and index each new link.

    For every ``*.bam`` entry directly inside ``source``: if an entry with the
    same name already exists in ``dest`` (including a dangling symlink) the
    file is skipped; otherwise a symlink to the source file is created in
    ``dest`` and ``sub_index_bam`` is run on that link. Progress and a final
    summary are printed to stdout.

    Args:
        source: Directory holding the BAM files. A relative path is resolved
            against the current working directory.
        dest: Directory that receives the symlinks. A relative path is
            resolved against the current working directory.
        n: Optional cap on the number of files examined, as anything ``int()``
            accepts; ``None`` means no cap. Skipped files count toward it.

    Raises:
        ValueError: If ``n`` is not ``None`` and cannot be converted to int.
        OSError: If a symlink cannot be created (for example when ``dest``
            does not exist).
    """
    # Convert the limit once, up front, so a bad value fails before any work.
    limit = None if n is None else int(n)

    source_path = source
    if not os.path.isabs(source):
        source_path = os.path.join(os.getcwd(), source)
    dest_dir = dest
    if not os.path.isabs(dest):
        dest_dir = os.path.join(os.getcwd(), dest)

    print('source path is: %s' % source)
    print('dest path is: %s' % dest)
    if limit is not None:
        print('files to process in source (up to): %s' % n)
    print('\n')

    skipped = 0
    processed = 0
    # glob order is filesystem-dependent; sorting makes it reproducible which
    # files fall inside the limit.
    bam_files = sorted(glob.glob('%s/*.bam' % source_path))
    for count, name in enumerate(bam_files):
        if limit is not None and count == limit:
            print('ENDING run because file limit count reached: %s' % count)
            break

        # os.path (not ntpath): a backslash is a legal character in a POSIX
        # file name and must not be treated as a separator.
        basename = os.path.basename(name)
        print('*** bam file: %s ***' % basename)

        dest_path = os.path.join(dest_dir, basename)
        source_file = os.path.join(source_path, basename)

        # lexists() also reports dangling symlinks, which exists() misses and
        # which would make os.symlink() fail with "file exists".
        if os.path.lexists(dest_path):
            skipped += 1
            print('SKIPPING processing of file because it exists in destination')
        else:
            processed += 1
            print('LINKING source file in destination location for processing')
            os.symlink(source_file, dest_path)
            print('PROCESSING bam file (creating INDEX)')
            sub_index_bam(dest_path)
        print('\n')

    print('\nDone. Files Processed: %s. Files Skipped: %s.' % (processed, skipped))
    return
def main(argv):
    """Parse command-line options and run the BAM pipeline.

    Recognised options:
        -h                  print usage and exit
        -i, --ifile DIR     source directory (required)
        -o, --ofile DIR     destination directory (required)
        -n, --number N      optional limit on the number of files

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 0 after ``-h``; with status 2 for unknown
            options, a missing ``-i``/``-o``, or a non-integer ``-n``.
    """
    usage = ('test.py -i <inputfile> -o <outputfile> '
             '(optional: -n <number of files> | tee -a logfile.txt)')
    limit_hint = 'optional argument eg. -n 10 (to limit to 10 files)'

    try:
        opts, _ = getopt.getopt(argv, "hi:o:n:", ["ifile=", "ofile=", "number="])
    except getopt.GetoptError:
        print(usage)
        print(limit_hint)
        sys.exit(2)

    inputfile = ''
    outputfile = ''
    number = None
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            print(limit_hint)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-n", "--number"):
            number = arg

    # Collect every problem before exiting so the user sees them all at once.
    fail = False
    if not inputfile:
        print('need input directory parameter (-i)')
        fail = True
    if not outputfile:
        print('need output directory parameter (-o)')
        fail = True
    if number is not None:
        try:
            number = int(number)
        except ValueError:
            print('file limit parameter (-n) must be an integer, got: %s' % number)
            fail = True

    if fail:
        # Same status as the bad-option path, so callers and shell scripts
        # can detect that nothing was processed.
        sys.exit(2)

    bam_pipe(inputfile, outputfile, number)
# Script entry point: run the CLI only when executed directly (not on import),
# passing the command-line arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment