Created
March 31, 2016 07:46
-
-
Save necrolyte2/fbd9188edbf58ee57a60e36771c75d78 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from path import Path | |
| import argparse | |
| import sys | |
def parse_args(argv=None):
    '''
    Build the command line parser and parse the arguments.

    :param argv: optional list of argument strings to parse. The default of
        None makes argparse read the process command line, which is what
        this function always did before the parameter existed.
    :return: the parsed argparse namespace with ``runpath`` and ``outdir``
        (both converted with ``Path``) and the boolean ``symlink``
    '''
    parser = argparse.ArgumentParser(
        description='Given a MiSeq run path, create directories for all samples'
                    ' and symlink or copy the fastq.gz files into them for each sample'
    )
    parser.add_argument(
        'runpath',
        type=Path,
        help='Path to MiSeq run'
    )
    parser.add_argument(
        'outdir',
        type=Path,
        help='Path to create directories for each read'
    )
    # --copy is stored inverted: ``symlink`` stays True unless the flag is given.
    parser.add_argument(
        '--copy',
        dest='symlink',
        default=True,
        action='store_false',
        help='Copy the fastq into output dir. Default is to symlink'
    )
    return parser.parse_args(argv)
def fastq_to_samplename(path):
    '''
    Return the sample name encoded in a fastq file name.

    The sample name is the part of the file's base name that precedes the
    first underscore. A base name without an underscore is returned whole.

    :param path: a ``Path``, a plain string or any other path-like object
    :return: the leading, underscore-delimited part of the base name

    >>> fastq_to_samplename(Path('anything_there.whatever')) == 'anything'
    True
    >>> fastq_to_samplename('/path/to/john_doe.txt') == 'john'
    True
    '''
    # Imported here so the helper does not depend on a module-level import.
    import os.path

    # basename() works on plain strings as well as on path objects, so the
    # caller is not forced to wrap the argument in Path first.
    return os.path.basename(path).split('_')[0]
def mk_readdir(fastq, outdir, symlink=True):
    '''
    Put one fastq file into its sample's directory underneath outdir.

    The directory ``outdir/<samplename>`` is created if it is missing and
    the fastq is then symlinked or copied into it under its own file name.
    If something already occupies that destination, a message is written to
    stderr and the file is left untouched.

    :param fastq: ``Path`` of the fastq file to place
    :param outdir: ``Path`` of the directory holding the per-sample directories
    :param symlink: True to create a symlink, False to copy the file
    '''
    samplename = fastq_to_samplename(fastq)
    sample_dir = outdir / samplename
    # Ensure sample dir is created
    sample_dir.makedirs_p()
    out_path = sample_dir / fastq.name
    # exists() follows symlinks, so it is False for a dangling link left by
    # an earlier run; islink() catches that case before symlink() would fail
    # because the name is already taken.
    if out_path.exists() or out_path.islink():
        sys.stderr.write('{0} exists. Not recreating\n'.format(out_path))
        return
    if symlink:
        sys.stdout.write('Symlinking {0} to {1}\n'.format(fastq, out_path))
        # A relative link target is resolved against the directory holding
        # the link, not the current working directory, so link to the
        # absolute location to keep the link valid for relative run paths.
        fastq.abspath().symlink(out_path)
    else:
        sys.stdout.write('Copying {0} to {1}\n'.format(fastq, out_path))
        fastq.copyfile(out_path)
def mk_readdirs(fastqs, outdir, symlink=True):
    '''
    Call mk_readdir for every fastq in fastqs, in iteration order.

    :param fastqs: iterable of fastq paths
    :param outdir: directory that receives the per-sample directories
    :param symlink: passed through to mk_readdir unchanged
    '''
    for fastq_file in fastqs:
        mk_readdir(fastq_file, outdir, symlink=symlink)
def main():
    '''
    Script entry point.

    Parses the command line, collects the ``*.fastq.gz`` files found
    directly inside ``<runpath>/Data/Intensities/BaseCalls`` and hands them
    to mk_readdirs together with the output directory and the symlink flag.
    '''
    options = parse_args()
    basecalls_dir = options.runpath / 'Data' / 'Intensities' / 'BaseCalls'
    mk_readdirs(
        basecalls_dir.glob('*.fastq.gz'),
        options.outdir,
        options.symlink,
    )


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment