Last active
March 2, 2026 17:31
-
-
Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.
Evaluating date timestamp info in typical long and short read pipeline.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # meant to be run with `uv run https://gist.githubusercontent.com/fomightez/f036794b91d10761466341644b3c1cac/raw/15da0209f7b09c1ba0130cf66646635c80a58bae/evaluate_date_timestamps_in_pipeline_stdout.py out.txt`, or similar | |
| # This evaluates the date timestamp info in the stdout of a typical long- and short-read pipeline. | |
| #####*****------------------------------------------------------------*****##### | |
| # This is meant to be run with `uv`. | |
| # First install `uv` with `pip install uv`, then run `!uv run {script_url} {input_text_filepath}` after defining those two variables. | |
| #-------------------------------------------------------------# | |
| # Times printed for now. (Make a dataframe?) | |
| #-------------------------------------------------------------# | |
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "numpy", | |
| # "pandas", | |
| # "openpyxl", | |
| # ] | |
| # /// | |
def collect_time_info(input_text_filepath):
    '''
    Print the time spent in each phase of a pipeline run, based on the three
    timestamps recorded in the run's stdout log.

    The file at `input_text_filepath` is read in full and searched for the
    text following each of these markers (up to the end of that line):

    - `Current timestamp at start: `
    - `Current timestamp before other steps but after fastq obtained: `
    - `Current timestamp after: `

    Each timestamp must be in the form `%Y-%m-%d_%H-%M-%S`. Three lines are
    printed: total time, download time, and main processing time after the
    download, each rounded to whole minutes. Durations of an hour or more also
    get an hours-and-minutes breakdown.

    Returns None; results are only printed.

    Raises IndexError if a marker is absent from the log, and ValueError (from
    `datetime.strptime`) if a timestamp does not match the expected format.
    '''
    with open(input_text_filepath, 'r') as thelog_stdout_file:
        std_out_string = thelog_stdout_file.read()

    # Parse all three timestamps up front so a malformed log fails before any
    # partial report has been printed.
    start_ts = _timestamp_after(
        std_out_string, 'Current timestamp at start: ')
    after_data_obtained_ts = _timestamp_after(
        std_out_string,
        'Current timestamp before other steps but after fastq obtained: ')
    after_main_events_ts = _timestamp_after(
        std_out_string, 'Current timestamp after: ')

    _print_duration(
        "Total time processing run", start_ts, after_main_events_ts)
    _print_duration(
        "Download time", start_ts, after_data_obtained_ts)
    _print_duration(
        "Main processing post-download",
        after_data_obtained_ts, after_main_events_ts)


def _timestamp_after(log_text, marker):
    '''Return the datetime parsed from the text following the first `marker`.'''
    # Imported here so this works when the file is imported as a module and
    # not only when run as a script.
    from datetime import datetime

    pieces = log_text.split(marker)
    if len(pieces) < 2:
        # Same exception type the bare `[1]` lookup produced, but naming the
        # marker that is missing.
        raise IndexError(f"marker not found in log: {marker!r}")
    raw_timestamp = pieces[1].split('\n')[0].strip()
    return datetime.strptime(raw_timestamp, "%Y-%m-%d_%H-%M-%S")


def _print_duration(label, begin, end):
    '''Print `label` with the whole minutes elapsed between `begin` and `end`.'''
    minutes_diff = round((end - begin).total_seconds() / 60)
    # `>=` so that exactly one hour also gets the h/m breakdown.
    if minutes_diff >= 60:
        hours, mins = divmod(minutes_diff, 60)
        print(f"{label}: {minutes_diff}m ({hours}h {mins}m)")
    else:
        print(f"{label}: {minutes_diff}m")
if __name__ == "__main__":
    # Script entry point: expects the path of the pipeline stdout log as the
    # first command-line argument.
    import sys
    # Kept at module scope because `collect_time_info` may look `datetime` up
    # as a global.
    from datetime import datetime

    try:
        input_text_filepath = sys.argv[1]
    except IndexError:
        # `rich` is not among the script's declared dependencies, so fall back
        # to plain stderr output when it is unavailable.
        try:
            import rich
        except ImportError:
            print(
                "\nI suspect you forgot to specify the file to read?"
                "\n **EXITING !!**\n",
                file=sys.stderr,
            )
        else:
            # One open/close tag pair around the whole message; an unmatched
            # closing tag makes rich raise a MarkupError.
            rich.print(
                "\n[bold red]I suspect you forgot to specify the file to read?"
                "\n **EXITING !!**[/bold red]\n"
            )
        sys.exit(1)

    # Not used yet; retained from the original script (the header notes a
    # possible future dataframe output).
    import pandas as pd
    from openpyxl import Workbook

    collect_time_info(input_text_filepath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment