fomightez · March 2, 2026 17:31
diff --git a/evaluate_date_timestamps_in_pipeline_stdout.py b/evaluate_date_timestamps_in_pipeline_stdout.py
 # meant to be run with `uv run https://gist.githubusercontent.com/fomightez/f036794b91d10761466341644b3c1cac/raw/15da0209f7b09c1ba0130cf66646635c80a58bae/evaluate_date_timestamps_in_pipeline_stdout.py out.txt`, or similar
 # This handles evaluating date timestamp info in typical long and short read pipeline.
 #####*****------------------------------------------------------------*****#####
 # This is meant to be run with `uv`.
 # First install `uv` with `pip install uv`, then run `!uv run {script_url} {input_text_filepath}` after defining those two variables.
 #-------------------------------------------------------------#
 # Times printed for now. (Make a dataframe?)
 #-------------------------------------------------------------#
 # /// script
 # requires-python = ">=3.12"
 # dependencies = [
 #   "numpy",
 #   "pandas",
 #   "openpyxl",
 # ]
 # ///
 def _extract_timestamp(log_text, marker):
    '''
    Return the stripped remainder of the line that follows the first
    occurrence of `marker` in `log_text`.

    Raises IndexError when `marker` is absent; the message names the marker
    so the failing log line can be identified.
    '''
    _, found, remainder = log_text.partition(marker)
    if not found:
        raise IndexError(f"Timestamp marker {marker!r} not found in the log text")
    return remainder.split('\n')[0].strip()


 def _minutes_between(earlier_ts, later_ts):
    '''
    Return the time elapsed from `earlier_ts` to `later_ts`, rounded to whole
    minutes. Both arguments are strings in `%Y-%m-%d_%H-%M-%S` form.

    Raises ValueError (from `strptime`) when a string does not match.
    '''
    # Imported at function scope so this works when the module is imported,
    # not only when it is executed as a script.
    from datetime import datetime
    ts_format = "%Y-%m-%d_%H-%M-%S"
    elapsed = datetime.strptime(later_ts, ts_format) - datetime.strptime(earlier_ts, ts_format)
    return round(elapsed.total_seconds() / 60)


 def _report_duration(label, minutes_diff):
    '''
    Print `label` followed by the duration in minutes, adding an hours and
    minutes breakdown once the duration reaches a full hour.
    '''
    if minutes_diff >= 60:
        hours, mins = divmod(minutes_diff, 60)
        print(f"{label}: {minutes_diff}m ({hours}h {mins}m)")
    else:
        print(f"{label}: {minutes_diff}m")


 def collect_time_info(input_text_filepath):
    '''
    Read the pipeline stdout log at `input_text_filepath`, pull out its three
    timestamps, and print the durations between them in minutes.

    The log must contain the lines beginning with
    `Current timestamp at start: `,
    `Current timestamp before other steps but after fastq obtained: ` and
    `Current timestamp after: `, each followed by a timestamp in
    `%Y-%m-%d_%H-%M-%S` form.

    Prints three lines (total time, download time, main processing time) and
    returns None.

    Raises IndexError when one of the marker lines is missing and ValueError
    when a timestamp is not in the expected format.
    '''
    with open(input_text_filepath, 'r') as thelog_stdout_file:
        std_out_string = thelog_stdout_file.read()
    # with the stdout log read in, parse it for the information in the three timestamps
    start_ts = _extract_timestamp(std_out_string, 'Current timestamp at start: ')
    after_data_obtained_ts = _extract_timestamp(
        std_out_string,
        'Current timestamp before other steps but after fastq obtained: ',
    )
    after_main_events_ts = _extract_timestamp(std_out_string, 'Current timestamp after: ')
    # determine the time duration between events in minutes
    _report_duration("Total time processing run", _minutes_between(start_ts, after_main_events_ts))
    _report_duration("Download time", _minutes_between(start_ts, after_data_obtained_ts))
    _report_duration(
        "Main processing post-download",
        _minutes_between(after_data_obtained_ts, after_main_events_ts),
    )


 # Script entry point: take the log file path from the command line and print
 # the timing summary for it.
 if __name__ == "__main__":
    import sys
    # Bound at module scope so functions in this file can reference `datetime`
    # as a global.
    from datetime import datetime
    try:
        input_text_filepath = sys.argv[1]
    except IndexError:
        try:
            import rich
        except ImportError:
            # `rich` is not among the script's declared dependencies, so it may
            # be absent; fall back to a plain message rather than a traceback.
            print("\nI suspect you forgot to specify the file to read?\n **EXITING !!**\n", file=sys.stderr)
        else:
            # One opening tag, one closing tag: an unmatched `[/bold red]`
            # makes rich raise MarkupError instead of printing the hint.
            rich.print("\n[bold red]I suspect you forgot to specify the file to read?\n **EXITING !!**[/bold red]\n")
        sys.exit(1)
    # NOTE: pandas and openpyxl are not used by the code below yet; the imports
    # are kept for the planned dataframe output mentioned in the header.
    import pandas as pd
    from openpyxl import Workbook
    collect_time_info(input_text_filepath)
	# meant to be run with `uv run https://gist.githubusercontent.com/fomightez/f036794b91d10761466341644b3c1cac/raw/15da0209f7b09c1ba0130cf66646635c80a58bae/evaluate_date_timestamps_in_pipeline_stdout.py out.txt`, or similar
	# This handles evaluating date timestamp info in typical long and short read pipeline.
	#####***------------------------------------------------------------***#####
	# This is meant to be run with `uv`.
	# First install `uv` with `pip install uv`, then run `!uv run {script_url} {input_text_filepath}` after defining those two variables.
	#-------------------------------------------------------------#
	# Times printed for now. (Make a dataframe?)
	#-------------------------------------------------------------#
	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "numpy",
	# "pandas",
	# "openpyxl",
	# ]
	# ///
	def _extract_timestamp(log_text, marker):
	    '''
	    Return the stripped remainder of the line that follows the first
	    occurrence of `marker` in `log_text`.

	    Raises IndexError when `marker` is absent; the message names the marker
	    so the failing log line can be identified.
	    '''
	    _, found, remainder = log_text.partition(marker)
	    if not found:
	        raise IndexError(f"Timestamp marker {marker!r} not found in the log text")
	    return remainder.split('\n')[0].strip()


	def _minutes_between(earlier_ts, later_ts):
	    '''
	    Return the time elapsed from `earlier_ts` to `later_ts`, rounded to whole
	    minutes. Both arguments are strings in `%Y-%m-%d_%H-%M-%S` form.

	    Raises ValueError (from `strptime`) when a string does not match.
	    '''
	    # Imported at function scope so this works when the module is imported,
	    # not only when it is executed as a script.
	    from datetime import datetime
	    ts_format = "%Y-%m-%d_%H-%M-%S"
	    elapsed = datetime.strptime(later_ts, ts_format) - datetime.strptime(earlier_ts, ts_format)
	    return round(elapsed.total_seconds() / 60)


	def _report_duration(label, minutes_diff):
	    '''
	    Print `label` followed by the duration in minutes, adding an hours and
	    minutes breakdown once the duration reaches a full hour.
	    '''
	    if minutes_diff >= 60:
	        hours, mins = divmod(minutes_diff, 60)
	        print(f"{label}: {minutes_diff}m ({hours}h {mins}m)")
	    else:
	        print(f"{label}: {minutes_diff}m")


	def collect_time_info(input_text_filepath):
	    '''
	    Read the pipeline stdout log at `input_text_filepath`, pull out its three
	    timestamps, and print the durations between them in minutes.

	    The log must contain the lines beginning with
	    `Current timestamp at start: `,
	    `Current timestamp before other steps but after fastq obtained: ` and
	    `Current timestamp after: `, each followed by a timestamp in
	    `%Y-%m-%d_%H-%M-%S` form.

	    Prints three lines (total time, download time, main processing time) and
	    returns None.

	    Raises IndexError when one of the marker lines is missing and ValueError
	    when a timestamp is not in the expected format.
	    '''
	    with open(input_text_filepath, 'r') as thelog_stdout_file:
	        std_out_string = thelog_stdout_file.read()
	    # with the stdout log read in, parse it for the information in the three timestamps
	    start_ts = _extract_timestamp(std_out_string, 'Current timestamp at start: ')
	    after_data_obtained_ts = _extract_timestamp(
	        std_out_string,
	        'Current timestamp before other steps but after fastq obtained: ',
	    )
	    after_main_events_ts = _extract_timestamp(std_out_string, 'Current timestamp after: ')
	    # determine the time duration between events in minutes
	    _report_duration("Total time processing run", _minutes_between(start_ts, after_main_events_ts))
	    _report_duration("Download time", _minutes_between(start_ts, after_data_obtained_ts))
	    _report_duration(
	        "Main processing post-download",
	        _minutes_between(after_data_obtained_ts, after_main_events_ts),
	    )


	# Script entry point: take the log file path from the command line and print
	# the timing summary for it.
	if __name__ == "__main__":
	    import sys
	    # Bound at module scope so functions in this file can reference `datetime`
	    # as a global.
	    from datetime import datetime
	    try:
	        input_text_filepath = sys.argv[1]
	    except IndexError:
	        try:
	            import rich
	        except ImportError:
	            # `rich` is not among the script's declared dependencies, so it may
	            # be absent; fall back to a plain message rather than a traceback.
	            print("\nI suspect you forgot to specify the file to read?\n EXITING !!\n", file=sys.stderr)
	        else:
	            # One opening tag, one closing tag: an unmatched `[/bold red]`
	            # makes rich raise MarkupError instead of printing the hint.
	            rich.print("\n[bold red]I suspect you forgot to specify the file to read?\n EXITING !![/bold red]\n")
	        sys.exit(1)
	    # NOTE: pandas and openpyxl are not used by the code below yet; the imports
	    # are kept for the planned dataframe output mentioned in the header.
	    import pandas as pd
	    from openpyxl import Workbook
	    collect_time_info(input_text_filepath)
No results found