Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.

Select an option

Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.
Evaluating date timestamp info in typical long and short read pipeline.
# meant to be run with `uv run https://gist.githubusercontent.com/fomightez/f036794b91d10761466341644b3c1cac/raw/15da0209f7b09c1ba0130cf66646635c80a58bae/evaluate_date_timestamps_in_pipeline_stdout.py out.txt`, or similar
# This evaluates the date timestamp info written to stdout by a typical long- and short-read pipeline.
#####*****------------------------------------------------------------*****#####
# This is meant to be run with `uv`.
# First install `uv` with `pip install uv`, then run `!uv run {script_url} {input_text_filepath}`, having defined those two variables beforehand.
#-------------------------------------------------------------#
# Times printed for now. (Make a dataframe?)
#-------------------------------------------------------------#
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "numpy",
# "pandas",
# "openpyxl",
# ]
# ///
# strptime layout of the timestamps found in the pipeline's stdout log.
_TIMESTAMP_FORMAT = "%Y-%m-%d_%H-%M-%S"


def _extract_timestamp(log_text, marker):
    '''
    Return the stripped text that follows the first occurrence of `marker`,
    up to the end of that line.

    Raises IndexError (the same exception type a bare `split(marker)[1]`
    lookup produces) with a message naming the marker when it is absent.
    '''
    pieces = log_text.split(marker)
    if len(pieces) < 2:
        raise IndexError(f"marker {marker!r} not found in the log text")
    return pieces[1].split('\n')[0].strip()


def _report_duration(label, start_ts, end_ts):
    '''
    Print `label` with the time between two timestamp strings, rounded to
    whole minutes, adding an hours/minutes breakdown from one hour upwards.
    '''
    # Imported here so the function also works when this file is imported as
    # a module rather than run as a script.
    from datetime import datetime

    elapsed = (datetime.strptime(end_ts, _TIMESTAMP_FORMAT)
               - datetime.strptime(start_ts, _TIMESTAMP_FORMAT))
    # round() on a float rounds exact halves to the nearest even integer.
    minutes_diff = round(elapsed.total_seconds() / 60)
    if minutes_diff >= 60:
        hours, mins = divmod(minutes_diff, 60)
        print(f"{label}: {minutes_diff}m ({hours}h {mins}m)")
    else:
        print(f"{label}: {minutes_diff}m")


def collect_time_info(input_text_filepath):
    '''
    Read a pipeline stdout log and print the durations between its three
    timestamps.

    The log must contain lines carrying these markers, each followed by a
    timestamp in `%Y-%m-%d_%H-%M-%S` form:
      - 'Current timestamp at start: '
      - 'Current timestamp before other steps but after fastq obtained: '
      - 'Current timestamp after: '

    Three lines are printed: total run time, download time (start to fastq
    obtained), and main processing time (fastq obtained to end).

    input_text_filepath: path of the text file holding the captured stdout.

    Returns None.
    Raises IndexError if a marker is missing, ValueError (from strptime) if a
    timestamp does not match the expected layout, and OSError if the file
    cannot be opened.
    '''
    with open(input_text_filepath, 'r') as thelog_stdout_file:
        std_out_string = thelog_stdout_file.read()
    # with std_out log read in, parse it for the information in the three timestamps
    start_ts = _extract_timestamp(
        std_out_string, 'Current timestamp at start: ')
    after_data_obtained_ts = _extract_timestamp(
        std_out_string,
        'Current timestamp before other steps but after fastq obtained: ')
    after_main_events_ts = _extract_timestamp(
        std_out_string, 'Current timestamp after: ')
    # determine time duration between events in minutes
    _report_duration("Total time processing run", start_ts, after_main_events_ts)
    _report_duration("Download time", start_ts, after_data_obtained_ts)
    _report_duration(
        "Main processing post-download", after_data_obtained_ts, after_main_events_ts)
if __name__ == "__main__":
    # Script entry point: expects the path of the stdout log as the first
    # command-line argument and prints the timing summary for it.
    import sys
    # Bound at module scope so code that looks up `datetime` as a global
    # can find it.
    from datetime import datetime

    try:
        input_text_filepath = sys.argv[1]
    except IndexError:
        # Plain stderr output using only the standard library: `rich` is not
        # among this script's declared dependencies, and the markup string
        # previously passed to it had an unmatched closing tag, so the hint
        # could not be shown.
        print(
            "\nI suspect you forgot to specify the file to read?\n **EXITING !!**\n",
            file=sys.stderr,
        )
        sys.exit(1)

    # NOTE(review): pandas and openpyxl are not used yet; presumably kept for
    # the planned dataframe output -- confirm before removing.
    import pandas as pd
    from openpyxl import Workbook

    collect_time_info(input_text_filepath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment