Skip to content

Instantly share code, notes, and snippets.

@seblemaguer
Created December 16, 2020 09:54
Show Gist options
  • Select an option

  • Save seblemaguer/addf1da1e5061a71036abb17c028ace2 to your computer and use it in GitHub Desktop.

Select an option

Save seblemaguer/addf1da1e5061a71036abb17c028ace2 to your computer and use it in GitHub Desktop.
Helper to convert an HTS duration file to an HTK state-level label file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AUTHOR
Sébastien Le Maguer <[email protected]>
DESCRIPTION
Helper to convert an HTS duration file to an HTK state-level label file
LICENSE
This script is in the public domain, free from copyrights or restrictions.
Created: 16 December 2020
"""
# System/default
import sys
import os
import re
# Arguments
import argparse
# Messaging/logging
import traceback
import time
import logging
from logging.config import dictConfig
###############################################################################
# global constants
###############################################################################
# Logging levels indexed by the number of -v flags given on the command line
# (0 -> WARNING, 1 -> INFO, 2 -> DEBUG); larger counts are clamped to the last entry.
LEVEL = [logging.WARNING, logging.INFO, logging.DEBUG]
###############################################################################
# Main function
###############################################################################
def main():
    """Convert an HTS duration file into an HTK state-level label file.

    Reads ``args.duration_file`` line by line and keeps only the lines of the
    form ``<model>.state[<2-6>]: duration=<frames> ...``; every other line is
    ignored.  For each kept line, one label line
    ``<start> <end> <model>[<state>]`` is written to ``args.htk_lab_file``.
    Segments are contiguous: the first one starts at 0 and each following one
    starts where the previous one ended.

    Times are written in units of 100 ns (the time unit of HTK label files).
    ``args.frameshift`` is the frame shift in milliseconds, so one frame lasts
    ``args.frameshift * 10000`` units.

    Relies on the module-level ``args`` namespace providing the attributes
    ``duration_file``, ``htk_lab_file`` and ``frameshift``.
    """
    global args

    # Require at least one digit so that a malformed "duration=" entry is
    # skipped like any other non-matching line instead of crashing int("").
    pattern = re.compile(r"(.*)\.state\[([2-6])\]: duration=([0-9]+) .*")

    # 1 ms == 10,000 * 100 ns.  The previous factor (50000) already assumed a
    # 5 ms frame shift and was then multiplied by the frame shift again.
    units_per_frame = args.frameshift * 10000

    # Generate labels from durations
    start = 0
    list_lab = []
    with open(args.duration_file) as f_dur:
        for line in f_dur:
            m = pattern.match(line)
            if m is None:
                continue
            end = start + int(m.group(3)) * units_per_frame
            lab = "%s[%s]" % (m.group(1), m.group(2))
            list_lab.append("%d %d %s\n" % (start, end, lab))
            start = end

    # Save labels (only after the whole input has been read successfully)
    with open(args.htk_lab_file, "w") as f_lab:
        f_lab.writelines(list_lab)
###############################################################################
# Enveloping
###############################################################################
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, configure logging, run
    # main() and report the elapsed time.  KeyboardInterrupt and SystemExit are
    # deliberately NOT intercepted, so Ctrl-C behaves normally and the exit
    # status chosen by argparse (e.g. 2 on a usage error) or by sys.exit()
    # reaches the calling shell.
    try:
        parser = argparse.ArgumentParser(
            description="Convert an HTS duration file to an HTK label file."
        )

        # Add options
        parser.add_argument("-f", "--frameshift", type=int, default=5,
                            help="The frameshift used in HTS in milliseconds")
        parser.add_argument("-l", "--log_file", default=None, help="Logger file")
        parser.add_argument(
            "-v",
            "--verbosity",
            action="count",
            default=0,
            help="increase output verbosity",
        )

        # Add arguments
        parser.add_argument("duration_file")
        parser.add_argument("htk_lab_file")

        # Parsing arguments; `args` stays a module-level name because main()
        # reads it as a global.
        args = parser.parse_args()

        # Verbose level => logging level (extra -v flags saturate at the most
        # verbose entry of LEVEL)
        log_level = min(args.verbosity, len(LEVEL) - 1)

        logging_config = dict(
            version=1,
            disable_existing_loggers=True,
            formatters={
                "f": {
                    "format": "[%(asctime)s] [%(levelname)s] — [%(name)s — %(funcName)s:%(lineno)d] %(message)s",
                    "datefmt": "%d/%b/%Y: %H:%M:%S ",
                }
            },
            handlers={
                "h": {
                    "class": "logging.StreamHandler",
                    "formatter": "f",
                    "level": LEVEL[log_level],
                }
            },
            root={"handlers": ["h"], "level": LEVEL[log_level]},
        )

        # Optionally mirror the log output into a file
        if args.log_file is not None:
            logging_config["handlers"]["f"] = {
                "class": "logging.FileHandler",
                "formatter": "f",
                "level": LEVEL[log_level],
                "filename": args.log_file,
            }
            logging_config["root"]["handlers"] = ["h", "f"]

        dictConfig(logging_config)
        logger = logging.getLogger(__name__)

        # Debug time
        start_time = time.time()
        logger.info("start time = " + time.asctime())

        # Running main function <=> run application
        main()

        # Debug time
        logger.info("end time = " + time.asctime())
        logger.info(
            "TOTAL TIME IN MINUTES: %02.2f" % ((time.time() - start_time) / 60.0)
        )

        # Exit program
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status.
        logging.error("ERROR, UNEXPECTED EXCEPTION")
        logging.error(str(e))
        traceback.print_exc(file=sys.stderr)
        sys.exit(-1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment