Last active
October 22, 2020 13:37
-
-
Save seblemaguer/09846f7e2bc6853d7ab3dc6f3b781111 to your computer and use it in GitHub Desktop.
Generate F0 sinusoidal signal from original wavefile using pyworld
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| AUTHOR | |
| Sébastien Le Maguer <[email protected]> | |
| DESCRIPTION | |
| Generate an F0-driven sinusoidal signal from an original wave file using pyworld. | |
| LICENSE | |
| This script is in the public domain, free from copyrights or restrictions. | |
| Created: 24 October 2019 | |
| """ | |
| # System/default | |
| import sys | |
| import os | |
| # Arguments | |
| import argparse | |
| # Messaging/logging | |
| import traceback | |
| import time | |
| import logging | |
| # Linear algebra | |
| import numpy as np | |
| # Audio, dsp | |
| import librosa | |
| import soundfile as sf | |
| import pyworld as pw | |
| import scipy | |
| from scipy.signal import resample | |
| # Plotting | |
| from pyqtgraph.dockarea import * | |
| from pyqtgraph.Qt import QtCore, QtGui | |
| import pyqtgraph as pg | |
| ############################################################################### | |
| # global constants | |
| ############################################################################### | |
# Logging levels, indexed by the number of "-v" flags given on the command line
LEVEL = [
    logging.WARNING,
    logging.INFO,
    logging.DEBUG,
]

# Make pyqtgraph read image data as row-major rather than col-major
pg.setConfigOptions(imageAxisOrder="row-major")
| ############################################################################### | |
| # Functions | |
| ############################################################################### | |
def extract_f0_and_energy(f_wav, f0_sr=200):
    """Extract a continuous F0 contour and a frame-level energy contour.

    The wave file is analysed with pyworld (DIO followed by StoneMask), the
    frames reported with an F0 of 0 are filled by linear interpolation between
    their neighbours, and the energy of each analysis frame is computed as
    the sum of the squared samples of that frame.

    Parameters
    ----------
    f_wav : str
        Path of the wave file to analyse (handed to ``librosa.load``).
    f0_sr : int, optional
        Analysis frame rate, in frames per second (default: 200).

    Returns
    -------
    tuple
        ``(f0, e, unvoiced)`` where ``f0`` is the interpolated F0 contour
        (presumably in Hz, as produced by pyworld -- confirm), ``e`` is the
        per-frame energy and ``unvoiced`` is the boolean mask of the frames
        whose F0 was 0 before interpolation.
    """
    # Load wavefile.  NOTE(review): no sample rate is passed, so ``fs`` is
    # whatever librosa decides to return -- confirm this is intended.
    x, fs = librosa.load(f_wav, dtype=np.float64)

    # Extract f0: raw pitch extractor followed by pitch refinement.
    # The frame period is expressed in milliseconds.
    raw_f0, frame_times = pw.dio(x, fs, frame_period=1000.0 / f0_sr)
    f0 = pw.stonemask(x, raw_f0, frame_times, fs)

    # Interpolate f0 over the unvoiced frames.  Without any voiced frame
    # there is nothing to interpolate from (np.interp would raise), so the
    # contour is then left at 0.
    unvoiced = (f0 == 0.0)
    voiced = ~unvoiced
    if unvoiced.any() and voiced.any():
        f0[unvoiced] = np.interp(np.flatnonzero(unvoiced),
                                 np.flatnonzero(voiced),
                                 f0[voiced])

    # Extract energy.  Frame boundaries are derived from the exact frame
    # index -> sample index mapping, so the energy windows do not drift away
    # from the analysis frames when fs is not a multiple of f0_sr.
    nb_frames = len(f0)
    bounds = ((np.arange(nb_frames + 1) * fs) // f0_sr).astype(np.int64)
    bounds = np.minimum(bounds, len(x))
    power = np.square(x)
    e = np.array(
        [power[start:stop].sum() for start, stop in zip(bounds[:-1], bounds[1:])],
        dtype=np.float64,
    )

    return (f0, e, unvoiced)
def generate_shirt_signal(f0, e, f0_sr=200, output_sr=16000):
    """Generate a sine wave following an F0 contour, scaled by an energy contour.

    The phase of the sine is obtained by integrating (cumulative sum) the F0
    contour at the frame rate; the resulting frame-rate signal is multiplied
    by ``e`` and then resampled to ``output_sr``.

    Parameters
    ----------
    f0 : array_like
        F0 contour, one value per frame.
    e : array_like
        Scaling contour, one value per frame (same length as ``f0``).
    f0_sr : int, optional
        Frame rate of ``f0`` and ``e``, in frames per second (default: 200).
    output_sr : int, optional
        Sample rate of the generated signal (default: 16000).

    Returns
    -------
    numpy.ndarray
        The generated signal, ``len(f0) * output_sr // f0_sr`` samples long
        (empty when that count is 0).
    """
    f0 = np.asarray(f0, dtype=np.float64)
    e = np.asarray(e, dtype=np.float64)

    # Multiply before dividing: the float expression len / f0_sr * output_sr
    # can land just below the exact value and lose one sample once truncated.
    nb_samples = int(len(f0) * output_sr // f0_sr)
    if nb_samples == 0:
        # Nothing to synthesise; resample() cannot handle this case.
        return np.zeros(0)

    # Generate sin wave: the integrated F0 is the phase expressed in cycles
    f0_int = np.cumsum(f0) / f0_sr
    sin_wave = e * np.sin(2 * np.pi * f0_int)

    # Resample from the frame rate to the output sample rate
    return resample(sin_wave, nb_samples)
| ############################################################################### | |
| # Main function | |
| ############################################################################### | |
def main():
    """Run the conversion: analyse the input wave file, synthesise, save.

    The input and output paths are read from the module-level ``args``
    namespace filled by the command-line parser.
    """
    # Analysis frame rate and sample rate of the generated signal
    f0_sr = 1000
    signal_sr = 16000

    # Extract F0 and energy (the voicing mask is not needed here)
    f0, e, _ = extract_f0_and_energy(args.wav_in, f0_sr)

    # Generate chirp signal
    chirp = generate_shirt_signal(f0, e, f0_sr, signal_sr)

    # Save new wavefile
    sf.write(args.wav_out, chirp, signal_sr)
| ############################################################################### | |
| # Enveloping | |
| ############################################################################### | |
if __name__ == '__main__':
    # Command-line wrapper: parse the arguments, configure the logging,
    # run main() and translate the outcome into a process exit status.
    try:
        parser = argparse.ArgumentParser(description="")

        # Add options
        parser.add_argument("-l", "--log_file", default=None,
                            help="Logger file")
        parser.add_argument("-v", "--verbosity", action="count", default=0,
                            help="increase output verbosity")

        # Add arguments
        parser.add_argument("wav_in")
        parser.add_argument("wav_out")

        # Parsing arguments (kept at module level: main() reads ``args``)
        args = parser.parse_args()

        # create logger and formatter
        logger = logging.getLogger()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        # Attach the handlers before emitting any message: logging through
        # a handler-less root logger triggers an implicit basicConfig() and
        # every later message would be printed twice on the console.
        # create console handler
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        logger.addHandler(ch)

        # create file handler (same format as the console)
        if args.log_file is not None:
            fh = logging.FileHandler(args.log_file)
            fh.setFormatter(formatter)
            logger.addHandler(fh)

        # Verbose level => logging level (clamped to the last known level)
        log_level = min(args.verbosity, len(LEVEL) - 1)
        logger.setLevel(LEVEL[log_level])
        if args.verbosity >= len(LEVEL):
            logger.warning("verbosity level is too high, I'm gonna assume you're taking the highest (%d)" % log_level)

        # Debug time
        start_time = time.time()
        logger.info("start time = " + time.asctime())

        # Running main function <=> run application
        main()

        # Debug time
        logger.info("end time = " + time.asctime())
        logger.info('TOTAL TIME IN MINUTES: %02.2f' %
                    ((time.time() - start_time) / 60.0))

        # Exit program
        sys.exit(0)
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C and sys.exit() must go through untouched; swallowing
        # SystemExit would turn argparse's error status (2) into a success.
        raise
    except Exception as e:
        logging.error('ERROR, UNEXPECTED EXCEPTION')
        logging.error(str(e))
        traceback.print_exc(file=sys.stderr)
        sys.exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment