Skip to content

Instantly share code, notes, and snippets.

@kalw
Created October 5, 2025 17:57
Show Gist options
  • Select an option

  • Save kalw/58db5b9aa801cba14bdddddf3467d077 to your computer and use it in GitHub Desktop.

Select an option

Save kalw/58db5b9aa801cba14bdddddf3467d077 to your computer and use it in GitHub Desktop.
wakewordbourre.py
# Phrase that is synthesized (one sample per model) for manual verification.
# Phonetic spellings may produce better samples.
target_word = "je suis malade"

# Model filenames to try; add more entries here if needed.
tts_models_to_use = [
    "en_US-libritts_r-medium.pt",
    "fr_FR-mls-medium.pt",
]

# Explicit JSON config URLs, keyed by model filename, for models whose config
# does not live at the standard location. Models that are not listed here
# fall back to the default URL pattern when they are downloaded.
tts_config_urls = {
    "fr_FR-mls-medium.pt": (
        "https://raw.githubusercontent.com/rhasspy/piper-sample-generator"
        "/refs/heads/master/models/fr_FR-mls-medium.pt.json"
    ),
}
import os
import sys
import platform
import shutil
from IPython.display import Audio
if os.path.exists("./piper-sample-generator"):
shutil.rmtree("./piper-sample-generator")
if not os.path.exists("./piper-sample-generator"):
!pip install piper-tts piper-phonemize-cross
if platform.system() == "Darwin":
!git clone -b mps-support https://github.com/kahrendt/piper-sample-generator
else:
!git clone https://github.com/rhasspy/piper-sample-generator
# Download models
successfully_downloaded_models = []
for tts_to_use in tts_models_to_use:
model_url = f'https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/{tts_to_use}'
model_path = f'piper-sample-generator/models/{tts_to_use}'
config_path = f'piper-sample-generator/models/{tts_to_use}.json'
# Get the config URL from the dictionary or use the default pattern
config_url = tts_config_urls.get(tts_to_use, f'https://raw.githubusercontent.com/rhasspy/piper-sample-generator/refs/heads/master/models/{tts_to_use}.json')
print(f"Attempting to download {tts_to_use} from {model_url}")
download_result = !wget -O {model_path} {model_url}
if any("ERROR 404: Not Found" in line for line in download_result):
print(f"Download failed for {tts_to_use}. The file was not found.")
elif any("saved" in line for line in download_result):
print(f"Successfully downloaded {tts_to_use}")
# Attempt to download the corresponding JSON config file
print(f"Attempting to download {tts_to_use}.json from {config_url}")
config_download_result = !wget -O {config_path} {config_url}
if any("ERROR 404: Not Found" in line for line in config_download_result):
print(f"Download failed for {tts_to_use}.json. The file was not found.")
print("Please manually find and download the correct JSON configuration file for this model.")
elif any("saved" in line for line in config_download_result):
print(f"Successfully downloaded {tts_to_use}.json")
successfully_downloaded_models.append(tts_to_use)
else:
print(f"An unknown error occurred during the download of {tts_to_use}.json.")
print("\n".join(config_download_result))
else:
print(f"An unknown error occurred during the download of {tts_to_use}.")
print("\n".join(download_result))
# Install system dependencies
!pip install torch torchaudio piper-phonemize-cross==1.2.1
if "piper-sample-generator/" not in sys.path:
sys.path.append("piper-sample-generator/")
from generate_samples import generate_samples
def text_to_speech(text, model_path, output_filename, length_scales=None, noise_scales=None, noise_scale_ws=None):
    """Generate a single speech sample of ``text`` with one model.

    Phonetic spellings can be crucial and may differ between languages and
    models; experiment with different parameters for different models if
    needed.

    Args:
        text: Text forwarded to ``generate_samples``.
        model_path: Path of the model file, forwarded as ``model``.
        output_filename: Forwarded as the only entry of ``file_names``; the
            output directory is ``./generated_samples``.
        length_scales: Forwarded as ``length_scales``. Defaults to ``[1.1]``.
        noise_scales: Forwarded as ``noise_scales``. Defaults to ``[0.7]``.
        noise_scale_ws: Forwarded as ``noise_scale_ws``. Defaults to ``[0.7]``.
    """
    # Build the default lists per call: list literals in the signature would
    # be single objects shared by every call and exposed to generate_samples.
    if length_scales is None:
        length_scales = [1.1]
    if noise_scales is None:
        noise_scales = [0.7]
    if noise_scale_ws is None:
        noise_scale_ws = [0.7]

    generate_samples(
        text=text,
        max_samples=1,  # exactly one sample is requested per call
        length_scales=length_scales,
        noise_scales=noise_scales,
        noise_scale_ws=noise_scale_ws,
        output_dir='./generated_samples',
        batch_size=1,
        auto_reduce_batch_size=True,
        file_names=[output_filename],
        model=model_path,
    )
# Synthesize and play one sample for every model whose files were downloaded.
for tts_to_use in successfully_downloaded_models:
    model_path = f'piper-sample-generator/models/{tts_to_use}'
    # Name the WAV file after the model filename, minus its extension.
    output_filename = f"{os.path.splitext(tts_to_use)[0]}.wav"

    # Both the model and its JSON config must be present on disk.
    if not (os.path.exists(model_path) and os.path.exists(f"{model_path}.json")):
        print(f"Skipping text-to-speech for {tts_to_use} as the model file or config was not found.")
        continue

    # Per-model parameter overrides; adjust these based on the model filename
    # if needed. Models without overrides use text_to_speech's defaults.
    overrides = {}
    if 'mls' in tts_to_use:
        overrides = {"length_scales": [1.2], "noise_scales": [0.6], "noise_scale_ws": [0.6]}

    text_to_speech(target_word, model_path=model_path, output_filename=output_filename, **overrides)
    # Play back the file that was just written for this model.
    display(Audio(f"generated_samples/{output_filename}", autoplay=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment