Skip to content

Instantly share code, notes, and snippets.

@r0t0shell
Created October 14, 2020 06:12
Show Gist options
  • Select an option

  • Save r0t0shell/6bffc270140c132646cbcd76f1ecc6d0 to your computer and use it in GitHub Desktop.

Select an option

Save r0t0shell/6bffc270140c132646cbcd76f1ecc6d0 to your computer and use it in GitHub Desktop.
Text-to-speech with audio file (.wav) caching in Python 3.x using Google Cloud Platform and PyAudio.
# import necessary packages
from google.cloud import texttospeech
import pyaudio
import hashlib
import wave
import os
class SpeechSynthesizer:
    """Speak text aloud via Google Cloud Text-to-Speech with on-disk caching.

    Each distinct text is synthesized once; the resulting WAV audio is stored
    under ``CACHE_DIR`` in a file named after the SHA-256 of the text, and
    later requests for the same text are played straight from that file to
    save API quota.
    """

    # Directory holding the cached .wav files.
    CACHE_DIR = "/tmp/tts"
    # Number of audio frames read from the file and pushed to the output
    # stream per iteration.
    CHUNK_FRAMES = 1000

    def __init__(self, key_json):
        """Create the Text-to-Speech client.

        Args:
            key_json: Path to the Google Cloud service-account key file. It
                is exported through the ``GOOGLE_APPLICATION_CREDENTIALS``
                environment variable before the client is constructed.
        """
        # store Google Cloud key in a system environment variable
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_json
        # initialize and store GC TextToSpeech client
        self.client = texttospeech.TextToSpeechClient()

    def __generate(self, text):
        """Request synthesized speech for *text* from Google Cloud.

        Returns:
            The response of ``synthesize_speech``; its ``audio_content``
            attribute carries the audio bytes.
        """
        return self.client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=text),
            voice=texttospeech.VoiceSelectionParams(
                language_code="en-GB",
                name="en-GB-Wavenet-B",  # british male wavenet
                # The field is spelled "ssml_gender"; the previous
                # "sml_gender" is not a field of VoiceSelectionParams.
                ssml_gender=texttospeech.SsmlVoiceGender.MALE,
            ),
            audio_config=texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.LINEAR16,  # wav
                pitch=-1,
                speaking_rate=1,
            ),
        )

    @classmethod
    def _cache_path(cls, text):
        """Return the cache file path for *text* (SHA-256 of its UTF-8 form)."""
        digest = hashlib.sha256(str(text).encode("utf-8")).hexdigest()
        return "%s/%s.wav" % (cls.CACHE_DIR, digest)

    def _ensure_cached(self, text):
        """Make sure the audio for *text* is on disk and return its path."""
        cache_path = self._cache_path(text)
        if os.path.exists(cache_path):
            return cache_path

        # The cache directory does not exist on a fresh system.
        os.makedirs(self.CACHE_DIR, exist_ok=True)
        audio = self.__generate(text).audio_content

        # Write to a scratch file and rename it into place so an interrupted
        # write can never leave a truncated file that looks like a valid
        # cache entry.
        partial_path = "%s.%d.part" % (cache_path, os.getpid())
        try:
            with open(partial_path, "wb") as f:
                f.write(audio)
            os.replace(partial_path, cache_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        return cache_path

    def speak(self, text):
        """Play *text* through the default audio output.

        The audio is taken from the cache when present; otherwise it is
        synthesized and cached first. The call blocks until playback ends.

        Args:
            text: The text to speak; it is converted with ``str()`` when
                computing the cache key.
        """
        # Obtain the audio before touching the audio device so a failed
        # synthesis request does not leave an open output stream behind.
        cache_path = self._ensure_cached(text)

        with wave.open(cache_path, "rb") as wf:
            p = pyaudio.PyAudio()
            try:
                # Take the playback parameters from the WAV header instead of
                # hard-coding them, so they always match the file played.
                stream = p.open(
                    format=p.get_format_from_width(wf.getsampwidth()),
                    rate=wf.getframerate(),
                    channels=wf.getnchannels(),
                    output=True,
                    frames_per_buffer=self.CHUNK_FRAMES,
                )
                try:
                    frames = wf.readframes(self.CHUNK_FRAMES)
                    while frames:
                        stream.write(frames)
                        frames = wf.readframes(self.CHUNK_FRAMES)
                finally:
                    stream.stop_stream()
                    stream.close()
            finally:
                # Release the audio backend acquired by PyAudio().
                p.terminate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment