Created
October 14, 2020 06:12
-
-
Save r0t0shell/6bffc270140c132646cbcd76f1ecc6d0 to your computer and use it in GitHub Desktop.
Text-to-speech with audio file (.wav) caching in Python 3.x using Google Cloud Platform and PyAudio.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import necessary packages | |
| from google.cloud import texttospeech | |
| import pyaudio | |
| import hashlib | |
| import wave | |
| import os | |
class SpeechSynthesizer:
    """Speak text aloud via Google Cloud Text-to-Speech with on-disk caching.

    Each distinct text is synthesized at most once: the resulting audio is
    stored as ``<cache_dir>/<sha256 of text>.wav`` and replayed from that
    file on later calls, which saves API quota.
    """

    # Number of audio frames read from the WAV file and written to the
    # PyAudio stream per iteration.
    _CHUNK_FRAMES = 1000

    def __init__(self, key_json, cache_dir="/tmp/tts"):
        """Create the Text-to-Speech client.

        Args:
            key_json: Path to the Google Cloud service-account key file; it
                is exported as ``GOOGLE_APPLICATION_CREDENTIALS`` so the
                client library can pick it up.
            cache_dir: Directory in which synthesized audio is cached. It is
                created on first use if it does not exist.
        """
        # store Google Cloud key in a system environment variable
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_json
        # initialize and store GC TextToSpeech client
        self.client = texttospeech.TextToSpeechClient()
        self.cache_dir = cache_dir

    def __generate(self, text):
        """Request synthesized speech for *text* and return the API response.

        The voice and audio parameters are fixed: British male WaveNet
        voice, LINEAR16 encoding, pitch -1, speaking rate 1.
        """
        return self.client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=text),
            voice=texttospeech.VoiceSelectionParams(
                language_code="en-GB",
                name="en-GB-Wavenet-B",  # british male wavenet
                # The field is ``ssml_gender``; the former ``sml_gender``
                # spelling is not a field of VoiceSelectionParams.
                ssml_gender=texttospeech.SsmlVoiceGender.MALE,
            ),
            audio_config=texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.LINEAR16,  # wav
                pitch=-1,
                speaking_rate=1,
            ),
        )

    def _cache_path(self, text):
        """Return the cache file path for *text* (SHA-256 of its UTF-8 form)."""
        digest = hashlib.sha256(str(text).encode("utf-8")).hexdigest()
        return os.path.join(self.cache_dir, "%s.wav" % digest)

    def _ensure_cached(self, text):
        """Synthesize *text* into the cache if missing; return the file path."""
        cache_file = self._cache_path(text)
        if not os.path.exists(cache_file):
            audio_content = self.__generate(text).audio_content
            # The cache directory may not exist yet on a fresh machine.
            os.makedirs(self.cache_dir, exist_ok=True)
            # Write to a temporary name and rename, so an interrupted write
            # never leaves a truncated file that looks like a valid entry.
            partial_file = cache_file + ".part"
            with open(partial_file, "wb") as f:
                f.write(audio_content)
            os.replace(partial_file, cache_file)
        return cache_file

    def speak(self, text):
        """Play *text* through the default audio output device.

        The audio is taken from the cache when available and synthesized
        (then cached) otherwise. The call blocks until every chunk has been
        written to the output stream.

        Args:
            text: The text to speak; it is converted with ``str()`` before
                hashing for the cache key.
        """
        # Synthesize before touching the audio device so a failed API call
        # does not leave an open stream behind.
        cache_file = self._ensure_cached(text)

        with wave.open(cache_file, "rb") as wf:
            player = pyaudio.PyAudio()
            stream = None
            try:
                # Take the stream parameters from the WAV header rather than
                # hard-coding them, so playback always matches the file.
                stream = player.open(
                    format=player.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True,
                    frames_per_buffer=self._CHUNK_FRAMES,
                )
                frames = wf.readframes(self._CHUNK_FRAMES)
                while frames:
                    stream.write(frames)
                    frames = wf.readframes(self._CHUNK_FRAMES)
            finally:
                # Always release the audio device, even if playback fails.
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
                player.terminate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment