-
-
Save milljm/52830a77b9017159b29dd52427836277 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python3 | |
| """ a quick text to speech tool """ | |
| import os | |
| import sys | |
| import argparse | |
| import sounddevice as sd | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| try: | |
| from IPython.display import display, Audio | |
| from kokoro import KPipeline | |
| import soundfile as sf | |
| except ImportError: | |
| print('There were errors importing the necessary libraries.\nBe sure to have the following ' | |
| 'libraries installed\n(all available via Conda and PIP):\n\n\tkokoro, IPython,' | |
| 'soundfile, and sounddevice\n\n') | |
| sys.exit(1) | |
def verify_args(args):
    """ make sure there is something to say

    args: parsed namespace carrying `input` (the -i/--input value) and `args` (the
          trailing positional words)

    Returns the namespace untouched when either of the two is set. Otherwise prints
    a usage hint and exits with status 1.
    """
    has_something_to_say = bool(args.input or args.args)
    if has_something_to_say:
        return args
    print('You need to specify an input file I will read, or just throw what you want said as '
          'an argument')
    sys.exit(1)
def parse_args(argv, piped_input):
    """ parses arguments

    argv:        list of command line tokens, without the program name
    piped_input: text read from stdin, or a falsy value when nothing was piped.
                 When set, it replaces whatever positional words were given.

    Returns the argparse namespace after verify_args() has accepted it. argparse
    exits the process on malformed options, and verify_args() exits with status 1
    when there is neither an input file nor any text to speak.
    """
    parser = argparse.ArgumentParser(description='Using kokoro to generate text to speech')
    # --input is opened as a file by txt2voice(), so the help text says so.
    parser.add_argument('-i', '--input', nargs='?',
                        help='path to a text file whose contents will be spoken')
    parser.add_argument('-a', '--accent', nargs='?', default='b',
                        help='Accent. Choose from: a (american), b (british). Default: b')
    parser.add_argument('--voice', nargs='?', default='af_heart',
                        help='Voice to use. Default: af_heart. '
                             'Combined voices with: af_heart,af_bella')
    parser.add_argument('-s', '--speed', nargs='?', type=float, default=1.0,
                        help='speed at which to speak')
    parser.add_argument('--stream', action='store_true',
                        help='Stream audio directly')
    # REMAINDER gathers every token from the first positional word onwards, so the
    # text to speak does not need quoting.
    parser.add_argument('args', nargs=argparse.REMAINDER)
    args = parser.parse_args(argv)
    if piped_input:
        # Piped text wins over words typed on the command line.
        args.args = [piped_input]
    return verify_args(args)
def _load_text(args):
    """ returns the text to speak: contents of args.input if given, else the joined args.args """
    if not args.input:
        return ' '.join(args.args)
    try:
        with open(args.input, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeError):
        # Covers missing paths, directories, permission problems and files that are
        # not valid UTF-8 text, instead of letting them surface as a traceback.
        print('path to input file not found or readable')
        sys.exit(1)


def txt2voice(args):
    """ speaks (or writes to .wav files) the text described by args

    args: namespace with `input`, `args`, `accent`, `voice`, `speed` and `stream`,
          as produced by parse_args()

    The text is resolved before the kokoro pipeline is created, so a bad input path
    is reported straight away with exit status 1. For every segment the pipeline
    yields, the graphemes and phonemes are printed. The audio is then either played
    and waited on (args.stream) or written to `<index>.wav` in the current directory.
    """
    sample_rate = 24000  # rate handed to both sounddevice and soundfile
    text = _load_text(args)
    pipeline = KPipeline(lang_code=args.accent, repo_id='hexgrad/Kokoro-82M')
    # split_pattern makes every run of newlines a segment boundary.
    generator = pipeline(text, voice=args.voice, speed=args.speed, split_pattern=r'\n+')
    for i, (gs, ps, audio) in enumerate(generator):
        print(f'Voice:\t{args.voice}\nGraphemes/Text:\n{gs}\n\nPhonemes:\n{ps}\n')
        if args.stream:
            sd.play(audio, sample_rate)
            sd.wait()  # block until playback ends before starting the next segment
        else:
            print(f'writing file:\t{i}.wav')
            sf.write(f'{i}.wav', audio, sample_rate)
if __name__ == '__main__':
    # A non-interactive stdin (pipe or redirect) supplies the text to speak.
    # parse_args() lets it override any positional words.
    piped_input = None
    if not sys.stdin.isatty():
        piped_input = sys.stdin.read()
    args = parse_args(sys.argv[1:], piped_input)
    txt2voice(args)
Yeah, me too. I think I am going to try and reproduce Amelia Tyler's "Authority" line in BG3 😄
You should also try mixing some voices. I found --voice af_nicole,af_heart --accent b --speed .8 pretty darn good. Oh, and be sure to try af_nicole by itself! It is a very distinctive voice.
Yeah, me too. I think I am going to try and reproduce Amelia Tyler's "Authority" line in BG3 😄
That's going to be super hard! She's got such perfect intonation :-)
hahaha just posting to possibly get ahead of this, and because I don't want to strike a nerve with anyone:
I only mention 'Amelia Tyler' because she is perfect. And I only want to learn the phonetics/intonation process. Might as well pick from the best!
OK, after 34 minutes I admit defeat. My computer will never turn into Amelia Tyler, boohoo.
hahah same. The closest I got:
ɔˈθːːˈɒɹɪtˌiː
Wow, well done! That is actually really good!! :-)
Hmmm, I can see I could spend a long time having fun with the intonation!