Last active
April 11, 2025 02:05
-
-
Save milljm/52830a77b9017159b29dd52427836277 to your computer and use it in GitHub Desktop.
Kokoro txt --> speech
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ a quick text to speech tool """ | |
| import os | |
| import sys | |
| import argparse | |
| import sounddevice as sd | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# Third-party dependencies are imported inside a guard so that a missing package
# produces an actionable message instead of a raw traceback.
# NOTE(review): sounddevice is imported earlier in the file, outside this guard,
# so its absence is not reported by the message below.
try:
    from IPython.display import display, Audio
    from kokoro import KPipeline
    import soundfile as sf
except ImportError:
    # The second literal ends in ', ' so the package list reads
    # "kokoro, IPython, soundfile" once the adjacent strings are concatenated.
    print('There were errors importing the necessary libraries.\nBe sure to have the following '
          'libraries installed\n(all available via Conda and PIP):\n\n\tkokoro, IPython, '
          'soundfile, and sounddevice\n\n')
    sys.exit(1)
def verify_args(args):
    """Return *args* unchanged if it names something to speak, else exit.

    Something to speak means either a truthy ``args.input`` or a non-empty
    ``args.args``. When both are missing a usage hint is printed and the
    process exits with status 1.
    """
    if args.input or args.args:
        return args
    print('You need to specify an input file I will read, or just throw what you want said as '
          'an argument')
    sys.exit(1)
def parse_args(argv, piped_input):
    """Parse command-line arguments and check there is something to speak.

    Args:
        argv: Argument strings, excluding the program name.
        piped_input: Text read from standard input, or a falsy value when
            nothing was piped. When truthy it replaces any positional text.

    Returns:
        The parsed namespace, as returned by ``verify_args``.
    """
    parser = argparse.ArgumentParser(description='Using kokoro to generate text to speech')
    parser.add_argument('-i', '--input', nargs='?',
                        help='path to a text file whose contents will be spoken')
    # nargs='?' allows a flag to appear without a value; const makes that case
    # fall back to the documented default instead of silently yielding None.
    parser.add_argument('-a', '--accent', nargs='?', default='b', const='b',
                        help='Accent. Choose from: a (american), b (british). Default: b')
    parser.add_argument('--voice', nargs='?', default='af_heart', const='af_heart',
                        help='Voice to use. Default: af_heart. '
                             'Combined voices with: af_heart,af_bella')
    parser.add_argument('-s', '--speed', nargs='?', type=float, default=1.0, const=1.0,
                        help='speed at which to speak')
    parser.add_argument('--stream', action='store_true',
                        help='Stream audio directly')
    # Everything left over is taken as the words to speak.
    parser.add_argument('args', nargs=argparse.REMAINDER)
    args = parser.parse_args(argv)
    if piped_input:
        args.args = [piped_input]
    return verify_args(args)
def _read_text(args):
    """Return the text to speak: the contents of ``args.input``, else the joined ``args.args``."""
    if not args.input:
        return ' '.join(args.args)
    try:
        with open(args.input, 'r', encoding="utf-8") as handle:
            return handle.read()
    except OSError:
        # Covers a missing path as well as directories and permission errors,
        # which an exists() pre-check would let through to a traceback.
        print('path to input file not found or readable')
        sys.exit(1)


def txt2voice(args):
    """Synthesize speech for the requested text and play it or write WAV files.

    Args:
        args: Parsed namespace providing ``input``, ``args``, ``accent``,
            ``voice``, ``speed`` and ``stream``.

    The text is resolved before the pipeline is created, so an unreadable
    input file is reported (exit status 1) without first setting up kokoro.
    Each segment yielded by the pipeline is either played through sounddevice
    (``args.stream``) or written to ``<index>.wav`` in the current directory.
    """
    # Single rate used for both playback and file output.
    # NOTE(review): presumably the rate kokoro generates audio at; confirm.
    sample_rate = 24000

    text = _read_text(args)
    pipeline = KPipeline(lang_code=args.accent, repo_id='hexgrad/Kokoro-82M')
    # The split pattern breaks the text on runs of newlines.
    generator = pipeline(text, voice=args.voice, speed=args.speed, split_pattern=r'\n+')
    for index, (graphemes, phonemes, audio) in enumerate(generator):
        print(f'Voice:\t{args.voice}\nGraphemes/Text:\n{graphemes}\n\nPhonemes:\n{phonemes}\n')
        if args.stream:
            sd.play(audio, sample_rate)
            sd.wait()  # block until this segment finishes before starting the next
        else:
            print(f'writing file:\t{index}.wav')
            sf.write(f'{index}.wav', audio, sample_rate)
if __name__ == '__main__':
    # When stdin is not a terminal, whatever was piped in is read in full and
    # handed to parse_args, which uses it in place of positional text.
    piped_input = None
    if not sys.stdin.isatty():
        piped_input = sys.stdin.read()
    args = parse_args(sys.argv[1:], piped_input)
    txt2voice(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Wow, well done! That is actually really good !! :-)