Last active
October 24, 2023 01:06
-
-
Save shehbajdhillon/5e51c109fbf48754b0914d62402eefd5 to your computer and use it in GitHub Desktop.
Voice Cloning and Text To Speech
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Previous Code (Unchanged) | |
| import elevenlabs | |
# Read the ElevenLabs API key from the ELEVENLABS_API_KEY environment variable
# and register it with the elevenlabs module. os.getenv returns None when the
# variable is unset, in which case None is what gets passed to set_api_key.
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
elevenlabs.set_api_key(elevenlabs_api_key)
def clone_voice(file_path):
    """Create an ElevenLabs voice clone from a single sample file.

    Args:
        file_path: Path of the sample file handed to ``elevenlabs.clone``.

    Returns:
        The value returned by ``elevenlabs.clone`` for the voice, which is
        registered under the fixed name "Cloned Voice".
    """
    sample_files = [file_path]
    return elevenlabs.clone(name="Cloned Voice", files=sample_files)
def generate_speech(input_text, voice_object, output_file_path):
    """Synthesize ``input_text`` with the given voice and save the audio.

    Args:
        input_text: Text to be spoken.
        voice_object: Voice passed through to ``elevenlabs.generate``.
        output_file_path: Destination file; it is opened in binary write mode,
            so any existing content is replaced.
    """
    synthesis_options = {
        "text": input_text,
        "voice": voice_object,
        "model": "eleven_multilingual_v2",
    }
    audio_data = elevenlabs.generate(**synthesis_options)

    with open(output_file_path, "wb") as output_file:
        output_file.write(audio_data)
def get_file_name_and_format(file_path):
    """Split a path into ``(name_without_extension, extension)``.

    Only the final extension of the last path component is split off, so
    dots in directory names or earlier in the file name stay in the name
    (``"a.b.mp4"`` -> ``("a.b", "mp4")``).

    Args:
        file_path: Path (or bare file name) to split.

    Returns:
        A ``(name, extension)`` tuple. ``extension`` has no leading dot and
        is an empty string when the path has no extension; in that case
        ``name`` is the whole path rather than an empty string.
    """
    import os

    # A plain split(".") treats every dot as an extension separator, which
    # breaks on extension-less names and on dots in directory names.
    file_name, extension = os.path.splitext(file_path)
    # splitext keeps the leading dot ("" or ".ext"); callers add their own.
    return file_name, extension[1:]
def get_video_segment_path(file_path, segment):
    """Build the path used for one cut-out segment of the source video.

    The result is the source path with ``_segment_<id>`` inserted before the
    extension, where ``<id>`` is ``segment['id']``.

    Args:
        file_path: Path of the source video.
        segment: Transcript segment; only its ``'id'`` entry is read.

    Returns:
        The segment file path as a string.
    """
    stem, extension = get_file_name_and_format(file_path)
    return f"{stem}_segment_{segment['id']}.{extension}"
def get_audio_file_path(file_path, segment, input_language):
    """Build the ``.mp3`` path for the generated speech of one segment.

    The result is the source path without its extension, followed by
    ``_segment_<id>_<input_language>.mp3``.

    Args:
        file_path: Path of the source video.
        segment: Transcript segment; only its ``'id'`` entry is read.
        input_language: Language label embedded in the file name.

    Returns:
        The audio file path as a string.
    """
    # Only the name part is needed; the output is always an mp3.
    stem = get_file_name_and_format(file_path)[0]
    return f"{stem}_segment_{segment['id']}_{input_language}.mp3"
def cut_segment(file_path, start, end, segment_path, *, mute=True):
    """Cut the span ``start``..``end`` out of a media file using ffmpeg.

    The command is passed to ffmpeg as an argument list rather than through
    a shell, so paths containing spaces or shell metacharacters are treated
    as literal file names.

    Args:
        file_path: Input media file.
        start: Start position, passed to ffmpeg's ``-ss`` option.
        end: End position, passed to ffmpeg's ``-to`` option.
        segment_path: Output file for the cut segment.
        mute: When true (the default, matching the previous behaviour) the
            ``volume=0`` audio filter is applied, which silences the output.
            Pass ``mute=False`` to keep the original audio.

    Returns:
        ffmpeg's exit code: 0 on success, non-zero on failure. 127 is
        returned when the ``ffmpeg`` executable cannot be found, so the
        function keeps reporting failure through its return value.
    """
    import subprocess

    ffmpeg_cmd = [
        "ffmpeg",
        # The "file:" prefix makes ffmpeg treat the argument as a plain file
        # path even if it contains a colon.
        "-i", f"file:{file_path}",
        "-ss", str(start),
        "-to", str(end),
    ]
    if mute:
        ffmpeg_cmd += ["-af", "volume=0"]
    ffmpeg_cmd.append(f"file:{segment_path}")

    try:
        return subprocess.run(ffmpeg_cmd, check=False).returncode
    except FileNotFoundError:
        # Mirror the shell's "command not found" status instead of raising.
        return 127
def delete_files(files):
    """Delete the given files (or directory trees) from disk, best-effort.

    Paths are removed directly instead of through a shell ``rm -rf``
    command, so names containing spaces or shell metacharacters are handled
    literally. As a consequence, shell glob patterns are no longer expanded.

    Args:
        files: Iterable of paths to remove. Missing paths are ignored.

    Returns:
        None. Failures other than a missing path are reported on stderr and
        do not stop the remaining deletions.
    """
    import os
    import shutil
    import sys

    for path in files:
        try:
            # Symlinks are unlinked rather than followed.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
        except FileNotFoundError:
            # Same as `rm -f`: a path that is already gone is not an error.
            continue
        except OSError as exc:
            print(f"delete_files: cannot remove {path!r}: {exc}", file=sys.stderr)
def main():
    """Clone the speaker's voice and generate speech for every segment.

    Expects exactly two command-line arguments: the input file path and a
    language, which is lower-cased before use. The file is transcribed with
    ``transcribe_audio`` and each segment's text is passed through
    ``translate_segment`` (both defined outside this block). The first
    segment is cut out as the sample for ``clone_voice``, and one ``.mp3``
    is then written per segment.

    Exits with an error message when the arguments are wrong, when the
    transcript has no segments, or when cutting the sample clip fails.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        sys.exit(f"Usage: {sys.argv[0]} <file_path> <input_language>")
    file_path, input_language = cli_args
    input_language = str(input_language).lower()

    transcript = transcribe_audio(file_path=file_path)
    segments = transcript.segments
    if not segments:
        sys.exit(f"No transcript segments found for {file_path}; nothing to do.")

    # Cut out the first segment from the input video for voice cloning.
    # NOTE(review): cut_segment applies ffmpeg's volume=0 filter by default,
    # so this sample clip is silent - confirm that is intended for cloning.
    initial_segment = segments[0]
    initial_segment_path = get_video_segment_path(file_path, initial_segment)
    try:
        status = cut_segment(
            file_path,
            initial_segment.start,
            initial_segment.end,
            initial_segment_path,
        )
        if status != 0:
            sys.exit(f"ffmpeg failed (status {status}) while cutting {initial_segment_path}")
        voice_object = clone_voice(initial_segment_path)
    finally:
        # Remove the temporary clip even if cutting or cloning failed.
        delete_files([initial_segment_path])

    for segment in segments:
        translated_text = translate_segment(segment.text, input_language)
        audio_file_path = get_audio_file_path(file_path, segment, input_language)
        generate_speech(translated_text, voice_object, audio_file_path)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment