Skip to content

Instantly share code, notes, and snippets.

@shehbajdhillon
Last active October 24, 2023 01:06
Show Gist options
  • Select an option

  • Save shehbajdhillon/5e51c109fbf48754b0914d62402eefd5 to your computer and use it in GitHub Desktop.

Select an option

Save shehbajdhillon/5e51c109fbf48754b0914d62402eefd5 to your computer and use it in GitHub Desktop.
Voice Cloning and Text To Speech
# Previous Code (Unchanged)
import elevenlabs
# Configure the ElevenLabs SDK with the key stored in the ELEVENLABS_API_KEY
# environment variable (None is passed through when the variable is unset).
elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY")
elevenlabs.set_api_key(elevenlabs_api_key)
def clone_voice(file_path):
    """Create an ElevenLabs voice clone from a single sample file.

    Args:
        file_path: Path of the sample file handed to ``elevenlabs.clone``.

    Returns:
        The value returned by ``elevenlabs.clone`` for the voice named
        "Cloned Voice".
    """
    sample_files = [file_path]
    return elevenlabs.clone(name="Cloned Voice", files=sample_files)
def generate_speech(input_text, voice_object, output_file_path):
    """Synthesize ``input_text`` with the given voice and save it to disk.

    Args:
        input_text: Text to be spoken.
        voice_object: Voice forwarded to ``elevenlabs.generate``.
        output_file_path: Destination file; it is opened in binary write
            mode, so an existing file is overwritten.
    """
    speech_data = elevenlabs.generate(
        model="eleven_multilingual_v2",
        voice=voice_object,
        text=input_text,
    )
    with open(output_file_path, mode="wb") as out_file:
        out_file.write(speech_data)
def get_file_name_and_format(file_path):
    """Split a path into its extension-less part and its extension.

    Only the final path component is inspected for an extension, so dots in
    directory names are left alone and a path without an extension keeps its
    full name instead of being mistaken for a bare extension.

    Args:
        file_path: Path to split, e.g. ``"videos/talk.final.mp4"``.

    Returns:
        A ``(name, format)`` tuple where ``name`` is the path without its
        extension and ``format`` is the extension without the leading dot
        (``""`` when the path has no extension).

    Examples:
        >>> get_file_name_and_format("videos/talk.final.mp4")
        ('videos/talk.final', 'mp4')
        >>> get_file_name_and_format("clip")
        ('clip', '')
    """
    # Local import so the helper does not rely on a module-level import.
    import os

    name, extension = os.path.splitext(file_path)
    # splitext keeps the leading dot on the extension; callers expect it gone.
    return name, extension[1:]
def get_video_segment_path(file_path, segment):
    """Build the path of the video clip cut out for one segment.

    Args:
        file_path: Path of the source video.
        segment: Mapping-like segment; its ``'id'`` entry is embedded in
            the result.

    Returns:
        ``"<name>_segment_<id>.<format>"``, where name and format come from
        ``get_file_name_and_format(file_path)``.
    """
    base, extension = get_file_name_and_format(file_path)
    return f"{base}_segment_{segment['id']}.{extension}"
def get_audio_file_path(file_path, segment, input_language):
    """Build the path of the generated MP3 for one segment and language.

    Args:
        file_path: Path of the source video.
        segment: Mapping-like segment; its ``'id'`` entry is embedded in
            the result.
        input_language: Language label appended to the file name.

    Returns:
        ``"<name>_segment_<id>_<input_language>.mp3"``, where name is the
        source path without its extension.
    """
    base = get_file_name_and_format(file_path)[0]
    return f"{base}_segment_{segment['id']}_{input_language}.mp3"
def cut_segment(file_path, start, end, segment_path):
    """Cut the ``start``..``end`` range out of a video with ffmpeg.

    ffmpeg is invoked with an argument list instead of a shell string, so
    paths containing spaces, quotes or shell metacharacters are passed
    through verbatim and cannot inject extra commands.

    Args:
        file_path: Input video path.
        start: Start position, passed to ffmpeg's ``-ss`` option.
        end: End position, passed to ffmpeg's ``-to`` option.
        segment_path: Output path for the cut clip.

    Returns:
        ffmpeg's exit status: ``0`` on success, non-zero on failure. ``127``
        (the shell's "command not found" status) is returned when the ffmpeg
        executable is not available.
    """
    # Local import so the helper does not rely on a module-level import.
    import subprocess

    ffmpeg_cmd = [
        "ffmpeg",
        # The "file:" prefix makes ffmpeg treat the value as a plain file
        # path rather than parsing it as a protocol URL.
        "-i", f"file:{file_path}",
        "-ss", str(start),
        "-to", str(end),
        # The audio filter sets the volume to 0, i.e. the clip is muted.
        "-af", "volume=0",
        f"file:{segment_path}",
    ]
    try:
        return subprocess.run(ffmpeg_cmd, check=False).returncode
    except FileNotFoundError:
        # Keep the "return a status" contract instead of raising when
        # ffmpeg is not installed.
        return 127
def delete_files(files):
    """Delete the given files or directories from disk, best effort.

    Paths are removed directly from Python rather than through an ``rm -rf``
    shell string, so names containing spaces or shell metacharacters are
    handled literally (no word splitting, glob expansion or command
    injection).

    Args:
        files: Iterable of paths. Directories are removed recursively;
            paths that do not exist are skipped silently.

    Any other failure (e.g. a permission error) is reported on stderr and
    does not stop the remaining paths from being processed.
    """
    # Local imports so the helper does not rely on module-level imports.
    import os
    import shutil
    import sys

    for path in files:
        try:
            # A symlink to a directory is unlinked, not followed.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
        except FileNotFoundError:
            # Already gone: nothing to clean up.
            continue
        except OSError as err:
            print(f"delete_files: could not remove {path}: {err}", file=sys.stderr)
def main():
    """Dub a video's transcript segments into another language.

    Expects exactly two command-line arguments: the input file path and a
    language, which is lower-cased and forwarded to ``translate_segment``.

    Steps:
        1. Transcribe the input with ``transcribe_audio``.
        2. Cut the first transcript segment out of the input and clone a
           voice from that clip; the clip is always deleted afterwards.
        3. Translate each segment's text and write the generated speech to
           the path given by ``get_audio_file_path``.

    Raises:
        SystemExit: If the argument count is wrong or the transcript has no
            segments.
        RuntimeError: If cutting the first segment fails.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        raise SystemExit("Usage: python <script> <file_path> <language>")
    file_path, input_language = cli_args
    input_language = str(input_language).lower()

    transcript = transcribe_audio(file_path=file_path)
    segments = transcript.segments
    if not segments:
        raise SystemExit(f"No transcript segments found in {file_path}; nothing to dub.")

    # Cut out the first segment from the input video for voice cloning.
    # NOTE(review): cut_segment runs ffmpeg with the 'volume=0' audio filter,
    # so the clip handed to clone_voice is presumably silent -- confirm that
    # this is intended.
    initial_segment = segments[0]
    initial_segment_path = get_video_segment_path(file_path, initial_segment)
    try:
        status = cut_segment(
            file_path, initial_segment.start, initial_segment.end, initial_segment_path
        )
        if status != 0:
            raise RuntimeError(
                f"ffmpeg failed to cut {initial_segment_path} (status {status})"
            )
        voice_object = clone_voice(initial_segment_path)
    finally:
        # Remove the temporary clip even when cutting or cloning fails.
        delete_files([initial_segment_path])

    for segment in segments:
        translated_text = translate_segment(segment.text, input_language)
        audio_file_path = get_audio_file_path(file_path, segment, input_language)
        generate_speech(translated_text, voice_object, audio_file_path)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment