@LewisGet · Created September 25, 2025 08:21
from pyannote.audio import Pipeline
import torch
import os
import glob
import torchaudio
from pydub import AudioSegment
from IPython.display import display

# Load the pretrained speaker-diarization pipeline (needs a Hugging Face access token).
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token="...")

load_path = "/workspace/video/*.mkv"
os.makedirs("./tmpdir", exist_ok=True)

for i in glob.glob(load_path):
    audio = AudioSegment.from_file(i)
    split_time = 1000 * 60 * 15  # 15-minute chunks, in milliseconds
    split = 0
    filename = os.path.basename(i)

    for ii in range(0, len(audio), split_time):
        start_time = ii
        end_time = ii + split_time
        org_clip = audio[start_time:end_time]
        feed_clip = audio[start_time:end_time]

        # Keep an original-quality copy of this chunk.
        split_name = f"./tmpdir/__removable_org_{filename}_{split}.wav"
        org_clip.export(split_name, format="wav")

        # Downmix to 16 kHz, mono, 16-bit for the diarization model.
        feed_clip = feed_clip.set_frame_rate(16000)
        feed_clip = feed_clip.set_channels(1)
        feed_clip = feed_clip.set_sample_width(2)
        feed_model_name = f"./tmpdir/__removable_16k_{filename}_{split}.wav"
        feed_clip.export(feed_model_name, format="wav")

        # Run diarization on the 16 kHz copy.
        waveform, sample_rate = torchaudio.load(feed_model_name)
        diarization = pipeline({"waveform": waveform, "sample_rate": sample_rate})
        display(diarization)

        # Regions where two or more speakers talk at the same time.
        overlap_regions = diarization.get_overlap()

        for turn, _, speaker in diarization.itertracks(yield_label=True):
            # Skip turns that intersect overlapped speech; keep only clean single-speaker segments.
            if len(overlap_regions.crop(turn)) > 0:
                continue
            _start_time = int(turn.start * 1000)
            _stop_time = int(turn.end * 1000)
            # Cut the segment from the original-quality chunk (turn times are relative to the chunk).
            voice_clip = org_clip[_start_time:_stop_time]
            voice_clip.export(
                f"__removable_voice_split_{filename}_{split}_s_{_start_time}_e_{_stop_time}.wav",
                format="wav",
            )

        split += 1