Skip to content

Instantly share code, notes, and snippets.

@surya501
Created May 14, 2025 22:55
Show Gist options
  • Select an option

  • Save surya501/c40acff9739880311ca7961345fb0958 to your computer and use it in GitHub Desktop.

Select an option

Save surya501/c40acff9739880311ca7961345fb0958 to your computer and use it in GitHub Desktop.
Script to visually spot-check audio gaps (silences) in WAV files
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = "<=3.12"
# dependencies = [
# "pydub",
# "numpy"
# ]
# ///
import argparse
import os
import sys
import numpy as np
from pydub import AudioSegment
def detect_silence(audio: "AudioSegment", min_silence_ms=1000, db_offset=16):
    """Detect silent ranges in an audio segment using frame-based dBFS.

    The clip is cut into 10 ms frames. A frame counts as silent when its
    level is more than ``db_offset`` dB below the clip's overall level
    (``audio.dBFS``). Runs of consecutive silent frames are merged and runs
    shorter than ``min_silence_ms`` are dropped.

    Args:
        audio: A pydub ``AudioSegment``. Only ``dBFS``, ``frame_rate``,
            ``channels``, ``max_possible_amplitude`` and
            ``get_array_of_samples()`` are used.
        min_silence_ms: Minimum length, in milliseconds, of a silent run
            for it to be reported.
        db_offset: Distance in dB below the clip's overall level at which a
            frame is considered silent.

    Returns:
        A list of ``(start_seconds, end_seconds)`` tuples in chronological
        order. Empty when the clip is shorter than one frame.
    """
    frame_ms = 10
    silence_threshold = audio.dBFS - db_offset

    # Work in float64 from the start: squaring raw int16 samples wraps
    # around and produces garbage (even negative) mean-square values.
    samples = np.asarray(audio.get_array_of_samples(), dtype=np.float64)

    # Convert multi-channel audio to mono by averaging the interleaved channels.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)

    frame_len = int(audio.frame_rate * frame_ms / 1000)
    if frame_len <= 0:
        # Sample rate too low to form a 10 ms frame; nothing to analyse.
        return []
    total_frames = len(samples) // frame_len
    if total_frames == 0:
        return []

    # One row per frame; trailing samples that do not fill a frame are ignored.
    frames = samples[:total_frames * frame_len].reshape(total_frames, frame_len)
    frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))

    # Normalise by full scale so the per-frame level is in dBFS, the same
    # unit as audio.dBFS that the threshold is derived from.
    frame_ratio = frame_rms / audio.max_possible_amplitude
    frame_dbfs = 20 * np.log10(np.maximum(frame_ratio, 1e-10))  # avoid log(0)

    if silence_threshold == float("-inf"):
        # The whole clip is digital silence: no frame can be *below* -inf,
        # so mark every frame silent explicitly.
        silent_flags = np.ones(total_frames, dtype=bool)
    else:
        silent_flags = frame_dbfs < silence_threshold

    # Pad with "not silent" on both sides so leading and trailing runs also
    # produce a rising (+1) and a falling (-1) edge.
    padded = np.concatenate(([0], silent_flags.astype(np.int8), [0]))
    edges = np.diff(padded)
    run_starts = np.flatnonzero(edges == 1).tolist()
    run_ends = np.flatnonzero(edges == -1).tolist()  # exclusive frame index

    return [
        (start * frame_ms / 1000.0, end * frame_ms / 1000.0)
        for start, end in zip(run_starts, run_ends)
        if (end - start) * frame_ms >= min_silence_ms
    ]
def print_results(file_path, audio, silent_ranges_sec):
    """Print a silence summary and a colour-coded text timeline for one file.

    Args:
        file_path: Path shown in the report header.
        audio: Object whose ``len()`` is the clip duration in milliseconds.
        silent_ranges_sec: Iterable of ``(start_seconds, end_seconds)``
            silent ranges.

    The timeline uses one block per 0.25 s, 40 blocks per printed row.
    Blocks touched by a silent range are coloured by that range's length:
    red above 1.5 s, yellow above 1.0 s, green otherwise.
    """
    # ANSI color codes
    RED = '\033[91m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RESET = '\033[0m'

    granularity = 0.25
    blocks_per_row = 40

    total_audio_length = len(audio) / 1000.0
    total_silence = sum(stop - begin for begin, stop in silent_ranges_sec)
    if total_audio_length > 0:
        silence_percent = (total_silence / total_audio_length) * 100
    else:
        silence_percent = 0

    print(f"\nSilence detected in: {file_path}")
    print(f"Total silence: {total_silence:.2f} seconds ({silence_percent:.2f}%); Total Audio Length: {total_audio_length:.2f} seconds")

    # One colour slot per timeline block; None means "not silent".
    num_blocks = int(total_audio_length / granularity)
    block_colors = [None] * num_blocks

    for begin, stop in silent_ranges_sec:
        span = stop - begin
        if span > 1.5:
            tint = RED
        elif span > 1.0:
            tint = YELLOW
        else:
            tint = GREEN
        first_block = int(begin / granularity)
        last_block = min(int(stop / granularity), num_blocks)
        for block in range(first_block, last_block):
            block_colors[block] = tint

    # Emit the timeline row by row.
    for row_start in range(0, num_blocks, blocks_per_row):
        row = block_colors[row_start:row_start + blocks_per_row]
        colored_text = ''.join(
            f"{tint}█{RESET}" if tint else '█' for tint in row
        )
        start_time = row_start * granularity
        end_time = min((row_start + blocks_per_row) * granularity, total_audio_length)
        print(f"{start_time:>5.1f}s–{end_time:>5.1f}s | {colored_text}")
def process_file(file_path, min_silence_len, silence_thresh_offset):
    """Load one WAV file, detect its silent ranges and print the report.

    Args:
        file_path: Path of the WAV file to analyse.
        min_silence_len: Minimum silence length in milliseconds, forwarded
            to ``detect_silence``.
        silence_thresh_offset: dB offset below the clip's level, forwarded
            to ``detect_silence``.

    A file that cannot be loaded is reported on stdout and skipped, so one
    bad file does not abort a batch run.
    """
    try:
        audio = AudioSegment.from_wav(file_path)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
    else:
        found = detect_silence(audio, min_silence_len, silence_thresh_offset)
        print_results(file_path, audio, found)
def main():
    """Command-line entry point.

    With a ``wav_file`` argument, analyse just that file. Without one,
    analyse every ``*.wav`` file in the current directory, oldest
    modification time first.

    Exits with status 1 when the given file does not exist or when no WAV
    files are found in the current directory.
    """
    parser = argparse.ArgumentParser(description="Fast Silence Detection in WAV files.")
    parser.add_argument("wav_file", nargs="?", help="Path to a WAV file (optional)")
    parser.add_argument("--min-silence-len", type=int, default=500, help="Minimum silence length in ms")
    parser.add_argument("--silence-thresh-offset", type=float, default=16.0, help="dB offset from average for silence threshold")
    args = parser.parse_args()

    if args.wav_file:
        # An explicit path that is missing is an error; do not silently fall
        # back to scanning the current directory and analysing other files.
        if not os.path.exists(args.wav_file):
            print(f"File not found: {args.wav_file}", file=sys.stderr)
            sys.exit(1)
        process_file(args.wav_file, args.min_silence_len, args.silence_thresh_offset)
        return

    # No file given: process every WAV in the current directory, sorted by
    # modification time (oldest first).
    wav_files = sorted(
        (f for f in os.listdir('.') if f.lower().endswith('.wav')),
        key=os.path.getmtime,
    )
    if not wav_files:
        print("No WAV files found.", file=sys.stderr)
        sys.exit(1)
    for f in wav_files:
        process_file(f, args.min_silence_len, args.silence_thresh_offset)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment