Created
May 14, 2025 22:55
-
-
Save surya501/c40acff9739880311ca7961345fb0958 to your computer and use it in GitHub Desktop.
Script to visually spot-check audio gaps (silences) in WAV files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = "<=3.12" | |
| # dependencies = [ | |
| # "pydub", | |
| # "numpy" | |
| # ] | |
| # /// | |
| import argparse | |
| import os | |
| import sys | |
| import numpy as np | |
| from pydub import AudioSegment | |
def detect_silence(audio: "AudioSegment", min_silence_ms=1000, db_offset=16):
    """Detect silent ranges in an audio segment using frame-based dBFS.

    Args:
        audio: pydub AudioSegment to scan.
        min_silence_ms: Minimum run length (ms) for a quiet stretch to be
            reported as silence.
        db_offset: The silence threshold sits this many dB below the
            segment's average loudness (``audio.dBFS``).

    Returns:
        List of ``(start_sec, end_sec)`` tuples, one per silent run.
    """
    silence_threshold = audio.dBFS - db_offset
    sample_rate = audio.frame_rate
    # Cast to float before squaring: raw int16 samples overflow and wrap
    # when squared as a numpy int array, corrupting the RMS values.
    samples = np.asarray(audio.get_array_of_samples(), dtype=np.float64)
    # Convert stereo to mono by averaging channels
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)
    frame_ms = 10
    frame_len = int(sample_rate * frame_ms / 1000)
    total_frames = len(samples) // frame_len
    # Vectorized per-frame RMS: drop the ragged tail and reshape to
    # (frames, samples_per_frame) instead of looping in Python.
    frames = samples[:total_frames * frame_len].reshape(total_frames, frame_len)
    frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))
    # Normalize by full scale so frame levels are true dBFS, comparable to
    # audio.dBFS. Without this, 16-bit audio reads ~90 dB too hot and no
    # frame is ever classified as silent. The tiny floor avoids log(0).
    frame_dbfs = 20 * np.log10(
        np.maximum(frame_rms / audio.max_possible_amplitude, 1e-10)
    )
    silent_flags = frame_dbfs < silence_threshold
    silent_ranges = []
    start_idx = None
    for i, is_silent in enumerate(silent_flags):
        if is_silent:
            if start_idx is None:
                start_idx = i
        else:
            if start_idx is not None:
                duration_ms = (i - start_idx) * frame_ms
                if duration_ms >= min_silence_ms:
                    silent_ranges.append((start_idx * frame_ms, i * frame_ms))
                start_idx = None
    # Check for trailing silence that runs to the end of the audio
    if start_idx is not None:
        duration_ms = (len(silent_flags) - start_idx) * frame_ms
        if duration_ms >= min_silence_ms:
            silent_ranges.append((start_idx * frame_ms, len(silent_flags) * frame_ms))
    # Convert ms -> seconds for the caller
    return [(s / 1000.0, e / 1000.0) for s, e in silent_ranges]
def print_results(file_path, audio, silent_ranges_sec):
    """Print a silence summary plus a colored text timeline for one file.

    Args:
        file_path: Path shown in the report header.
        audio: Object whose ``len()`` is the audio duration in milliseconds.
        silent_ranges_sec: List of (start_sec, end_sec) silent spans.
    """
    duration_s = len(audio) / 1000.0
    silence_s = sum(end - start for start, end in silent_ranges_sec)
    pct = (silence_s / duration_s) * 100 if duration_s > 0 else 0
    print(f"\nSilence detected in: {file_path}")
    print(f"Total silence: {silence_s:.2f} seconds ({pct:.2f}%); Total Audio Length: {duration_s:.2f} seconds")
    # Timeline: one block per 0.25 s of audio.
    block_s = 0.25
    n_blocks = int(duration_s / block_s)
    # ANSI color codes
    RED, YELLOW, GREEN, RESET = '\033[91m', '\033[93m', '\033[92m', '\033[0m'
    # Per-block color; None means "not silent" (uncolored).
    colors = [None] * n_blocks
    for start, end in silent_ranges_sec:
        gap = end - start
        # Longer gaps get more alarming colors.
        if gap > 1.5:
            tint = RED
        elif gap > 1.0:
            tint = YELLOW
        else:
            tint = GREEN
        lo = int(start / block_s)
        hi = min(int(end / block_s), n_blocks)
        for idx in range(lo, hi):
            colors[idx] = tint
    # Render 40 blocks (10 s of audio) per line, each prefixed with its time span.
    per_row = 40
    for row_start in range(0, n_blocks, per_row):
        row = colors[row_start:row_start + per_row]
        rendered = ''.join(f"{tint}█{RESET}" if tint else '█' for tint in row)
        t0 = row_start * block_s
        t1 = min((row_start + per_row) * block_s, duration_s)
        print(f"{t0:>5.1f}s–{t1:>5.1f}s | {rendered}")
def process_file(file_path, min_silence_len, silence_thresh_offset):
    """Load one WAV file, run silence detection, and print the report.

    Load failures are reported to stdout and swallowed so a batch run
    over many files keeps going.
    """
    try:
        audio = AudioSegment.from_wav(file_path)
    except Exception as exc:
        print(f"Error loading {file_path}: {exc}")
        return
    ranges = detect_silence(audio, min_silence_len, silence_thresh_offset)
    print_results(file_path, audio, ranges)
def main():
    """CLI entry point: report on one WAV file, or every WAV in the cwd.

    With an existing path argument, only that file is checked; otherwise
    all *.wav files in the current directory are processed in ascending
    modification-time order. Exits with status 1 if none are found.
    """
    parser = argparse.ArgumentParser(description="Fast Silence Detection in WAV files.")
    parser.add_argument("wav_file", nargs="?", help="Path to a WAV file (optional)")
    parser.add_argument("--min-silence-len", type=int, default=500, help="Minimum silence length in ms")
    parser.add_argument("--silence-thresh-offset", type=float, default=16.0, help="dB offset from average for silence threshold")
    args = parser.parse_args()

    if args.wav_file and os.path.exists(args.wav_file):
        process_file(args.wav_file, args.min_silence_len, args.silence_thresh_offset)
        return

    # Oldest first: sort directory's WAV files by modification time.
    candidates = sorted(
        (name for name in os.listdir('.') if name.lower().endswith('.wav')),
        key=os.path.getmtime,
    )
    if not candidates:
        print("No WAV files found.")
        sys.exit(1)
    for name in candidates:
        process_file(name, args.min_silence_len, args.silence_thresh_offset)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment