Created
May 14, 2025 22:55
-
-
Save surya501/c40acff9739880311ca7961345fb0958 to your computer and use it in GitHub Desktop.
Script to visually spot-check audio gaps (silences) in WAV files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = "<=3.12" | |
| # dependencies = [ | |
| # "pydub", | |
| # "numpy" | |
| # ] | |
| # /// | |
| import argparse | |
| import os | |
| import sys | |
| import numpy as np | |
| from pydub import AudioSegment | |
def detect_silence(audio: "AudioSegment", min_silence_ms=1000, db_offset=16):
    """Detect silent ranges in an audio segment using frame-based dBFS.

    Args:
        audio: pydub AudioSegment to scan.
        min_silence_ms: Minimum run length (ms) for a quiet stretch to be
            reported as silence.
        db_offset: The silence threshold sits this many dB below the
            segment's average loudness (``audio.dBFS``).

    Returns:
        List of ``(start_sec, end_sec)`` tuples, one per silent run.
    """
    silence_threshold = audio.dBFS - db_offset
    sample_rate = audio.frame_rate
    # Cast to float before squaring: raw int16 samples overflow and wrap
    # when squared as a numpy int array, corrupting the RMS values.
    samples = np.asarray(audio.get_array_of_samples(), dtype=np.float64)
    # Convert stereo to mono by averaging channels
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)
    frame_ms = 10
    frame_len = int(sample_rate * frame_ms / 1000)
    total_frames = len(samples) // frame_len
    # Vectorized per-frame RMS: drop the ragged tail and reshape to
    # (frames, samples_per_frame) instead of looping in Python.
    frames = samples[:total_frames * frame_len].reshape(total_frames, frame_len)
    frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))
    # Normalize by full scale so frame levels are true dBFS, comparable to
    # audio.dBFS. Without this, 16-bit audio reads ~90 dB too hot and no
    # frame is ever classified as silent. The tiny floor avoids log(0).
    frame_dbfs = 20 * np.log10(
        np.maximum(frame_rms / audio.max_possible_amplitude, 1e-10)
    )
    silent_flags = frame_dbfs < silence_threshold
    silent_ranges = []
    start_idx = None
    for i, is_silent in enumerate(silent_flags):
        if is_silent:
            if start_idx is None:
                start_idx = i
        else:
            if start_idx is not None:
                duration_ms = (i - start_idx) * frame_ms
                if duration_ms >= min_silence_ms:
                    silent_ranges.append((start_idx * frame_ms, i * frame_ms))
                start_idx = None
    # Check for trailing silence that runs to the end of the audio
    if start_idx is not None:
        duration_ms = (len(silent_flags) - start_idx) * frame_ms
        if duration_ms >= min_silence_ms:
            silent_ranges.append((start_idx * frame_ms, len(silent_flags) * frame_ms))
    # Convert ms -> seconds for the caller
    return [(s / 1000.0, e / 1000.0) for s, e in silent_ranges]
def print_results(file_path, audio, silent_ranges_sec):
    """Print a silence summary plus a colored text timeline for one file.

    Args:
        file_path: Path shown in the report header.
        audio: Object whose ``len()`` is the audio duration in milliseconds.
        silent_ranges_sec: List of (start_sec, end_sec) silent spans.
    """
    duration_s = len(audio) / 1000.0
    silence_s = sum(end - start for start, end in silent_ranges_sec)
    pct = (silence_s / duration_s) * 100 if duration_s > 0 else 0
    print(f"\nSilence detected in: {file_path}")
    print(f"Total silence: {silence_s:.2f} seconds ({pct:.2f}%); Total Audio Length: {duration_s:.2f} seconds")
    # Timeline: one block per 0.25 s of audio.
    block_s = 0.25
    n_blocks = int(duration_s / block_s)
    # ANSI color codes
    RED, YELLOW, GREEN, RESET = '\033[91m', '\033[93m', '\033[92m', '\033[0m'
    # Per-block color; None means "not silent" (uncolored).
    colors = [None] * n_blocks
    for start, end in silent_ranges_sec:
        gap = end - start
        # Longer gaps get more alarming colors.
        if gap > 1.5:
            tint = RED
        elif gap > 1.0:
            tint = YELLOW
        else:
            tint = GREEN
        lo = int(start / block_s)
        hi = min(int(end / block_s), n_blocks)
        for idx in range(lo, hi):
            colors[idx] = tint
    # Render 40 blocks (10 s of audio) per line, each prefixed with its time span.
    per_row = 40
    for row_start in range(0, n_blocks, per_row):
        row = colors[row_start:row_start + per_row]
        rendered = ''.join(f"{tint}█{RESET}" if tint else '█' for tint in row)
        t0 = row_start * block_s
        t1 = min((row_start + per_row) * block_s, duration_s)
        print(f"{t0:>5.1f}s–{t1:>5.1f}s | {rendered}")
def process_file(file_path, min_silence_len, silence_thresh_offset):
    """Load one WAV file, run silence detection, and print the report.

    Load failures are reported to stdout and swallowed so a batch run
    over many files keeps going.
    """
    try:
        audio = AudioSegment.from_wav(file_path)
    except Exception as exc:
        print(f"Error loading {file_path}: {exc}")
        return
    ranges = detect_silence(audio, min_silence_len, silence_thresh_offset)
    print_results(file_path, audio, ranges)
def main():
    """CLI entry point: report on one WAV file, or every WAV in the cwd.

    With an existing path argument, only that file is checked; otherwise
    all *.wav files in the current directory are processed in ascending
    modification-time order. Exits with status 1 if none are found.
    """
    parser = argparse.ArgumentParser(description="Fast Silence Detection in WAV files.")
    parser.add_argument("wav_file", nargs="?", help="Path to a WAV file (optional)")
    parser.add_argument("--min-silence-len", type=int, default=500, help="Minimum silence length in ms")
    parser.add_argument("--silence-thresh-offset", type=float, default=16.0, help="dB offset from average for silence threshold")
    args = parser.parse_args()

    if args.wav_file and os.path.exists(args.wav_file):
        process_file(args.wav_file, args.min_silence_len, args.silence_thresh_offset)
        return

    # Oldest first: sort directory's WAV files by modification time.
    candidates = sorted(
        (name for name in os.listdir('.') if name.lower().endswith('.wav')),
        key=os.path.getmtime,
    )
    if not candidates:
        print("No WAV files found.")
        sys.exit(1)
    for name in candidates:
        process_file(name, args.min_silence_len, args.silence_thresh_offset)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment