|
#!/usr/bin/env python3 |
|
# pip install tqdm tenacity langdetect mutagen syncedlyrics |
|
|
|
import os |
|
import sys |
|
import argparse |
|
import contextlib |
|
from syncedlyrics import search |
|
from mutagen.flac import FLAC |
|
from mutagen.mp3 import MP3 |
|
from mutagen.mp4 import MP4 |
|
|
|
import re |
|
from langdetect import detect |
|
import tenacity |
|
from tqdm import tqdm |
|
import time |
|
|
|
# Verbosity levels, from least to most talkative (values 0 through 3)
QUIET, WARN, INFO, DEBUG = range(4)
|
|
|
def log(message, level=INFO):
    """Print *message* unless *level* exceeds the configured verbosity.

    The threshold is read from the module-level ``args.verbosity``.
    """
    if level > args.verbosity:
        return
    print(message)
|
|
|
@contextlib.contextmanager
def suppress_output():
    """Send stdout and stderr to the null device for the duration of the block.

    Both streams are put back when the block exits, whether it finishes
    normally or raises.
    """
    with open(os.devnull, 'w') as sink, \
            contextlib.redirect_stdout(sink), \
            contextlib.redirect_stderr(sink):
        yield
|
|
|
# Setup command-line arguments
# The description lists every extension the file scan accepts (.flac, .mp3, .m4a).
parser = argparse.ArgumentParser(description="Fetch enhanced lyrics for FLAC, MP3 and M4A files.")
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument("--verbosity", type=int, choices=[0, 1, 2, 3], default=2,
                    help="Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG")
parser.add_argument("--stats", action="store_true", help="Display processing statistics at the end")
args = parser.parse_args()

# os.walk() ignores scan errors by default, so a mistyped path would just
# produce "Found 0 music files."; reject it here with a usage error instead.
if not os.path.isdir(args.folder):
    parser.error(f"not a directory: {args.folder}")

music_directory = args.folder
|
|
|
# Running totals reported by --stats
skipped_count = failed_count = succeeded_count = 0

# Number of saved lyric files per detected type: enhanced, line-by-line, plain
lyric_type_counts = dict.fromkeys(("enhanced", "line-by-line", "plain"), 0)
|
|
|
# Timestamp tags. The fractional part is allowed 1-3 digits so that both
# hundredths ([00:12.34]) and milliseconds ([00:12.345]) are recognised;
# accepting only two digits made millisecond-stamped files look "plain".
_WORD_TIMESTAMP_RE = re.compile(r'<\d{1,3}:\d{2}\.\d{1,3}>')
_LINE_TIMESTAMP_RE = re.compile(r'\[\d{1,3}:\d{2}\.\d{1,3}\]')


def detect_lyric_type(lyrics):
    """Classify lyrics text by the kind of timestamps it contains.

    Args:
        lyrics: Lyrics text; may be empty or None.

    Returns:
        "enhanced" if any angle-bracket tag such as ``<00:12.34>`` is present,
        "line-by-line" if only square-bracket tags such as ``[00:12.34]`` are
        present, otherwise "plain" (including for empty or None input).
    """
    if not lyrics:
        return "plain"
    # Check the angle-bracket form first: text with word tags normally
    # carries line tags as well.
    if _WORD_TIMESTAMP_RE.search(lyrics):
        return "enhanced"
    if _LINE_TIMESTAMP_RE.search(lyrics):
        return "line-by-line"
    return "plain"
|
|
|
# Collect every FLAC, MP3 and M4A file found anywhere under the music folder
music_files = [
    os.path.join(dirpath, filename)
    for dirpath, _subdirs, filenames in os.walk(music_directory)
    for filename in filenames
    if filename.lower().endswith(('.flac', '.mp3', '.m4a'))
]

log(f"Found {len(music_files)} music files.", INFO)
|
|
|
# Lyrics lookup wrapped in a tenacity retry policy: at most 5 attempts with
# exponential backoff bounded between 1 and 10, re-raising the last error.
@tenacity.retry(
    reraise=True,
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_exponential(multiplier=1, min=1, max=10),
)
def attempt_search(term):
    """Search the 'lrclib' provider for lyrics matching *term*.

    Returns whatever ``search`` returns for the term, requesting enhanced
    lyrics.
    """
    found = search(term, enhanced=True, providers=['lrclib'])
    return found
|
|
|
def _first_value(values):
    """Return the first item of *values*, or None when it is None or empty."""
    return values[0] if values else None


def _read_title_artist(file_path):
    """Read the title and artist tags of an audio file.

    Args:
        file_path: Path to a .flac, .mp3 or .m4a file.

    Returns:
        A ``(title, artist)`` tuple in which either item is None when the tag
        is absent or empty, or None when the extension is not one of the
        three handled here.
    """
    lowered = file_path.lower()
    if lowered.endswith(".flac"):
        audio = FLAC(file_path)
        return _first_value(audio.get('title')), _first_value(audio.get('artist'))
    if lowered.endswith(".mp3"):
        audio = MP3(file_path)
        title_frame = audio.get("TIT2", None)  # MP3 metadata: title
        artist_frame = audio.get("TPE1", None)  # MP3 metadata: artist
        title = _first_value(title_frame.text) if title_frame else None
        artist = _first_value(artist_frame.text) if artist_frame else None
        return title, artist
    if lowered.endswith(".m4a"):
        audio = MP4(file_path)
        # `tags` can be None for a file without any tag data; calling .get()
        # on it directly would raise and the file would be counted as failed
        # instead of skipped.
        tags = audio.tags or {}
        return _first_value(tags.get("\xa9nam")), _first_value(tags.get("\xa9ART"))
    return None


# Process each file with a progress bar
for file_path in tqdm(music_files, desc="Processing music files"):
    try:
        tag_pair = _read_title_artist(file_path)
        if tag_pair is None:
            continue  # Shouldn't happen, since the file list is pre-filtered
        title, artist = tag_pair

        # Both tags are needed to build a meaningful search term
        if not (title and artist):
            skipped_count += 1
            continue

        search_term = f"{title} {artist}"
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'

        # Check for existing enhanced lyrics
        if os.path.exists(lrc_path):
            try:
                with open(lrc_path, 'r', encoding='utf-8') as lrc_file:
                    existing_lyrics = lrc_file.read()
                    if detect_lyric_type(existing_lyrics) == "enhanced":
                        log(f"\nSkipping (already enhanced): {search_term}", INFO)
                        skipped_count += 1
                        continue
            except Exception as e:
                # An unreadable file is not fatal: fall through and fetch anew
                log(f"\nError reading existing lyrics for {search_term}: {e}", WARN)

        # Attempt to fetch lyrics
        try:
            if args.verbosity == QUIET:
                # Silence anything the search prints while in quiet mode
                with suppress_output():
                    lyrics = attempt_search(search_term)
            else:
                lyrics = attempt_search(search_term)
        except Exception as error:
            log(f"\nError processing {search_term}: {error}", WARN)
            failed_count += 1
            continue

        if lyrics:
            try:
                with open(lrc_path, 'w', encoding='utf-8') as lrc_file:
                    lrc_file.write(lyrics)
                log(f"\nLyrics saved for: {search_term}", INFO)
                succeeded_count += 1
                # Track lyric type
                lyric_type_counts[detect_lyric_type(lyrics)] += 1
            except Exception as e:
                log(f"\nError writing lyrics for {search_term}: {e}", WARN)
                failed_count += 1
        else:
            log(f"\nNo suitable lyrics found for: {search_term}", INFO)
            skipped_count += 1

    except Exception as e:
        # Per-file boundary: one bad file must not abort the whole run
        log(f"\nUnexpected error processing {file_path}: {e}", WARN)
        failed_count += 1
|
|
|
# Summary report, emitted only when --stats was given
if args.stats:
    total_files = len(music_files)
    report_lines = [
        "\n--- Processing Statistics ---",
        f"Total files processed: {total_files}",
        f"Skipped: {skipped_count}",
        f"Failed: {failed_count}",
        f"Succeeded: {succeeded_count}",
        "Lyric type breakdown (for succeeded files):",
    ]
    # One line per lyric type, in the order the counters were declared
    report_lines.extend(
        f" {kind}: {tally}" for kind, tally in lyric_type_counts.items()
    )
    print("\n".join(report_lines))