Skip to content

Instantly share code, notes, and snippets.

@muness
Last active February 26, 2025 14:42
Show Gist options
  • Select an option

  • Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.

Select an option

Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.
Fetch enhanced lyrics for FLAC, M4A and MP3 files

Lyric Fetcher for Music Libraries

This script pulls lyrics for FLAC, MP3, and M4A files in a given music folder. It is not Beets-specific but works well with a Beets-managed library. The script supports incremental processing by marking folders as processed using a .lyrical file.

Installation

  1. You'll need Python 3 and pip installed.
  2. Then install the required dependencies:
pip install tqdm tenacity langdetect mutagen syncedlyrics
  3. Then download the lyrical.py script and cd to the download location.

Usage

Run the script and specify your music folder:

python lyrical.py /path/to/your/music

The script will:

  • Scan the given folder for .flac, .mp3, and .m4a files.
  • Fetch lyrics from online sources.
  • Save lyrics as .lrc files next to each song.
  • Embed lyrics into the file metadata when supported (FLAC, MP3, and M4A).
  • Mark processed folders with a .lyrical file to prevent redundant fetching.

Incremental Processing

  • If a folder has already been processed (.lyrical file exists), it is skipped.

  • To force re-fetching lyrics, use:

    python lyrical.py /path/to/your/music --force

🔧 Additional Options

| Flag | Description |
| --- | --- |
| `--force` | Ignore .lyrical files and re-fetch lyrics. |
| `--verbosity 0-3` | Set verbosity: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG. |
| `--stats` | Show summary statistics after processing. |

📝 Example

python lyrical.py ~/Music --verbosity 2 --stats

This fetches lyrics for all songs in ~/Music, skipping folders already processed, and prints a summary.

💡 Notes

  • If a song already has word-by-word lyrics, the script skips it even if there is no .lyrical file in that folder.
  • Works with Beets-managed libraries but does not require Beets.
  • Tries to pull enhanced (word-by-word) lyrics when available.
#!/usr/bin/env python3
import os
import sys
import argparse
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
from mutagen.id3 import USLT, ID3
import re
# Command-line interface: a positional music folder plus an optional
# verbosity level (0-3, default 2).
parser = argparse.ArgumentParser(
    description="Embed lyrics into FLAC, MP3, and M4A files from LRC files.",
)
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument(
    "--verbosity",
    type=int,
    choices=[0, 1, 2, 3],
    default=2,
    help="Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG",
)
args = parser.parse_args()

# Root folder that is walked recursively for audio files.
music_directory = args.folder
# Verbosity levels, ordered from least to most chatty (0 through 3).
QUIET, WARN, INFO, DEBUG = range(4)
def log(message, level=INFO):
    """Print *message* unless the selected verbosity is below *level*.

    Args:
        message: Text to print.
        level: Minimum ``--verbosity`` value at which the message is shown.
    """
    if args.verbosity < level:
        return
    print(message)
def detect_lyric_type(lyrics):
    """Classify lyrics text by the kind of timestamps it contains.

    Args:
        lyrics: Full lyrics text, e.g. the contents of an .lrc file.

    Returns:
        "enhanced" if a word-level ``<mm:ss.xx>`` tag is present,
        "line-by-line" if only line-level ``[mm:ss.xx]`` tags are present,
        otherwise "plain".
    """
    # Timestamps may carry centiseconds ([00:12.34]) or milliseconds
    # ([00:12.345]); accept one to three fractional digits so millisecond
    # files are not misclassified as plain text.
    if re.search(r'<\d{1,2}:\d{2}\.\d{1,3}>', lyrics):
        return "enhanced"
    if re.search(r'\[\d{1,2}:\d{2}\.\d{1,3}\]', lyrics):
        return "line-by-line"
    return "plain"
# Gather audio files
# Fail fast on a bad path: os.walk() silently yields nothing for a missing
# directory, which would otherwise be reported as "Found 0 music files."
if not os.path.isdir(music_directory):
    sys.exit(f"Error: '{music_directory}' is not a directory.")

# Extensions (lower-case) of the audio formats this script can tag.
AUDIO_EXTENSIONS = ('.flac', '.mp3', '.m4a')

music_files = []
for root, _, files in os.walk(music_directory):
    for file in files:
        # Compare case-insensitively so "Song.FLAC" is picked up as well.
        if file.lower().endswith(AUDIO_EXTENSIONS):
            music_files.append(os.path.join(root, file))
log(f"Found {len(music_files)} music files.", INFO)
# Process each audio file
def _embed_lyrics(file_path, lyrics):
    """Write *lyrics* into the tags of the audio file at *file_path*.

    Args:
        file_path: Path to a .flac, .mp3 or .m4a file.
        lyrics: Lyrics text to embed.

    Returns:
        True if the file was modified and saved, False if identical lyrics
        were already embedded and the file was left untouched.

    Raises:
        ValueError: If the file extension is not one of the supported formats.
    """
    extension = os.path.splitext(file_path)[1].lower()
    if extension == ".flac":
        audio = FLAC(file_path)
        if "LYRICS" in audio and audio["LYRICS"][0] == lyrics:
            return False
        audio["LYRICS"] = lyrics
    elif extension == ".mp3":
        audio = MP3(file_path, ID3=ID3)
        # Files without an ID3 header load with tags set to None.
        if audio.tags is None:
            audio.add_tags()
        if "USLT::eng" in audio.tags and audio.tags["USLT::eng"].text == lyrics:
            return False
        # setall() lives on the ID3 tag object, not on the MP3 file object.
        # lang/desc are set explicitly so the frame is stored under the
        # "USLT::eng" key that the already-embedded check above looks for.
        audio.tags.setall(
            "USLT", [USLT(encoding=3, lang="eng", desc="", text=lyrics)]
        )
    elif extension == ".m4a":
        audio = MP4(file_path)
        # Files without an ilst atom load with tags set to None.
        if audio.tags is None:
            audio.add_tags()
        existing = audio.tags.get("\xa9lyr")
        if existing and existing[0] == lyrics:
            return False
        audio.tags["\xa9lyr"] = [lyrics]
    else:
        raise ValueError(f"unsupported audio format: {extension}")
    audio.save()
    return True


for file_path in music_files:
    try:
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'
        # Skip if no LRC file exists
        if not os.path.exists(lrc_path):
            log(f"Skipping {file_path} (no LRC file found)", INFO)
            continue
        # Read lyrics from LRC file. utf-8-sig also accepts plain UTF-8 and
        # drops a leading byte-order mark so it is not embedded in the tags.
        with open(lrc_path, 'r', encoding='utf-8-sig') as lrc_file:
            lyrics = lrc_file.read().strip()
        if not lyrics:
            log(f"Skipping {file_path} (LRC file is empty)", WARN)
            continue
        if _embed_lyrics(file_path, lyrics):
            lyric_type = detect_lyric_type(lyrics)
            log(f"Embedded lyrics for {file_path} ({lyric_type})", INFO)
        else:
            log(f"Skipping {file_path} (lyrics already embedded)", INFO)
    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next file.
        log(f"Error embedding lyrics for {file_path}: {e}", WARN)
log("\nLyrics embedding process completed.", INFO)
#!/usr/bin/env python3
# pip install tqdm tenacity langdetect mutagen syncedlyrics
import os
import sys
import argparse
import contextlib
from syncedlyrics import search
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
import re
from langdetect import detect
import tenacity
from tqdm import tqdm
import time
# Verbosity levels, ordered from least to most chatty (0 through 3).
QUIET, WARN, INFO, DEBUG = range(4)
def log(message, level=INFO):
    """Print *message* unless the selected verbosity is below *level*.

    Args:
        message: Text to print.
        level: Minimum ``--verbosity`` value at which the message is shown.
    """
    if args.verbosity < level:
        return
    print(message)
@contextlib.contextmanager
def suppress_output():
    """Temporarily send sys.stdout and sys.stderr to os.devnull.

    Both streams are restored when the ``with`` body finishes, including
    when it raises.
    """
    with open(os.devnull, 'w') as sink:
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            yield
# Setup command-line arguments
# NOTE(review): the accompanying README documents a --force flag and .lyrical
# marker files; neither is implemented by this script — confirm whether the
# README or the script is out of date.
parser = argparse.ArgumentParser(
    # M4A files are processed as well, so the description names all three formats.
    description="Fetch enhanced lyrics for FLAC, MP3, and M4A files.",
)
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument(
    "--verbosity",
    type=int,
    choices=[0, 1, 2, 3],
    default=2,
    help="Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG",
)
parser.add_argument(
    "--stats",
    action="store_true",
    help="Display processing statistics at the end",
)
args = parser.parse_args()

# Root folder that is walked recursively for audio files.
music_directory = args.folder
# Running tallies that are printed when --stats is given.
skipped_count = failed_count = succeeded_count = 0

# Saved lyrics per detected type, keyed by the values detect_lyric_type returns.
lyric_type_counts = dict.fromkeys(("enhanced", "line-by-line", "plain"), 0)
def detect_lyric_type(lyrics):
    """Classify lyrics text by the kind of timestamps it contains.

    Args:
        lyrics: Full lyrics text, e.g. the contents of an .lrc file.

    Returns:
        "enhanced" if a word-level ``<mm:ss.xx>`` tag is present,
        "line-by-line" if only line-level ``[mm:ss.xx]`` tags are present,
        otherwise "plain".
    """
    # Timestamps may carry centiseconds ([00:12.34]) or milliseconds
    # ([00:12.345]); accept one to three fractional digits so millisecond
    # files are not misclassified as plain text.
    if re.search(r'<\d{1,2}:\d{2}\.\d{1,3}>', lyrics):
        return "enhanced"
    if re.search(r'\[\d{1,2}:\d{2}\.\d{1,3}\]', lyrics):
        return "line-by-line"
    return "plain"
# Gather list of FLAC, MP3 and M4A files
# Fail fast on a bad path: os.walk() silently yields nothing for a missing
# directory, which would otherwise be reported as "Found 0 music files."
if not os.path.isdir(music_directory):
    sys.exit(f"Error: '{music_directory}' is not a directory.")

# Extensions (lower-case) of the audio formats this script can read.
AUDIO_EXTENSIONS = ('.flac', '.mp3', '.m4a')

music_files = []
for root, _, files in os.walk(music_directory):
    for file in files:
        # Compare case-insensitively so "Song.FLAC" is picked up as well.
        if file.lower().endswith(AUDIO_EXTENSIONS):
            music_files.append(os.path.join(root, file))
log(f"Found {len(music_files)} music files.", INFO)
# Lyric lookup wrapped in a tenacity retry policy: at most 5 attempts with
# exponential wait configured between 1 and 10, re-raising the final error.
@tenacity.retry(
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
def attempt_search(term):
    """Search the lrclib provider for lyrics matching *term*.

    Args:
        term: Free-text query passed to ``syncedlyrics.search``.

    Returns:
        Whatever ``syncedlyrics.search`` returns for the query; callers treat
        a falsy result as "nothing found".
    """
    return search(term, providers=['lrclib'], enhanced=True)
# Process each file with a progress bar
# Relative level of detail of each lyric type; used so an existing .lrc file
# is never replaced by a less detailed one.
LYRIC_TYPE_RANK = {"plain": 0, "line-by-line": 1, "enhanced": 2}


def _read_title_and_artist(file_path):
    """Read the title and artist tags of the audio file at *file_path*.

    Returns:
        A ``(title, artist)`` tuple in which either element may be None when
        the tag is absent, or None if the extension is not supported.
    """
    lowered = file_path.lower()
    if lowered.endswith(".flac"):
        audio = FLAC(file_path)
        return audio.get('title', [None])[0], audio.get('artist', [None])[0]
    if lowered.endswith(".mp3"):
        audio = MP3(file_path)
        title = audio.get("TIT2", None)  # MP3 metadata: title
        artist = audio.get("TPE1", None)  # MP3 metadata: artist
        return (
            title.text[0] if title else None,
            artist.text[0] if artist else None,
        )
    if lowered.endswith(".m4a"):
        audio = MP4(file_path)
        # Go through audio.get() rather than audio.tags.get(): tags is None
        # for tag-less files, and the file-level lookup falls back to the
        # default instead of raising AttributeError.
        return audio.get("\xa9nam", [None])[0], audio.get("\xa9ART", [None])[0]
    return None


for file_path in tqdm(music_files, desc="Processing music files"):
    try:
        metadata = _read_title_and_artist(file_path)
        if metadata is None:
            continue  # Shouldn't happen, since we filter for these
        title, artist = metadata
        if not (title and artist):
            skipped_count += 1
            continue
        search_term = f"{title} {artist}"
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'

        # Check for existing lyrics; enhanced ones need no further work.
        existing_type = None
        if os.path.exists(lrc_path):
            try:
                with open(lrc_path, 'r', encoding='utf-8') as lrc_file:
                    existing_type = detect_lyric_type(lrc_file.read())
            except Exception as e:
                log(f"\nError reading existing lyrics for {search_term}: {e}", WARN)
            if existing_type == "enhanced":
                log(f"\nSkipping (already enhanced): {search_term}", INFO)
                skipped_count += 1
                continue

        # Attempt to fetch lyrics
        try:
            if args.verbosity == QUIET:
                with suppress_output():
                    lyrics = attempt_search(search_term)
            else:
                lyrics = attempt_search(search_term)
        except Exception as error:
            log(f"\nError processing {search_term}: {error}", WARN)
            failed_count += 1
            continue

        if not lyrics:
            log(f"\nNo suitable lyrics found for: {search_term}", INFO)
            skipped_count += 1
            continue

        new_type = detect_lyric_type(lyrics)
        # Do not downgrade: e.g. keep existing line-by-line lyrics when the
        # search only returned plain text.
        if (
            existing_type is not None
            and LYRIC_TYPE_RANK[new_type] < LYRIC_TYPE_RANK[existing_type]
        ):
            log(
                f"\nKeeping existing {existing_type} lyrics "
                f"(fetched only {new_type}): {search_term}",
                INFO,
            )
            skipped_count += 1
            continue

        try:
            with open(lrc_path, 'w', encoding='utf-8') as lrc_file:
                lrc_file.write(lyrics)
        except Exception as e:
            log(f"\nError writing lyrics for {search_term}: {e}", WARN)
            failed_count += 1
        else:
            log(f"\nLyrics saved for: {search_term}", INFO)
            succeeded_count += 1
            # Track lyric type
            lyric_type_counts[new_type] += 1
    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next file.
        log(f"\nUnexpected error processing {file_path}: {e}", WARN)
        failed_count += 1
# Emit the end-of-run summary only when --stats was passed.
if args.stats:
    summary_lines = [
        "\n--- Processing Statistics ---",
        f"Total files processed: {len(music_files)}",
        f"Skipped: {skipped_count}",
        f"Failed: {failed_count}",
        f"Succeeded: {succeeded_count}",
        "Lyric type breakdown (for succeeded files):",
    ]
    # One line per lyric type, in the dictionary's insertion order.
    summary_lines.extend(
        f" {kind}: {tally}" for kind, tally in lyric_type_counts.items()
    )
    print("\n".join(summary_lines))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment