Skip to content

Instantly share code, notes, and snippets.

@SyedMSawaid
Last active November 15, 2025 22:14
Show Gist options
  • Select an option

  • Save SyedMSawaid/d81401e4f209b2e1eef2262713e1039a to your computer and use it in GitHub Desktop.

Select an option

Save SyedMSawaid/d81401e4f209b2e1eef2262713e1039a to your computer and use it in GitHub Desktop.
Extract metadata from Assimil audio files for Anki cards.
import os
import csv
import re
import shutil
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from mutagen.flac import FLAC
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
from mutagen.mp4 import MP4
def rename_audio_files(folder_path, lesson_number):
    """Prefix every audio file in *folder_path* with the lesson number.

    A file ``name.mp3`` becomes ``NN_name.mp3`` where ``NN`` is
    *lesson_number* zero-padded to at least two digits.

    Files are skipped (and reported) when they
      * already start with a two-digit prefix such as ``02_``,
      * already start with this lesson's own prefix (this keeps the function
        idempotent for lesson numbers >= 100, whose prefix has three digits
        and is therefore not caught by the two-digit check), or
      * would overwrite an existing file of the target name.

    Args:
        folder_path: Directory whose entries are inspected (not recursive).
        lesson_number: Integer lesson number used to build the prefix.
    """
    supported_extensions = {'.mp3', '.flac', '.ogg', '.opus', '.m4a'}
    lesson_prefix = f"{lesson_number:02d}_"
    for filename in os.listdir(folder_path):
        ext = os.path.splitext(filename)[1].lower()
        if ext not in supported_extensions:
            continue
        if re.match(r'^\d{2}_', filename) or filename.startswith(lesson_prefix):
            print(f"Skipped (already prefixed): {filename}")
            continue
        new_filename = f"{lesson_prefix}{filename}"
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(folder_path, new_filename)
        # os.rename silently replaces the target on POSIX and raises on
        # Windows; refuse to clobber an existing file on either platform.
        if os.path.exists(new_path):
            print(f"Skipped (target exists): {filename} -> {new_filename}")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} -> {new_filename}")
def extract_and_clean_titles(folder_path, csv_path):
    """Write one CSV row per audio file in *folder_path* using its title tag.

    The CSV gets a ``Front,Back,Audio`` header followed by one row per
    supported audio file: the cleaned title, an empty back side, and a
    ``[sound:<file name>]`` reference. Cleaning removes a leading
    ``<capital letter><two digits>-`` marker (e.g. ``S01-``) and surrounding
    double quotes / whitespace from the title.

    Files whose tags cannot be read are reported and skipped; they do not
    abort the run.

    Args:
        folder_path: Directory whose entries are inspected (not recursive).
        csv_path: Destination of the CSV file (overwritten, UTF-8).
    """
    supported_extensions = {
        '.mp3': MP3,
        '.flac': FLAC,
        '.ogg': OggVorbis,
        '.opus': OggOpus,
        '.m4a': MP4,
    }
    # MP4 containers keep the title under the iTunes-style '\xa9nam' atom;
    # looking up 'title' there always falls through to the default.
    title_keys = {'.m4a': '\xa9nam'}
    files = sorted(os.listdir(folder_path), key=lambda x: (not x.startswith('S'), x))
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Front', 'Back', 'Audio'])
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if ext not in supported_extensions:
                continue
            file_path = os.path.join(folder_path, filename)
            try:
                # MP3 goes through EasyID3 so the tag is addressable as 'title'.
                if ext == '.mp3':
                    audio = EasyID3(file_path)
                else:
                    audio = supported_extensions[ext](file_path)
                # `or` also covers a present-but-empty title list.
                titles = audio.get(title_keys.get(ext, 'title')) or ['No title found']
                title = titles[0]
            except Exception as e:
                # Best effort: a single unreadable file must not stop the export.
                print(f"Error reading {filename}: {e}")
            else:
                cleaned_title = re.sub(r'^[A-Z]\d{2}-', '', title).strip('"').strip()
                rel_path = os.path.relpath(file_path, folder_path)
                writer.writerow([cleaned_title, '', f'[sound:{rel_path}]'])
                print(f"Processed: {filename} -> {cleaned_title}")
def move_audio_files_to_global(folder_path, global_audio_folder):
    """Move every lesson-prefixed entry of *folder_path* into the global folder.

    An entry counts as lesson-prefixed when its name starts with two or more
    digits followed by an underscore (``05_...``, ``100_...``). Accepting
    more than two digits matters because lesson numbers >= 100 produce a
    three-digit prefix.

    Args:
        folder_path: Directory whose entries are inspected (not recursive).
        global_audio_folder: Destination directory; created (including
            parents) when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(global_audio_folder, exist_ok=True)
    for filename in os.listdir(folder_path):
        if not re.match(r'^\d{2,}_', filename):
            continue
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(global_audio_folder, filename)
        shutil.move(old_path, new_path)
        print(f"Moved: {filename} -> {global_audio_folder}")
def clean_filenames(global_audio_folder):
    """Strip the ``NN__S01-`` / ``NN__T01-`` style prefix from file names.

    A name is cleaned when it starts with a lesson number (two or more
    digits), two underscores, ``S`` or ``T``, two digits and a hyphen; that
    whole prefix is removed, e.g. ``05__S01-Hallo.mp3`` -> ``Hallo.mp3``.

    Because the lesson number is removed too, different lessons can map to
    the same cleaned name. Such a rename is skipped and reported instead of
    overwriting the file that already carries the name.

    Args:
        global_audio_folder: Directory whose entries are renamed in place.
            A missing directory is treated as "nothing to do".
    """
    if not os.path.isdir(global_audio_folder):
        return
    prefix_pattern = re.compile(r'^\d{2,}__[ST]\d{2}-')
    for filename in os.listdir(global_audio_folder):
        new_filename, replaced = prefix_pattern.subn('', filename, count=1)
        if not replaced:
            continue
        old_path = os.path.join(global_audio_folder, filename)
        new_path = os.path.join(global_audio_folder, new_filename)
        # An empty result (name was only the prefix) or an already taken name
        # would either fail or clobber another file; leave the entry untouched.
        if not new_filename or os.path.exists(new_path):
            print(f"Skipped (target exists): {filename} -> {new_filename}")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} -> {new_filename}")
def remove_unwanted_audio(global_audio_folder):
    """Delete leftover MP3 files such as ``15_._T05.mp3``.

    A name is removed when it consists of digits, an underscore, an optional
    dot, another underscore, anything, and a lowercase ``.mp3`` suffix. Since
    the dot is optional, names like ``15__T05.mp3`` are removed as well.

    Args:
        global_audio_folder: Directory whose entries are inspected (not
            recursive). A missing directory is treated as "nothing to do".
    """
    if not os.path.isdir(global_audio_folder):
        return
    for filename in os.listdir(global_audio_folder):
        if not re.match(r'^\d+_\.?_.*\.mp3$', filename):
            continue
        file_path = os.path.join(global_audio_folder, filename)
        # os.remove cannot delete directories; a matching directory name
        # would otherwise abort the whole clean-up with an exception.
        if os.path.isdir(file_path):
            continue
        os.remove(file_path)
        print(f"Removed: {filename}")
def process_all_folders(base_folder, global_audio_folder):
    """Run the per-lesson pipeline on every lesson folder below *base_folder*.

    A lesson folder is a direct sub-directory whose name contains
    ``German ASSIMIL``. Its lesson number is the first run of digits that
    directly follows a capital ``L`` in the name. For each such folder the
    audio files are prefixed with the lesson number, the title CSV
    ``anki_audio_cards_cleaned.csv`` is written into the folder, and the
    prefixed files are moved to *global_audio_folder*.

    Folders that match the name filter but carry no ``L<digits>`` token are
    reported and skipped instead of raising.

    Args:
        base_folder: Directory that contains the lesson folders.
        global_audio_folder: Destination directory for the prefixed audio.
    """
    # Sorted so lessons are handled in a reproducible order.
    for folder_name in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, folder_name)
        if not (os.path.isdir(folder_path) and "German ASSIMIL" in folder_name):
            continue
        lesson_match = re.search(r'L(\d+)', folder_name)
        if lesson_match is None:
            print(f"Skipped (no lesson number): {folder_name}")
            continue
        lesson_number = int(lesson_match.group(1))
        rename_audio_files(folder_path, lesson_number)
        csv_path = os.path.join(folder_path, 'anki_audio_cards_cleaned.csv')
        extract_and_clean_titles(folder_path, csv_path)
        move_audio_files_to_global(folder_path, global_audio_folder)
if __name__ == "__main__":
    # Operates on the current working directory: lesson folders are expected
    # directly below it and the collected audio ends up in ./global_audio.
    base_folder = os.getcwd()
    global_audio_folder = os.path.join(base_folder, "global_audio")
    process_all_folders(base_folder, global_audio_folder)
    # The global folder only exists once at least one lesson folder has been
    # processed; without it there is nothing to clean and listing it would fail.
    if os.path.isdir(global_audio_folder):
        clean_filenames(global_audio_folder)
        remove_unwanted_audio(global_audio_folder)
        print(f"All audio files moved, filenames cleaned, and unwanted files removed in: {global_audio_folder}")
    else:
        print(f"No lesson folders processed; nothing to clean in: {global_audio_folder}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment