Last active
November 15, 2025 22:14
-
-
Save SyedMSawaid/d81401e4f209b2e1eef2262713e1039a to your computer and use it in GitHub Desktop.
Extract metadata from Assimil audio files for Anki cards.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import csv | |
| import re | |
| import shutil | |
| from mutagen.easyid3 import EasyID3 | |
| from mutagen.mp3 import MP3 | |
| from mutagen.flac import FLAC | |
| from mutagen.oggopus import OggOpus | |
| from mutagen.oggvorbis import OggVorbis | |
| from mutagen.mp4 import MP4 | |
def rename_audio_files(folder_path, lesson_number):
    """Prefix every audio file in *folder_path* with the lesson number.

    A file ``S01.mp3`` in lesson 7 becomes ``07_S01.mp3``. Files whose name
    already starts with a numeric prefix (two or more digits followed by an
    underscore) are reported and left alone, so the function can be re-run
    safely on the same folder.

    Args:
        folder_path: Directory containing the lesson's audio files.
        lesson_number: Integer lesson number; zero-padded to at least two
            digits in the prefix.
    """
    supported_extensions = {'.mp3', '.flac', '.ogg', '.opus', '.m4a'}
    # ``:02d`` yields three or more digits from lesson 100 onwards, so the
    # "already prefixed" check must accept any run of at least two digits;
    # otherwise a second run would produce names like ``100_100_S01.mp3``.
    already_prefixed = re.compile(r'^\d{2,}_')
    prefix = f"{lesson_number:02d}_"

    # os.listdir returns a snapshot, so files renamed below are not revisited.
    for filename in os.listdir(folder_path):
        ext = os.path.splitext(filename)[1].lower()
        if ext not in supported_extensions:
            continue
        if already_prefixed.match(filename):
            print(f"Skipped (already prefixed): {filename}")
            continue
        new_filename = prefix + filename
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(folder_path, new_filename)
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} -> {new_filename}")
def extract_and_clean_titles(folder_path, csv_path):
    """Write one Anki row per audio file in *folder_path* to *csv_path*.

    The CSV has the columns ``Front, Back, Audio``. ``Front`` is the file's
    title tag with a leading ``<capital letter><two digits>-`` marker (for
    example ``S01-``) removed and surrounding double quotes and whitespace
    stripped. ``Back`` is left empty and ``Audio`` is a ``[sound:<file>]``
    reference relative to *folder_path*. Files whose tags cannot be read are
    reported on stdout and left out of the CSV.

    Args:
        folder_path: Directory containing the audio files to inspect.
        csv_path: Destination CSV file; it is overwritten if it exists.
    """
    # Tag loader and title key per extension. MP4/M4A stores the title under
    # the '\xa9nam' atom, so a lookup of 'title' would never find it there.
    tag_readers = {
        '.mp3': (EasyID3, 'title'),
        '.flac': (FLAC, 'title'),
        '.ogg': (OggVorbis, 'title'),
        '.opus': (OggOpus, 'title'),
        '.m4a': (MP4, '\xa9nam'),
    }
    marker = re.compile(r'^[A-Z]\d{2}-')

    # Names starting with 'S' sort first; the rest follow alphabetically.
    files = sorted(os.listdir(folder_path), key=lambda x: (not x.startswith('S'), x))

    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Front', 'Back', 'Audio'])
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if ext not in tag_readers:
                continue
            loader, title_key = tag_readers[ext]
            file_path = os.path.join(folder_path, filename)
            try:
                audio = loader(file_path)
                # A missing or empty title falls back to the placeholder
                # instead of failing on the [0] lookup.
                titles = audio.get(title_key) or ['No title found']
                cleaned_title = marker.sub('', titles[0]).strip('"').strip()
                rel_path = os.path.relpath(file_path, folder_path)
                writer.writerow([cleaned_title, '', f'[sound:{rel_path}]'])
                print(f"Processed: {filename} -> {cleaned_title}")
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not
                # abort the export of the remaining files.
                print(f"Error reading {filename}: {e}")
def move_audio_files_to_global(folder_path, global_audio_folder):
    """Move every lesson-prefixed file from *folder_path* into the global folder.

    A file counts as lesson-prefixed when its name starts with two or more
    digits followed by an underscore (``07_S01.mp3``, ``100_S01.mp3``).
    Everything else stays where it is.

    Args:
        folder_path: Lesson directory to move files out of.
        global_audio_folder: Destination directory; created (including
            missing parents) if it does not exist yet.
    """
    os.makedirs(global_audio_folder, exist_ok=True)

    # Lesson numbers of 100 and above produce three-digit prefixes, so the
    # filter must accept more than exactly two digits or those files would
    # never be moved.
    lesson_prefixed = re.compile(r'^\d{2,}_')

    for filename in os.listdir(folder_path):
        if not lesson_prefixed.match(filename):
            continue
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(global_audio_folder, filename)
        shutil.move(old_path, new_path)
        print(f"Moved: {filename} -> {global_audio_folder}")
def clean_filenames(global_audio_folder):
    """Strip the ``<lesson>__S<nn>-`` / ``<lesson>__T<nn>-`` prefix from filenames.

    A file such as ``05__S01-Hallo.mp3`` is renamed to ``Hallo.mp3``. Because
    the lesson number is removed as well, two files can map to the same
    cleaned name; in that case the later file is reported and left untouched
    rather than overwriting the earlier one.

    Args:
        global_audio_folder: Directory whose files are renamed in place.
    """
    # Two or more leading digits so lessons numbered 100 and above are
    # cleaned like every other lesson.
    prefix = re.compile(r'^\d{2,}__[ST]\d{2}-')

    for filename in os.listdir(global_audio_folder):
        found = prefix.match(filename)
        if found is None:
            continue
        new_filename = filename[found.end():]
        old_path = os.path.join(global_audio_folder, filename)
        new_path = os.path.join(global_audio_folder, new_filename)
        # os.rename silently replaces an existing file on POSIX and raises on
        # Windows; neither is wanted here, so never clobber an existing target.
        if os.path.exists(new_path):
            print(f"Skipped (target already exists): {filename} -> {new_filename}")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} -> {new_filename}")
def remove_unwanted_audio(global_audio_folder):
    """Delete stray ``.mp3`` files such as ``15_._T05.mp3`` from the folder.

    A name is deleted when it consists of leading digits, an underscore, an
    optional dot, a second underscore, any remaining text and the ``.mp3``
    suffix. Each deleted file is reported on stdout.
    """
    unwanted = re.compile(r'^\d+_\.?_.*\.mp3$')
    doomed = [entry for entry in os.listdir(global_audio_folder) if unwanted.match(entry)]
    for entry in doomed:
        os.remove(os.path.join(global_audio_folder, entry))
        print(f"Removed: {entry}")
def process_all_folders(base_folder, global_audio_folder):
    """Run the per-lesson pipeline on every lesson folder in *base_folder*.

    A lesson folder is a directory whose name contains ``German ASSIMIL``;
    its lesson number is taken from the first ``L<digits>`` occurrence in
    the name. For each such folder the audio files are prefixed with the
    lesson number, a CSV named ``anki_audio_cards_cleaned.csv`` is written
    into the folder, and the prefixed files are moved to
    *global_audio_folder*. Matching folders without a lesson number are
    reported and skipped.

    Args:
        base_folder: Directory that contains the lesson folders.
        global_audio_folder: Directory that receives the renamed audio files.
    """
    lesson_pattern = re.compile(r'L(\d+)')

    for folder_name in os.listdir(base_folder):
        folder_path = os.path.join(base_folder, folder_name)
        if "German ASSIMIL" not in folder_name or not os.path.isdir(folder_path):
            continue

        lesson_match = lesson_pattern.search(folder_name)
        if lesson_match is None:
            # Without this guard a single oddly named folder would abort the
            # whole run with an AttributeError on ``None.group``.
            print(f"Skipped (no lesson number in folder name): {folder_name}")
            continue
        lesson_number = int(lesson_match.group(1))

        rename_audio_files(folder_path, lesson_number)
        csv_path = os.path.join(folder_path, 'anki_audio_cards_cleaned.csv')
        extract_and_clean_titles(folder_path, csv_path)
        move_audio_files_to_global(folder_path, global_audio_folder)
def main():
    """Process the lesson folders in the current working directory.

    Audio files are collected in a ``global_audio`` folder below the working
    directory, then their filenames are cleaned and unwanted files removed.
    """
    base_folder = os.getcwd()
    global_audio_folder = os.path.join(base_folder, "global_audio")
    process_all_folders(base_folder, global_audio_folder)

    # The global folder is created while lesson folders are processed; when
    # none were found it may not exist, and listing it would raise
    # FileNotFoundError.
    if not os.path.isdir(global_audio_folder):
        print(f"No lesson folders were processed; nothing to clean in: {global_audio_folder}")
        return

    clean_filenames(global_audio_folder)
    remove_unwanted_audio(global_audio_folder)
    print(f"All audio files moved, filenames cleaned, and unwanted files removed in: {global_audio_folder}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment