fakerybakery · December 6, 2025 02:04
diff --git a/data.py b/data.py
 import json
 from glob import glob
 from tqdm import tqdm
 import os
 from datasets import Dataset, Audio
 # Build the prompt dataset from the tagged JSON files and publish it to the Hub.
 #
 # Every `<name>.json` under tagged/output_gemini/ is used only when a sibling
 # `<name>.timestamps.json` exists next to it. The result is sorted so the
 # dataset row order is reproducible; glob() itself returns paths in arbitrary
 # order.
 files = sorted(
     f for f in glob('tagged/output_gemini/*.json')
     if not f.endswith('.timestamps.json')
     # Swap only the trailing extension: str.replace() would also rewrite a
     # '.json' that happens to appear earlier in the path.
     and os.path.exists(os.path.splitext(f)[0] + '.timestamps.json')
 )
 datas = []
 for f in files:
     stem = os.path.splitext(f)[0]
     # Decode explicitly as UTF-8 instead of relying on the locale's default
     # encoding, which differs between platforms.
     with open(f, encoding='utf-8') as fp:
         data = json.load(fp)
     with open(stem + '.timestamps.json', encoding='utf-8') as tfp:
         tdata = json.load(tfp)
     voice = data['voice'].strip()
     text = data['text'].strip()
     # Only the first timestamp item is used; every 'S1:' marker in its caption
     # is removed (presumably a speaker tag -- confirm against the tagger).
     caption = tdata['items'][0]['caption'].replace('S1:', '').strip()
     path_mp3 = stem + '.mp3'
     path_wav = stem + '.wav'
     # Prefer the .mp3 when it exists. The .wav fallback is not checked for
     # existence here, so a record with no audio still gets a .wav path.
     path = path_mp3 if os.path.exists(path_mp3) else path_wav

     datas.append({
         'voice': voice,
         'text': text,
         'caption': caption,
         'audio': path,
     })
 # Casting the path column to Audio() makes `datasets` treat it as audio data.
 ds = Dataset.from_list(datas).cast_column('audio', Audio())
 ds.push_to_hub('mrfakename/emoact_prompts')
	import json
	from glob import glob
	from tqdm import tqdm
	import os
	from datasets import Dataset, Audio
	# Build the prompt dataset from the tagged JSON files and publish it to the Hub.
	#
	# Every `<name>.json` under tagged/output_gemini/ is used only when a sibling
	# `<name>.timestamps.json` exists next to it. The result is sorted so the
	# dataset row order is reproducible; glob() itself returns paths in arbitrary
	# order.
	files = sorted(
	    f for f in glob('tagged/output_gemini/*.json')
	    if not f.endswith('.timestamps.json')
	    # Swap only the trailing extension: str.replace() would also rewrite a
	    # '.json' that happens to appear earlier in the path.
	    and os.path.exists(os.path.splitext(f)[0] + '.timestamps.json')
	)
	datas = []
	for f in files:
	    stem = os.path.splitext(f)[0]
	    # Decode explicitly as UTF-8 instead of relying on the locale's default
	    # encoding, which differs between platforms.
	    with open(f, encoding='utf-8') as fp:
	        data = json.load(fp)
	    with open(stem + '.timestamps.json', encoding='utf-8') as tfp:
	        tdata = json.load(tfp)
	    voice = data['voice'].strip()
	    text = data['text'].strip()
	    # Only the first timestamp item is used; every 'S1:' marker in its caption
	    # is removed (presumably a speaker tag -- confirm against the tagger).
	    caption = tdata['items'][0]['caption'].replace('S1:', '').strip()
	    path_mp3 = stem + '.mp3'
	    path_wav = stem + '.wav'
	    # Prefer the .mp3 when it exists. The .wav fallback is not checked for
	    # existence here, so a record with no audio still gets a .wav path.
	    path = path_mp3 if os.path.exists(path_mp3) else path_wav

	    datas.append({
	        'voice': voice,
	        'text': text,
	        'caption': caption,
	        'audio': path,
	    })
	# Casting the path column to Audio() makes `datasets` treat it as audio data.
	ds = Dataset.from_list(datas).cast_column('audio', Audio())
	ds.push_to_hub('mrfakename/emoact_prompts')
No results found