pineapplemachine · March 20, 2022 21:29
diff --git a/elden-vttbatch.py b/elden-vttbatch.py
 """
 This script can be used to perform automated speech-to-text on every
 *.ogg audio file in a directory, recursively.

 This was written in order to help better document the items in the
 `sounds` directory represented in Elden Ring's Data0.bhd archive file.

 These are the steps that were taken:

 1. Extract files including sounds/pck/normal.pck from Data0.bhd in
 Elden Ring's Game/ directory using ER.BDT.Tool:
 https://github.com/Ekey/ER.BDT.Tool

 2. Extract OGG audio files from sounds/pck/normal.pck using
 this unpacker tool:
 https://github.com/Vextil/Wwise-Unpacker

 3. Run this script using the directory containing the unpacked audio
 files as input.
 """

 import os
 import subprocess
 import sys

 import speech_recognition

 class VTTBatch:
    """Batch speech-to-text over every *.ogg file beneath a directory.

    Results are kept in ``self.speech_data`` (file name relative to the
    input directory -> recognized text) and persisted to a UTF-8 text file
    with one ``name<TAB>text`` record per line, so that an interrupted run
    can be resumed without re-processing files that were already handled.
    """

    # Files smaller than this many bytes are skipped by iter_input_files().
    MIN_FILE_SIZE = 10000

    def __init__(self, output_path, input_path):
        """Store both paths and load any previously saved results.

        output_path: text file that results are read from and written to.
        input_path: directory that is searched recursively for *.ogg files.
        """
        self.output_path = output_path
        self.input_path = input_path
        self.speech_data = dict()
        self.load_speech_data()
        self.recognizer = speech_recognition.Recognizer()
        # Number of files transcribed during this run (cached ones excluded).
        self.handled_count = 0

    def load_speech_data(self):
        """Fill self.speech_data from the output file, if it exists.

        Blank lines and lines without a tab separator are skipped instead
        of aborting the load, and the line terminator is not stored as
        part of the text.
        """
        if not os.path.exists(self.output_path):
            return
        print("Reading existing data from", self.output_path)
        with open(self.output_path, "rt", encoding="utf-8") as data_file:
            for line in data_file:
                record = line.rstrip("\r\n")
                name, separator, text = record.partition("\t")
                if not separator:
                    # Not a "name<TAB>text" record (e.g. an empty line).
                    continue
                self.speech_data[name] = text

    def write_speech_data(self):
        """Rewrite the output file from the current self.speech_data.

        Tabs and newlines inside the text are replaced by spaces so that
        every record stays on one line with exactly one separator.
        """
        print("Writing data to", self.output_path)
        with open(self.output_path, "wt", encoding="utf-8") as data_file:
            for name, text in self.speech_data.items():
                data_file.write("%s\t%s\n" % (
                    name, text.strip().replace("\t", " ").replace("\n", " ")
                ))

    def iter_input_files(self):
        """Process each eligible *.ogg file and yield its result.

        Yields the (is_new, name, text) tuples returned by
        handle_input_file(). Files below MIN_FILE_SIZE bytes are ignored.
        """
        for root, _dirs, files in os.walk(self.input_path):
            for file_name in files:
                if not file_name.endswith(".ogg"):
                    continue
                file_path = os.path.join(root, file_name)
                # Ignore small files
                if os.path.getsize(file_path) < self.MIN_FILE_SIZE:
                    continue
                yield self.handle_input_file(file_path)

    def handle_input_file(self, file_path):
        """Transcribe one audio file, reusing a cached result if present.

        Returns (is_new, name, text): is_new is False when the text came
        from previously loaded data and True when the file was transcribed
        now. text is "" when nothing was recognized and "[Error]" when the
        conversion or the recognizer failed.
        """
        # Check for existing data
        name = os.path.relpath(file_path, self.input_path)
        if name in self.speech_data:
            return (False, name, self.speech_data[name])
        # Convert to WAV
        file_path_wav = file_path + ".wav"
        if os.path.exists(file_path_wav):
            print("File already exists: %s" % file_path_wav)
        try:
            # "-y" overwrites a stale WAV left behind by an interrupted run;
            # without it ffmpeg would stop and ask for confirmation.
            ffmpeg_result = subprocess.run([
                "ffmpeg", "-y", "-loglevel", "error",
                "-i", file_path, file_path_wav,
            ])
            if ffmpeg_result.returncode == 0:
                text = self._transcribe_wav(file_path_wav)
            else:
                # A file that cannot be converted is recorded as an error
                # rather than aborting the whole batch.
                print("ffmpeg error: exit status {0}".format(
                    ffmpeg_result.returncode
                ))
                text = "[Error]"
        finally:
            # Clean up WAV file, even when interrupted or on failure
            if os.path.exists(file_path_wav):
                os.remove(file_path_wav)
        # All done
        self.speech_data[name] = text
        self.handled_count += 1
        return (True, name, text)

    def _transcribe_wav(self, file_path_wav):
        """Return the Sphinx transcription of the WAV file at file_path_wav.

        Returns "" when the recognizer raises UnknownValueError and
        "[Error]" when it raises RequestError or ValueError.
        """
        try:
            with speech_recognition.AudioFile(file_path_wav) as audio_source:
                audio = self.recognizer.record(audio_source)
                return self.recognizer.recognize_sphinx(audio)
        except speech_recognition.UnknownValueError:
            return ""
        except (speech_recognition.RequestError, ValueError) as error:
            print("Sphinx error: {0}".format(error))
            return "[Error]"

 def __main__():
    """Command-line entry point.

    Expects exactly two arguments: the output file and the directory to
    scan. Prints each newly transcribed file, saves progress after every
    200 newly handled files, and saves once more when finished.

    Exits with status 2 (usage message on stderr) when the argument count
    is wrong, so that calling scripts can detect the failure.
    """
    if len(sys.argv) != 3:
        print(
            "Usage: python elden-vttbatch.py [output-file] [directory-path]",
            file=sys.stderr,
        )
        sys.exit(2)
    output_path, input_path = sys.argv[1:]
    vtt_batch = VTTBatch(output_path, input_path)
    try:
        for new_file, name, text in vtt_batch.iter_input_files():
            if not new_file:
                continue
            print(name, text)
            if vtt_batch.handled_count % 200 == 0:
                vtt_batch.write_speech_data()
    except BaseException:
        # Deliberately includes KeyboardInterrupt: keep whatever was
        # transcribed so far, then let the original exception propagate.
        if vtt_batch.handled_count:
            vtt_batch.write_speech_data()
        raise
    vtt_batch.write_speech_data()

 # Run the batch job only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    __main__()
	"""
	This script can be used to perform automated speech-to-text on every
	*.ogg audio file in a directory, recursively.

	This was written in order to help better document the items in the
	`sounds` directory represented in Elden Ring's Data0.bhd archive file.

	These are the steps that were taken:

	1. Extract files including sounds/pck/normal.pck from Data0.bhd in
	Elden Ring's Game/ directory using ER.BDT.Tool:
	https://github.com/Ekey/ER.BDT.Tool

	2. Extract OGG audio files from sounds/pck/normal.pck using
	this unpacker tool:
	https://github.com/Vextil/Wwise-Unpacker

	3. Run this script using the directory containing the unpacked audio
	files as input.
	"""

	import os
	import subprocess
	import sys

	import speech_recognition

	class VTTBatch:
	    """Batch speech-to-text over every *.ogg file beneath a directory.

	    Results are kept in ``self.speech_data`` (file name relative to the
	    input directory -> recognized text) and persisted to a UTF-8 text
	    file with one ``name<TAB>text`` record per line, so that an
	    interrupted run can be resumed without re-processing files that
	    were already handled.
	    """

	    # Files smaller than this many bytes are skipped by iter_input_files().
	    MIN_FILE_SIZE = 10000

	    def __init__(self, output_path, input_path):
	        """Store both paths and load any previously saved results.

	        output_path: text file that results are read from and written to.
	        input_path: directory that is searched recursively for *.ogg files.
	        """
	        self.output_path = output_path
	        self.input_path = input_path
	        self.speech_data = dict()
	        self.load_speech_data()
	        self.recognizer = speech_recognition.Recognizer()
	        # Number of files transcribed during this run (cached ones excluded).
	        self.handled_count = 0

	    def load_speech_data(self):
	        """Fill self.speech_data from the output file, if it exists.

	        Blank lines and lines without a tab separator are skipped instead
	        of aborting the load, and the line terminator is not stored as
	        part of the text.
	        """
	        if not os.path.exists(self.output_path):
	            return
	        print("Reading existing data from", self.output_path)
	        with open(self.output_path, "rt", encoding="utf-8") as data_file:
	            for line in data_file:
	                record = line.rstrip("\r\n")
	                name, separator, text = record.partition("\t")
	                if not separator:
	                    # Not a "name<TAB>text" record (e.g. an empty line).
	                    continue
	                self.speech_data[name] = text

	    def write_speech_data(self):
	        """Rewrite the output file from the current self.speech_data.

	        Tabs and newlines inside the text are replaced by spaces so that
	        every record stays on one line with exactly one separator.
	        """
	        print("Writing data to", self.output_path)
	        with open(self.output_path, "wt", encoding="utf-8") as data_file:
	            for name, text in self.speech_data.items():
	                data_file.write("%s\t%s\n" % (
	                    name, text.strip().replace("\t", " ").replace("\n", " ")
	                ))

	    def iter_input_files(self):
	        """Process each eligible *.ogg file and yield its result.

	        Yields the (is_new, name, text) tuples returned by
	        handle_input_file(). Files below MIN_FILE_SIZE bytes are ignored.
	        """
	        for root, _dirs, files in os.walk(self.input_path):
	            for file_name in files:
	                if not file_name.endswith(".ogg"):
	                    continue
	                file_path = os.path.join(root, file_name)
	                # Ignore small files
	                if os.path.getsize(file_path) < self.MIN_FILE_SIZE:
	                    continue
	                yield self.handle_input_file(file_path)

	    def handle_input_file(self, file_path):
	        """Transcribe one audio file, reusing a cached result if present.

	        Returns (is_new, name, text): is_new is False when the text came
	        from previously loaded data and True when the file was
	        transcribed now. text is "" when nothing was recognized and
	        "[Error]" when the conversion or the recognizer failed.
	        """
	        # Check for existing data
	        name = os.path.relpath(file_path, self.input_path)
	        if name in self.speech_data:
	            return (False, name, self.speech_data[name])
	        # Convert to WAV
	        file_path_wav = file_path + ".wav"
	        if os.path.exists(file_path_wav):
	            print("File already exists: %s" % file_path_wav)
	        try:
	            # "-y" overwrites a stale WAV left behind by an interrupted
	            # run; without it ffmpeg would stop and ask for confirmation.
	            ffmpeg_result = subprocess.run([
	                "ffmpeg", "-y", "-loglevel", "error",
	                "-i", file_path, file_path_wav,
	            ])
	            if ffmpeg_result.returncode == 0:
	                text = self._transcribe_wav(file_path_wav)
	            else:
	                # A file that cannot be converted is recorded as an error
	                # rather than aborting the whole batch.
	                print("ffmpeg error: exit status {0}".format(
	                    ffmpeg_result.returncode
	                ))
	                text = "[Error]"
	        finally:
	            # Clean up WAV file, even when interrupted or on failure
	            if os.path.exists(file_path_wav):
	                os.remove(file_path_wav)
	        # All done
	        self.speech_data[name] = text
	        self.handled_count += 1
	        return (True, name, text)

	    def _transcribe_wav(self, file_path_wav):
	        """Return the Sphinx transcription of the WAV file at file_path_wav.

	        Returns "" when the recognizer raises UnknownValueError and
	        "[Error]" when it raises RequestError or ValueError.
	        """
	        try:
	            with speech_recognition.AudioFile(file_path_wav) as audio_source:
	                audio = self.recognizer.record(audio_source)
	                return self.recognizer.recognize_sphinx(audio)
	        except speech_recognition.UnknownValueError:
	            return ""
	        except (speech_recognition.RequestError, ValueError) as error:
	            print("Sphinx error: {0}".format(error))
	            return "[Error]"

	def __main__():
	    """Command-line entry point.

	    Expects exactly two arguments: the output file and the directory to
	    scan. Prints each newly transcribed file, saves progress after every
	    200 newly handled files, and saves once more when finished.

	    Exits with status 2 (usage message on stderr) when the argument
	    count is wrong, so that calling scripts can detect the failure.
	    """
	    if len(sys.argv) != 3:
	        print(
	            "Usage: python elden-vttbatch.py [output-file] [directory-path]",
	            file=sys.stderr,
	        )
	        sys.exit(2)
	    output_path, input_path = sys.argv[1:]
	    vtt_batch = VTTBatch(output_path, input_path)
	    try:
	        for new_file, name, text in vtt_batch.iter_input_files():
	            if not new_file:
	                continue
	            print(name, text)
	            if vtt_batch.handled_count % 200 == 0:
	                vtt_batch.write_speech_data()
	    except BaseException:
	        # Deliberately includes KeyboardInterrupt: keep whatever was
	        # transcribed so far, then let the original exception propagate.
	        if vtt_batch.handled_count:
	            vtt_batch.write_speech_data()
	        raise
	    vtt_batch.write_speech_data()

	# Run the batch job only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    __main__()
No results found