CraftyCanine · July 7, 2023 08:57 · omgitsthomas · Aug 19, 2021 · CraftyCanine · Aug 19, 2021
diff --git a/episodesplit.py b/episodesplit.py
 #!/usr/bin/env python

 #----------------------------------------------------------
 #
 # Episode Split
 # CraftyCanine
 #
 # Description
 #
 # This script is meant to make the process of splitting
 # multi-episode files for 15 min TV shows into 1 episode
 # per file.
 #
 #
 # Requirements
 #
 # ffmpeg, mkvmerge
 #
 #
 # Instructions
 # 
 # Since this is a command-line tool, hopefully you know your way around the terminal.
 #
 # 1) Install ffmpeg, mkvmerge, and Python 2 (the script uses Python 2 syntax and does not run on Python 3) from your favorite package manager (yum, dpkg, brew, etc.) if you don't already have them.
 # 2) Download the file from gist by getting the raw link and copy pasting it into a file or run "wget <link>".
 # 3) Change into the directory where the files you want to split are stored.
 # 4) Run "python episodesplit.py -i <name of video>" to run with defaults. Depending on the quality of the video, it could take around 5 minutes.
 # 
 # Note: If the tool doesn't want to split or is coming back with a bunch of choices, you might want to adjust the thresholds.
 #       There's "-b <length in seconds>" to adjust the minimum black section length to search for. There's also 
 #       "-t <ratio of black pixels to non-black pixels>" for adjusting the sensitivity for what is considered a black frame.
 #       Run "python episodesplit.py -h" for more info on the arguments.
 #
 # --------------------------------------------------------

 import sys,os,math
 import logging
 from logging.handlers import RotatingFileHandler
 import argparse
 from subprocess import Popen, PIPE
 import re

 #ffmpeg -i tvshow.mkv -vf "blackdetect=d=1.5" -an -f null -
 #mkvmerge -o "test.mkv" --split "timecodes:00:11:37.321999998" "tvshow.mkv"

 # Configurables ------------------------------------------

 #Regex patterns for ffmpeg output. Both duration patterns are checked.
 #Raw strings keep the regex escapes (\d, \s, \[ ...) out of Python's own
 #string-escape processing.
 #The fractional part of each blackdetect timestamp is optional because whole
 #values can be printed without a decimal point (e.g. "black_start:0"); the
 #line may end in either "\n" or "\r\n".
 black_pattern = r'\[blackdetect @ .+?\] black_start:(\d+(?:\.\d+)?) black_end:(\d+(?:\.\d+)?) black_duration:(\d+(?:\.\d+)?)\r?\n'
 #Matches the per-stream "DURATION : hh:mm:ss.fff" metadata line (groups: h, m, s)
 duration_pattern = r'DURATION\s+?: (\d+?):(\d+?):(\d+?\.\d+?)\n'
 #Fallback: the container-level "Duration: hh:mm:ss.ff," line (groups: h, m, s)
 duration_pattern2 = r'Duration: (\d+?):(\d+?):(\d+?\.\d+?),'
 #Regex pattern for old filename; %s is filled in with the file extension.
 #Groups: show name, season (Sxx), first episode (Exx), second episode (Exx),
 #quality, first episode name, second episode name. Episode names may contain
 #word characters, commas, dots, apostrophes and hyphens.
 filename_pattern = r"(.+?)\.?(S\d\d)(E\d\d)-(E\d\d)\.(.+?)\.([\w,.'\-]+?)\.?\+\.([\w,.'\-]+?)\.%s"
 #String format pattern for new filename
 filename_new_pattern = '{show_name}.{season}{episode}.{quality}.{epname}.{fileExt}'
 #Test data for fake data parameter
 test_ffmpeg = '''''' # insert new test ffmpeg output here
 test_mkvmerge = '''''' # insert new test mkvmerge output here

 # Globals ------------------------------------------------

 #Defaults for the run-time switches; every one of them is overwritten from
 #the parsed command line further down.
 debug = False
 tryRename = False
 fakeData = False
 black_length = 1.0

 # Parse Arguments -----------------------------------------

 parser = argparse.ArgumentParser(description='Script designed to ease the process of splitting multi-episode files for 15 min TV shows into separate files with 1 episode per file.    NOTE: mkvmerge, the tool used to split the files, works using key frames. The split must occur on a key frame, so might not be directly at the episode mark.')
 parser.add_argument('-i', '--input',help='Input video file to be split.',required=True,nargs='+')
 parser.add_argument('-d','--debug',help='Enable debug mode NOTE: wouldn\'t recommend running debug mode with more than one file...',required=False,action='store_true')
 #type=float makes argparse reject a non-numeric -n with a usage message
 #instead of a traceback from the float() conversion below
 parser.add_argument('-n',help='Number of episodes in file (not working yet, don\'t use)',required=False,default=2,type=float)
 parser.add_argument('-l','--logpath',help='Path to log file',required=False,default=os.path.splitext(__file__)[0] + '.log')
 #store_false: args.norename is True unless -r is given, i.e. it already
 #holds the "try to rename" decision
 parser.add_argument('-r','--norename',help='Do not try to rename resulting episode files using original file name (Script will try to rename by default)',required=False,action='store_false')
 parser.add_argument('-f','--fakedata',help='Use fake data instead of running on a file (input file still required, but won\'t be used. NOTE: Test data configurable variables (top of source file) must be populated with test output of ffmpeg and mkvmerge.',required=False,action='store_true')
 parser.add_argument('-b','--black',help='Length of time in seconds that we are looking for black segments for potential split points (default is 1.0)',required=False,default=1.0,type=float)
 parser.add_argument('-t','--threshold',help='Threshold of black to non-black pixels (default is 0.98)',required=False,default=0.98,type=float)
 args = parser.parse_args()

 #The episode count is used as a divisor when estimating the episode length,
 #so zero or negative values are refused up front
 if args.n <= 0:
     parser.error('-n must be a positive number')

 #Set debug flag (store_true always yields a bool, never None)
 debug = args.debug

 #Set input video (multi-episodes); nargs='+' makes this a list of paths
 inputvid = args.input

 #set number of eps
 numEps = float(args.n)

 #Log file name
 name = args.logpath

 #Should we rename the resulting files?
 tryRename = args.norename

 #Should we use fake data?
 fakeData = args.fakedata

 #Length of black segment to look for, pre-formatted for the ffmpeg filter string
 black_length = '%.1f' % args.black

 #Black pixel ratio threshold, pre-formatted for the ffmpeg filter string
 black_threshold = '%.2f' % args.threshold

 # Logging ---------------------------------------
 #Log line layout: timestamp, level in brackets, message
 fmt = '%(asctime)s [%(levelname)s] %(message)s'
 logFormatter = logging.Formatter(fmt)

 #Two outputs: a rotating log file at the configured path and stdout
 fileHandler = RotatingFileHandler(name,maxBytes=5e+6, backupCount=10)
 consoleHandler = logging.StreamHandler(sys.stdout)

 #Root logger verbosity follows the debug flag
 rootLogger = logging.getLogger()
 rootLogger.setLevel(logging.DEBUG if debug else logging.INFO)

 #Both handlers share the formatter; the file handler is registered first
 for handler in (fileHandler, consoleHandler):
     handler.setFormatter(logFormatter)
     rootLogger.addHandler(handler)

 # Start Execution -----------------------------------------

 #Process every input file independently; any problem with one file is logged
 #and the loop moves on to the next one.
 for vid in inputvid:
     #--- 1. Scan the file for black segments ------------------------------
     if not fakeData:
         logging.info('Splitting file: %s' % vid)
         logging.info('Scanning file for black segments with black ratio at least %s and at least %s second(s) in length...' % (black_threshold,black_length))
         session = Popen(['ffmpeg', '-i', vid, '-vf', 'blackdetect=d='+black_length+':pic_th='+black_threshold, '-an', '-f', 'null', '-'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
         res_text = session.communicate()

         if debug: print(res_text)

         #the patterns below are matched against ffmpeg's stderr stream
         res_text = res_text[1]

         if debug:
             print('\n\n\n')
             print(res_text)
     else:
         res_text = test_ffmpeg

     blacks = re.findall(black_pattern,res_text)
     video_length = re.findall(duration_pattern,res_text)

     if len(video_length) == 0:
         video_length = re.findall(duration_pattern2,res_text)

     #Check for issues with ffmpeg
     if len(blacks) == 0 or len(video_length) == 0:
         logging.error('There was a problem parsing the video. Perhaps the black length value should be decreased?')
         logging.debug('Length blacks: %d   Length duration: %d' % (len(blacks),len(video_length)))
         continue

     #--- 2. Estimate where the episode break should be --------------------
     video_length = video_length[0]
     hours, minutes, seconds = video_length
     totalSeconds = (float(hours)*60 + float(minutes))*60 + float(seconds)
     lenEst = totalSeconds / numEps #approx. length of one episode in seconds
     margin = lenEst / 3 #acceptable distance from the estimate (1/3 of one episode)

     logging.debug(blacks)
     logging.debug(video_length)
     logging.debug(totalSeconds)
     logging.debug('estimated episode length = %.4f' % lenEst)
     logging.debug('margin = %.4f - %.4f' % (lenEst - margin, lenEst + margin))

     #keep only the black segments that start inside the acceptable window
     selected_blacks = [candidate for candidate in blacks
                        if lenEst - margin < float(candidate[0]) < lenEst + margin]

     if len(selected_blacks) == 0:
         logging.info('No suitable black sections were found. Try changing the pixel ratio threshold and minimum black length (-t and -b)')
         continue

     #two hits that start within 10 seconds of each other are treated as one break
     if len(selected_blacks) == 2:
         secs1 = float(selected_blacks[0][0])
         secs2 = float(selected_blacks[1][0])
         if (secs2 - secs1) < 10:
             logging.info('Two possible black sections identified but they are within 10 seconds. Using first as the episode break...')
             selected_blacks = [selected_blacks[0]]

     #--- 3. Let the user choose when the break is still ambiguous ---------
     #(any remaining count above one is ambiguous, including exactly two
     #candidates that are far apart)
     if len(selected_blacks) > 1:
         print('More than one black section has been identified.\nPlease select the section that you want to split on.')
         for i, candidate in enumerate(selected_blacks):
             candidate_start = float(candidate[0])
             print('%d: %d:%05.2f' % (i, candidate_start / 60, candidate_start % 60))
         skip_choice = len(selected_blacks) #menu entry right after the last candidate
         print('%d: Skip this video' % skip_choice)
         prompt = 'Please enter your choice: '
         while True:
             try:
                 answer = int(raw_input(prompt))
             except ValueError:
                 prompt = 'Try again, please enter a number: '
                 continue
             if 0 <= answer <= skip_choice:
                 break
             prompt = 'Please enter the number of one of the choices above: '
         if answer == skip_choice:
             print('Skipping...')
             continue
         print('Continuing processing...')
         selected_blacks = [selected_blacks[answer]]

     #--- 4. Turn the chosen break into an h:m:s split timecode ------------
     black = selected_blacks[0]
     split = float(black[0]) #split at the start of the black segment
     whole_minutes, split_secs = divmod(split, 60)
     #minutes must wrap at 60, otherwise a break at or after the one hour
     #mark would produce a timecode such as 1:61:05
     split_hours, split_mins = divmod(int(whole_minutes), 60)
     split_tc = (split_hours, split_mins, split_secs)
     logging.debug('Identified episode break: %s' % (black,))
     logging.debug('split time: %.4f' % split)
     #width 7 is needed to zero-pad "ss.ffff" (width 6 never pads)
     logging.debug('timecodes:%02d:%02d:%07.4f' % split_tc)

     #--- 5. Split with mkvmerge -------------------------------------------
     #mkvmerge -o "test.mkv" --split "timecodes:00:xx:yy.zzzzzzz" "tvshow.mkv"
     if not fakeData:
         split_arg = "timecodes:%02d:%02d:%07.4f" % split_tc
         logging.debug('Splitting with...'+split_arg)
         session = Popen(['mkvmerge', '-o', 'converted.mkv', '--split', split_arg, vid], stdin=PIPE, stdout=PIPE, stderr=PIPE)
         res_text = session.communicate()[0]
     else:
         res_text = test_mkvmerge
         if debug: print(res_text)

     #check if split was successful
     if 'Progress: 100%' not in res_text:
         logging.error('The split may have failed')
         continue

     logging.info('Great Success! Episodes split at %02d:%02d:%07.4f.' % split_tc)

     #--- 6. Rename the two parts ------------------------------------------
     #NOTE: when this step is skipped the parts keep the names
     #converted-001.mkv / converted-002.mkv, which the next input reuses.
     if numEps == 2 and tryRename:
         fileName = os.path.basename(vid)
         fileExt = os.path.splitext(vid)[1][1:]
         #escape the extension so it is matched literally inside the regex
         name_info = re.findall(filename_pattern % re.escape(fileExt),fileName)
         if len(name_info) > 0:
             #the file name follows the expected multi-episode layout
             show_name, season, episode1, episode2, quality, epname1, epname2 = name_info[0]
             new_name_1 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode1,quality=quality,epname=epname1,fileExt=fileExt)
             new_name_2 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode2,quality=quality,epname=epname2,fileExt=fileExt)
             logging.info('File 1: ' + new_name_1)
             logging.info('File 2: ' + new_name_2)
             #Renaming files
             os.rename('converted-001.mkv', new_name_1)
             os.rename('converted-002.mkv', new_name_2)
             logging.info('New episode files renamed!')
         else:
             #unrecognised layout: fall back to <original>-Part1/-Part2
             logging.warning('Could not parse input file name, using original name')
             name_ext = os.path.splitext(vid)
             os.rename('converted-001.mkv',name_ext[0]+'-Part1'+name_ext[1])
             os.rename('converted-002.mkv',name_ext[0]+'-Part2'+name_ext[1])
	#!/usr/bin/env python

	#----------------------------------------------------------
	#
	# Episode Split
	# CraftyCanine
	#
	# Description
	#
	# This script is meant to make the process of splitting
	# multi-episode files for 15 min TV shows into 1 episode
	# per file.
	#
	#
	# Requirements
	#
	# ffmpeg, mkvmerge
	#
	#
	# Instructions
	#
	# Since this is a command-line tool, hopefully you know your way around the terminal.
	#
	# 1) Install ffmpeg, mkvmerge, and Python 2 (the script uses Python 2 syntax and does not run on Python 3) from your favorite package manager (yum, dpkg, brew, etc.) if you don't already have them.
	# 2) Download the file from gist by getting the raw link and copy pasting it into a file or run "wget <link>".
	# 3) Change into the directory where the files you want to split are stored.
	# 4) Run "python episodesplit.py -i <name of video>" to run with defaults. Depending on the quality of the video, it could take around 5 minutes.
	#
	# Note: If the tool doesn't want to split or is coming back with a bunch of choices, you might want to adjust the thresholds.
	# There's "-b <length in seconds>" to adjust the minimum black section length to search for. There's also
	# "-t <ratio of black pixels to non-black pixels>" for adjusting the sensitivity for what is considered a black frame.
	# Run "python episodesplit.py -h" for more info on the arguments.
	#
	# --------------------------------------------------------

	import sys,os,math
	import logging
	from logging.handlers import RotatingFileHandler
	import argparse
	from subprocess import Popen, PIPE
	import re

	#ffmpeg -i tvshow.mkv -vf "blackdetect=d=1.5" -an -f null -
	#mkvmerge -o "test.mkv" --split "timecodes:00:11:37.321999998" "tvshow.mkv"

	# Configurables ------------------------------------------

	#Regex patterns for ffmpeg output. Both duration patterns are checked.
	#Raw strings keep the regex escapes (\d, \s, \[ ...) out of Python's own
	#string-escape processing.
	#The fractional part of each blackdetect timestamp is optional because whole
	#values can be printed without a decimal point (e.g. "black_start:0"); the
	#line may end in either "\n" or "\r\n".
	black_pattern = r'\[blackdetect @ .+?\] black_start:(\d+(?:\.\d+)?) black_end:(\d+(?:\.\d+)?) black_duration:(\d+(?:\.\d+)?)\r?\n'
	#Matches the per-stream "DURATION : hh:mm:ss.fff" metadata line (groups: h, m, s)
	duration_pattern = r'DURATION\s+?: (\d+?):(\d+?):(\d+?\.\d+?)\n'
	#Fallback: the container-level "Duration: hh:mm:ss.ff," line (groups: h, m, s)
	duration_pattern2 = r'Duration: (\d+?):(\d+?):(\d+?\.\d+?),'
	#Regex pattern for old filename; %s is filled in with the file extension.
	#Groups: show name, season (Sxx), first episode (Exx), second episode (Exx),
	#quality, first episode name, second episode name. Episode names may contain
	#word characters, commas, dots, apostrophes and hyphens.
	filename_pattern = r"(.+?)\.?(S\d\d)(E\d\d)-(E\d\d)\.(.+?)\.([\w,.'\-]+?)\.?\+\.([\w,.'\-]+?)\.%s"
	#String format pattern for new filename
	filename_new_pattern = '{show_name}.{season}{episode}.{quality}.{epname}.{fileExt}'
	#Test data for fake data parameter
	test_ffmpeg = '''''' # insert new test ffmpeg output here
	test_mkvmerge = '''''' # insert new test mkvmerge output here

	# Globals ------------------------------------------------

	#Defaults for the run-time switches; every one of them is overwritten from
	#the parsed command line further down.
	debug = False
	tryRename = False
	fakeData = False
	black_length = 1.0

	# Parse Arguments -----------------------------------------

	parser = argparse.ArgumentParser(description='Script designed to ease the process of splitting multi-episode files for 15 min TV shows into separate files with 1 episode per file. NOTE: mkvmerge, the tool used to split the files, works using key frames. The split must occur on a key frame, so might not be directly at the episode mark.')
	parser.add_argument('-i', '--input',help='Input video file to be split.',required=True,nargs='+')
	parser.add_argument('-d','--debug',help='Enable debug mode NOTE: wouldn\'t recommend running debug mode with more than one file...',required=False,action='store_true')
	#type=float makes argparse reject a non-numeric -n with a usage message
	#instead of a traceback from the float() conversion below
	parser.add_argument('-n',help='Number of episodes in file (not working yet, don\'t use)',required=False,default=2,type=float)
	parser.add_argument('-l','--logpath',help='Path to log file',required=False,default=os.path.splitext(__file__)[0] + '.log')
	#store_false: args.norename is True unless -r is given, i.e. it already
	#holds the "try to rename" decision
	parser.add_argument('-r','--norename',help='Do not try to rename resulting episode files using original file name (Script will try to rename by default)',required=False,action='store_false')
	parser.add_argument('-f','--fakedata',help='Use fake data instead of running on a file (input file still required, but won\'t be used. NOTE: Test data configurable variables (top of source file) must be populated with test output of ffmpeg and mkvmerge.',required=False,action='store_true')
	parser.add_argument('-b','--black',help='Length of time in seconds that we are looking for black segments for potential split points (default is 1.0)',required=False,default=1.0,type=float)
	parser.add_argument('-t','--threshold',help='Threshold of black to non-black pixels (default is 0.98)',required=False,default=0.98,type=float)
	args = parser.parse_args()

	#The episode count is used as a divisor when estimating the episode length,
	#so zero or negative values are refused up front
	if args.n <= 0:
	    parser.error('-n must be a positive number')

	#Set debug flag (store_true always yields a bool, never None)
	debug = args.debug

	#Set input video (multi-episodes); nargs='+' makes this a list of paths
	inputvid = args.input

	#set number of eps
	numEps = float(args.n)

	#Log file name
	name = args.logpath

	#Should we rename the resulting files?
	tryRename = args.norename

	#Should we use fake data?
	fakeData = args.fakedata

	#Length of black segment to look for, pre-formatted for the ffmpeg filter string
	black_length = '%.1f' % args.black

	#Black pixel ratio threshold, pre-formatted for the ffmpeg filter string
	black_threshold = '%.2f' % args.threshold

	# Logging ---------------------------------------
	#Log line layout: timestamp, level in brackets, message
	fmt = '%(asctime)s [%(levelname)s] %(message)s'
	logFormatter = logging.Formatter(fmt)

	#Two outputs: a rotating log file at the configured path and stdout
	fileHandler = RotatingFileHandler(name,maxBytes=5e+6, backupCount=10)
	consoleHandler = logging.StreamHandler(sys.stdout)

	#Root logger verbosity follows the debug flag (a single expression, so no
	#indented if/else bodies are needed)
	rootLogger = logging.getLogger()
	rootLogger.setLevel(logging.DEBUG if debug else logging.INFO)

	#Both handlers share the formatter; the file handler is registered first
	for handler in (fileHandler, consoleHandler):
	    handler.setFormatter(logFormatter)
	    rootLogger.addHandler(handler)

	# Start Execution -----------------------------------------

	#Process every input file independently; any problem with one file is logged
	#and the loop moves on to the next one.
	for vid in inputvid:
	    #--- 1. Scan the file for black segments ------------------------------
	    if not fakeData:
	        logging.info('Splitting file: %s' % vid)
	        logging.info('Scanning file for black segments with black ratio at least %s and at least %s second(s) in length...' % (black_threshold,black_length))
	        session = Popen(['ffmpeg', '-i', vid, '-vf', 'blackdetect=d='+black_length+':pic_th='+black_threshold, '-an', '-f', 'null', '-'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
	        res_text = session.communicate()

	        if debug: print(res_text)

	        #the patterns below are matched against ffmpeg's stderr stream
	        res_text = res_text[1]

	        if debug:
	            print('\n\n\n')
	            print(res_text)
	    else:
	        res_text = test_ffmpeg

	    blacks = re.findall(black_pattern,res_text)
	    video_length = re.findall(duration_pattern,res_text)

	    if len(video_length) == 0:
	        video_length = re.findall(duration_pattern2,res_text)

	    #Check for issues with ffmpeg
	    if len(blacks) == 0 or len(video_length) == 0:
	        logging.error('There was a problem parsing the video. Perhaps the black length value should be decreased?')
	        logging.debug('Length blacks: %d Length duration: %d' % (len(blacks),len(video_length)))
	        continue

	    #--- 2. Estimate where the episode break should be --------------------
	    video_length = video_length[0]
	    hours, minutes, seconds = video_length
	    totalSeconds = (float(hours)*60 + float(minutes))*60 + float(seconds)
	    lenEst = totalSeconds / numEps #approx. length of one episode in seconds
	    margin = lenEst / 3 #acceptable distance from the estimate (1/3 of one episode)

	    logging.debug(blacks)
	    logging.debug(video_length)
	    logging.debug(totalSeconds)
	    logging.debug('estimated episode length = %.4f' % lenEst)
	    logging.debug('margin = %.4f - %.4f' % (lenEst - margin, lenEst + margin))

	    #keep only the black segments that start inside the acceptable window
	    selected_blacks = [candidate for candidate in blacks
	                       if lenEst - margin < float(candidate[0]) < lenEst + margin]

	    if len(selected_blacks) == 0:
	        logging.info('No suitable black sections were found. Try changing the pixel ratio threshold and minimum black length (-t and -b)')
	        continue

	    #two hits that start within 10 seconds of each other are treated as one break
	    if len(selected_blacks) == 2:
	        secs1 = float(selected_blacks[0][0])
	        secs2 = float(selected_blacks[1][0])
	        if (secs2 - secs1) < 10:
	            logging.info('Two possible black sections identified but they are within 10 seconds. Using first as the episode break...')
	            selected_blacks = [selected_blacks[0]]

	    #--- 3. Let the user choose when the break is still ambiguous ---------
	    #(any remaining count above one is ambiguous, including exactly two
	    #candidates that are far apart)
	    if len(selected_blacks) > 1:
	        print('More than one black section has been identified.\nPlease select the section that you want to split on.')
	        for i, candidate in enumerate(selected_blacks):
	            candidate_start = float(candidate[0])
	            print('%d: %d:%05.2f' % (i, candidate_start / 60, candidate_start % 60))
	        skip_choice = len(selected_blacks) #menu entry right after the last candidate
	        print('%d: Skip this video' % skip_choice)
	        prompt = 'Please enter your choice: '
	        while True:
	            try:
	                answer = int(raw_input(prompt))
	            except ValueError:
	                prompt = 'Try again, please enter a number: '
	                continue
	            if 0 <= answer <= skip_choice:
	                break
	            prompt = 'Please enter the number of one of the choices above: '
	        if answer == skip_choice:
	            print('Skipping...')
	            continue
	        print('Continuing processing...')
	        selected_blacks = [selected_blacks[answer]]

	    #--- 4. Turn the chosen break into an h:m:s split timecode ------------
	    black = selected_blacks[0]
	    split = float(black[0]) #split at the start of the black segment
	    whole_minutes, split_secs = divmod(split, 60)
	    #minutes must wrap at 60, otherwise a break at or after the one hour
	    #mark would produce a timecode such as 1:61:05
	    split_hours, split_mins = divmod(int(whole_minutes), 60)
	    split_tc = (split_hours, split_mins, split_secs)
	    logging.debug('Identified episode break: %s' % (black,))
	    logging.debug('split time: %.4f' % split)
	    #width 7 is needed to zero-pad "ss.ffff" (width 6 never pads)
	    logging.debug('timecodes:%02d:%02d:%07.4f' % split_tc)

	    #--- 5. Split with mkvmerge -------------------------------------------
	    #mkvmerge -o "test.mkv" --split "timecodes:00:xx:yy.zzzzzzz" "tvshow.mkv"
	    if not fakeData:
	        split_arg = "timecodes:%02d:%02d:%07.4f" % split_tc
	        logging.debug('Splitting with...'+split_arg)
	        session = Popen(['mkvmerge', '-o', 'converted.mkv', '--split', split_arg, vid], stdin=PIPE, stdout=PIPE, stderr=PIPE)
	        res_text = session.communicate()[0]
	    else:
	        res_text = test_mkvmerge
	        if debug: print(res_text)

	    #check if split was successful
	    if 'Progress: 100%' not in res_text:
	        logging.error('The split may have failed')
	        continue

	    logging.info('Great Success! Episodes split at %02d:%02d:%07.4f.' % split_tc)

	    #--- 6. Rename the two parts ------------------------------------------
	    #NOTE: when this step is skipped the parts keep the names
	    #converted-001.mkv / converted-002.mkv, which the next input reuses.
	    if numEps == 2 and tryRename:
	        fileName = os.path.basename(vid)
	        fileExt = os.path.splitext(vid)[1][1:]
	        #escape the extension so it is matched literally inside the regex
	        name_info = re.findall(filename_pattern % re.escape(fileExt),fileName)
	        if len(name_info) > 0:
	            #the file name follows the expected multi-episode layout
	            show_name, season, episode1, episode2, quality, epname1, epname2 = name_info[0]
	            new_name_1 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode1,quality=quality,epname=epname1,fileExt=fileExt)
	            new_name_2 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode2,quality=quality,epname=epname2,fileExt=fileExt)
	            logging.info('File 1: ' + new_name_1)
	            logging.info('File 2: ' + new_name_2)
	            #Renaming files
	            os.rename('converted-001.mkv', new_name_1)
	            os.rename('converted-002.mkv', new_name_2)
	            logging.info('New episode files renamed!')
	        else:
	            #unrecognised layout: fall back to <original>-Part1/-Part2
	            logging.warning('Could not parse input file name, using original name')
	            name_ext = os.path.splitext(vid)
	            os.rename('converted-001.mkv',name_ext[0]+'-Part1'+name_ext[1])
	            os.rename('converted-002.mkv',name_ext[0]+'-Part2'+name_ext[1])
No results found