-
-
Save CraftyCanine/3a1a86b2837092038edab2ea33182460 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python | |
| #---------------------------------------------------------- | |
| # | |
| # Episode Split | |
| # CraftyCanine | |
| # | |
| # Description | |
| # | |
| # This script is meant to make the process of splitting | |
| # multi-episode files for 15 min TV shows into 1 episode | |
| # per file. | |
| # | |
| # | |
| # Requirements | |
| # | |
| # ffmpeg, mkvmerge | |
| # | |
| # | |
| # Instructions | |
| # | |
| # Since this is a command-line tool, hopefully you know your way around the terminal. | |
| # | |
| # 1) Install ffmpeg, mkvmerge, and python less than python 3 from your favorite package manager (yum, dpkg, brew, etc) if you don't already have them. | |
| # 2) Download the file from gist by getting the raw link and copy pasting it into a file or run "wget <link>". | |
| # 3) Change into the directory where the files you want to split are stored. | |
| # 4) Run "python episodesplit.py -i <name of video>" to run with defaults. Depending on the quality of the video, it could take around 5 minutes. | |
| # | |
| # Note: If the tool doesn't want to split or is coming back with a bunch of choices, you might want to adjust the thresholds. | |
| # There's "-b <length in seconds>" to adjust the minimum black section length to search for. There's also | |
| # "-t <ratio of black pixels to non-black pixels>" for adjusting the sensitivity for what is considered a black frame. | |
| # Run "python episodesplit.py -h" for more info on the arguments. | |
| # | |
| # -------------------------------------------------------- | |
| import sys,os,math | |
| import logging | |
| from logging.handlers import RotatingFileHandler | |
| import argparse | |
| from subprocess import Popen, PIPE | |
| import re | |
| #ffmpeg -i tvshow.mkv -vf "blackdetect=d=1.5" -an -f null - | |
| #mkvmerge -o "test.mkv" --split "timecodes:00:11:37.321999998" "tvshow.mkv" | |
# Configurables ------------------------------------------

# Regex patterns applied to ffmpeg's stderr output.
#
# blackdetect reports each black segment on a line of the form
#   [blackdetect @ 0x...] black_start:S black_end:E black_duration:D
# ffmpeg prints these values with "%.6g", so a value that lands on a whole
# second has no decimal point (e.g. "black_start:697"). The fractional part
# is therefore optional; requiring it would silently drop the whole line.
black_pattern = r'\[blackdetect @ .+?\] black_start:(\d+(?:\.\d+)?) black_end:(\d+(?:\.\d+)?) black_duration:(\d+(?:\.\d+)?)\n'

# Total running time. Both duration patterns are checked: the per-stream
# "DURATION" metadata tag first, then the container-level "Duration:" line.
# Each captures (hours, minutes, seconds).
duration_pattern = r'DURATION\s+?: (\d+?):(\d+?):(\d+?\.\d+?)\n'
duration_pattern2 = r'Duration: (\d+?):(\d+?):(\d+?\.\d+?),'

# Regex pattern for the old (multi-episode) filename. The trailing %s is
# filled in with the file extension before matching. Captured groups, in order:
# show name, season (Sxx), first episode (Exx), second episode (Exx),
# quality, first episode title, second episode title.
filename_pattern = r"(.+?)\.?(S\d\d)(E\d\d)-(E\d\d)\.(.+?)\.([\w,\.,',\,,\-]+?)\.?\+\.([\w,\.,',\,,\-]+?)\.%s"

# String format pattern for each new single-episode filename.
filename_new_pattern = '{show_name}.{season}{episode}.{quality}.{epname}.{fileExt}'

# Test data for the fake data parameter (-f/--fakedata).
test_ffmpeg = ''''''  # insert new test ffmpeg output here
test_mkvmerge = ''''''  # insert new test mkvmerge output here

# Globals ------------------------------------------------
# Defaults only; the argument parsing section overwrites them.
debug = False
tryRename = False
fakeData = False
black_length = 1.0
# Parse Arguments -----------------------------------------
# Build the command-line interface, parse the process arguments, and copy the
# results into the module-level settings read by the rest of the script.
parser = argparse.ArgumentParser(
    description=(
        'Script designed to ease the process of splitting multi-episode files '
        'for 15 min TV shows into separate files with 1 episode per file. '
        'NOTE: mkvmerge, the tool used to split the files, works using key frames. '
        'The split must occur on a key frame, so might not be directly at the episode mark.'
    )
)
parser.add_argument(
    '-i', '--input',
    nargs='+',
    required=True,
    help='Input video file to be split.',
)
parser.add_argument(
    '-d', '--debug',
    action='store_true',
    help="Enable debug mode NOTE: wouldn't recommend running debug mode with more than one file...",
)
parser.add_argument(
    '-n',
    default=2,
    help="Number of episodes in file (not working yet, don't use)",
)
parser.add_argument(
    '-l', '--logpath',
    default=os.path.splitext(__file__)[0] + '.log',
    help='Path to log file',
)
# store_false: args.norename is True unless -r is given, i.e. it holds
# "should we rename?" rather than "rename disabled".
parser.add_argument(
    '-r', '--norename',
    action='store_false',
    help='Do not try to rename resulting episode files using original file name (Script will try to rename by default)',
)
parser.add_argument(
    '-f', '--fakedata',
    action='store_true',
    help="Use fake data instead of running on a file (input file still required, but won't be used. NOTE: Test data configurable variables (top of source file) must be populated with test output of ffmpeg and mkvmerge.",
)
parser.add_argument(
    '-b', '--black',
    type=float,
    default=1.0,
    help='Length of time in seconds that we are looking for black segments for potential split points (default is 1.0)',
)
parser.add_argument(
    '-t', '--threshold',
    type=float,
    default=0.98,
    help='Threshold of black to non-black pixels (default is 0.98)',
)
args = parser.parse_args()

# Debug flag (store_true always yields a bool, so it can be copied directly).
debug = args.debug

# Input videos (multi-episode files); a list because of nargs='+'.
inputvid = args.input

# Number of episodes per file, kept as a float for the length arithmetic.
numEps = float(args.n)

# Log file name.
name = args.logpath

# Should we rename the resulting files?
tryRename = args.norename

# Should we use fake data?
fakeData = args.fakedata

# Minimum black segment length and pixel threshold, pre-formatted as the
# strings that get spliced into ffmpeg's blackdetect filter argument.
black_length = '{:.1f}'.format(args.black)
black_threshold = '{:.2f}'.format(args.threshold)
# Logging ---------------------------------------
# Every record is written both to a rotating log file and to stdout, using
# one shared layout: timestamp, level, message.
fmt = '%(asctime)s [%(levelname)s] %(message)s'
logFormatter = logging.Formatter(fmt)

# Root logger verbosity follows the debug flag.
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.DEBUG if debug else logging.INFO)

# File handler rolls over at maxBytes and keeps backupCount old files.
fileHandler = RotatingFileHandler(name, maxBytes=5e+6, backupCount=10)
consoleHandler = logging.StreamHandler(sys.stdout)

# Attach the shared formatter and register each handler (file first, then console).
for handler in (fileHandler, consoleHandler):
    handler.setFormatter(logFormatter)
    rootLogger.addHandler(handler)
# Start Execution -----------------------------------------

# raw_input() only exists on Python 2; input() is its Python 3 equivalent.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def _run_tool(command):
    """Run an external command and return its (stdout, stderr) as text."""
    session = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    streams = session.communicate()
    # Python 3 returns bytes, which the str regexes cannot search; decode them.
    # On Python 2 the output is already str and is passed through untouched.
    return tuple(
        stream if isinstance(stream, str) else stream.decode('utf-8', 'replace')
        for stream in streams
    )


def _format_timecode(seconds):
    """Format a position in seconds as a zero-padded HH:MM:SS.ffff string."""
    # Work in whole ten-thousandths of a second so that rounding can never
    # produce an invalid "60.0000" seconds field, and wrap minutes at 60 so
    # that positions past the one-hour mark are rendered correctly.
    ticks = int(round(seconds * 10000))
    whole_seconds, fraction = divmod(ticks, 10000)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return '%02d:%02d:%02d.%04d' % (hours, minutes, secs, fraction)


def _ask_for_split(candidates):
    """Let the user pick one black section; return it, or None to skip the video."""
    print('More than one black section has been identified.\nPlease select the section that you want to split on.')
    for index, candidate in enumerate(candidates):
        begin = float(candidate[0])
        print('%d: %d:%05.2f' % (index, begin / 60, begin % 60))
    skip_choice = len(candidates)
    print('%d: Skip this video' % skip_choice)

    prompt = 'Please enter your choice: '
    while True:
        try:
            answer = int(_read_line(prompt))
        except ValueError:
            prompt = 'Try again, please enter a number: '
            continue
        except EOFError:
            # No interactive input available: skipping is the only safe choice.
            print('Skipping...')
            return None
        if 0 <= answer < skip_choice:
            print('Continuing processing...')
            return candidates[answer]
        if answer == skip_choice:
            print('Skipping...')
            return None
        prompt = 'Please enter the number of one of the choices above: '


def _rename_outputs(vid):
    """Rename mkvmerge's two output files using the input file's name."""
    fileName = os.path.basename(vid)
    fileExt = os.path.splitext(vid)[1][1:]
    # Escape the extension so it is matched literally inside the pattern.
    name_info = re.findall(filename_pattern % re.escape(fileExt), fileName)
    if name_info:
        show_name, season, episode1, episode2, quality, epname1, epname2 = name_info[0]
        targets = [
            filename_new_pattern.format(show_name=show_name, season=season, episode=episode,
                                        quality=quality, epname=epname, fileExt=fileExt)
            for episode, epname in ((episode1, epname1), (episode2, epname2))
        ]
        logging.info('File 1: ' + targets[0])
        logging.info('File 2: ' + targets[1])
    else:
        # The name does not follow the expected multi-episode layout.
        logging.warning('Could not parse input file name, using original name')
        name_ext = os.path.splitext(vid)
        targets = [name_ext[0] + '-Part1' + name_ext[1], name_ext[0] + '-Part2' + name_ext[1]]

    try:
        os.rename('converted-001.mkv', targets[0])
        os.rename('converted-002.mkv', targets[1])
    except OSError as err:
        # e.g. mkvmerge produced fewer parts than expected, or fake data mode.
        logging.error('Could not rename the split files: %s' % err)
        return
    if name_info:
        logging.info('New episode files renamed!')


for vid in inputvid:
    # --- 1) Scan the video for black segments with ffmpeg -------------------
    if not fakeData:
        logging.info('Splitting file: %s' % vid)
        logging.info('Scanning file for black segments with black ratio at least %s and at least %s second(s) in length...' % (black_threshold, black_length))
        ffmpeg_output = _run_tool(['ffmpeg', '-i', vid, '-vf', 'blackdetect=d=' + black_length + ':pic_th=' + black_threshold, '-an', '-f', 'null', '-'])
        if debug:
            print(ffmpeg_output)
        # ffmpeg writes its report (including blackdetect lines) to stderr.
        res_text = ffmpeg_output[1]
        if debug:
            print('\n\n\n')
            print(res_text)
    else:
        res_text = test_ffmpeg

    blacks = re.findall(black_pattern, res_text)
    durations = re.findall(duration_pattern, res_text)
    if not durations:
        durations = re.findall(duration_pattern2, res_text)

    # Check for issues with ffmpeg
    if not blacks or not durations:
        logging.error('There was a problem parsing the video. Perhaps the black length value should be decreased?')
        logging.debug('Length blacks: %d Length duration: %d' % (len(blacks), len(durations)))
        continue

    # --- 2) Estimate where the episode break should be ----------------------
    hours, minutes, seconds = (float(part) for part in durations[0])
    totalSeconds = (hours * 60 + minutes) * 60 + seconds
    lenEst = totalSeconds / numEps  # approx. length of one episode in seconds
    margin = lenEst / 3             # acceptable deviation (1/3 of one episode)

    logging.debug(blacks)
    logging.debug(durations[0])
    logging.debug(totalSeconds)
    logging.debug('estimated episode length = %.4f' % lenEst)
    logging.debug('margin = %.4f - %.4f' % (lenEst - margin, lenEst + margin))

    # Keep only the black segments that start within the acceptable window.
    selected_blacks = [
        black for black in blacks
        if lenEst - margin < float(black[0]) < lenEst + margin
    ]

    if not selected_blacks:
        logging.info('No suitable black sections were found. Try changing the pixel ratio threshold and minimum black length (-t and -b)')
        continue

    # Two hits within 10 seconds of each other are treated as the same break.
    if len(selected_blacks) == 2:
        if float(selected_blacks[1][0]) - float(selected_blacks[0][0]) < 10:
            logging.info('Two possible black sections identified but they are within 10 seconds. Using first as the episode break...')
            selected_blacks = [selected_blacks[0]]

    # Any remaining ambiguity (two or more candidates) is resolved by the user
    # instead of silently taking the first one.
    if len(selected_blacks) > 1:
        black = _ask_for_split(selected_blacks)
        if black is None:
            continue
    else:
        black = selected_blacks[0]

    # --- 3) Split at the start of the chosen black segment ------------------
    split = float(black[0])
    timecode = _format_timecode(split)

    logging.debug('Identified episode break: %s' % (black,))
    logging.debug('split time: %.4f' % split)
    logging.debug('timecodes:%s' % timecode)

    # Equivalent command line:
    # mkvmerge -o "converted.mkv" --split "timecodes:HH:MM:SS.ffff" "tvshow.mkv"
    if not fakeData:
        split_arg = 'timecodes:' + timecode
        logging.debug('Splitting with...' + split_arg)
        res_text = _run_tool(['mkvmerge', '-o', 'converted.mkv', '--split', split_arg, vid])[0]
    else:
        res_text = test_mkvmerge

    if debug:
        print(res_text)

    # mkvmerge reports "Progress: 100%" on stdout when it has finished.
    if re.search('Progress: 100%', res_text) is None:
        logging.error('The split may have failed')
        continue

    logging.info('Great Success! Episodes split at %s.' % timecode)

    # --- 4) Rename the two resulting files ----------------------------------
    if numEps == 2 and tryRename:
        _rename_outputs(vid)
Thank you for commenting, I'm glad to hear that this helped someone! I know it's a bit of a niche area but I figured someone would be able to use it some day.
Hey there. Thanks for your work and for sharing it.
I would love to use this script as well,
but for series longer than 15 minutes, which 99% of the time are just two episodes in one file, split at almost exactly the 50% mark.
So there is no need to search the whole file, only roughly from 48% to 52% of the way through it.
Is there any chance you could enhance the script that way? I have been searching for years for a working script :/
Thank you.
Hello, glad to help! It's been a long time since I've worked on this but I would be glad to take a look at it for you. Is it not working at all? Seems like you're saying it still works, this would just make it quicker?
I just came across your script and just wanted to say THANK YOU! This saved me a ton of time. I really appreciate all the work you put into this :)