-
-
Save cooliscool/6739d9ad27014c25f7816c40fc7cce60 to your computer and use it in GitHub Desktop.
import os
import xml.etree.ElementTree as ET
from os.path import isfile, join
from shutil import copyfile

# The 20 PASCAL VOC object classes plus the 'none' entry (kept for reference).
PASCAL_CLASSES = [
    'none',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor'
]

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2012/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2012/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2012/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2012/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2012/trainval.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2012/labelmap_voc.prototxt'

# Maximum number of images to keep for training.
# Set to None to keep every image that contains a wanted class.
maxImages = 101


def filter_voc_dataset(classes, xml_dir, new_xml_dir, jpeg_dir, new_jpeg_dir,
                       list_path, labelmap_path, list_image_prefix,
                       list_xml_prefix, max_images=None):
    """Build a reduced copy of a VOC-style dataset holding only `classes`.

    Every ``*.xml`` annotation in `xml_dir` is parsed; ``<object>`` elements
    whose ``<name>`` is not in `classes` are removed. Annotations that still
    contain at least one object are written to `new_xml_dir`, the JPEG with
    the same base name is copied from `jpeg_dir` to `new_jpeg_dir`, and a
    line ``<list_image_prefix><stem>.jpg <list_xml_prefix><file>.xml`` is
    appended to the list file at `list_path`. A label map with one ``item``
    per entry of `classes` (labels numbered from 0 in list order) is written
    to `labelmap_path`.

    Args:
        classes: class names to retain, in label order.
        xml_dir: directory holding the source XML annotations.
        new_xml_dir: output directory for filtered annotations (created if
            missing).
        jpeg_dir: directory holding the source JPEG images.
        new_jpeg_dir: output directory for copied images (created if
            missing).
        list_path: path of the image/annotation list file to write.
        labelmap_path: path of the label map file to write.
        list_image_prefix: text placed before each image name in the list.
        list_xml_prefix: text placed before each annotation name in the list.
        max_images: stop after this many images were kept; None = no limit.

    Returns:
        dict mapping each class name to the annotation file names in which it
        occurs, one entry per matching object (a file with two cars is listed
        twice).

    Raises:
        IOError/OSError: if an annotation's JPEG is missing or a path is not
            writable.
        xml.etree.ElementTree.ParseError: if an annotation is not valid XML.
    """
    for out_dir in (new_xml_dir, new_jpeg_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    file_dict = {name: [] for name in classes}

    with open(labelmap_path, 'w') as labelmap:
        for label, name in enumerate(classes):
            labelmap.write('item {\n name: "' + name + '"\n label: ' + str(label)
                           + '\n display_name: "' + name + '"\n}\n')

    # Sorted so that the subset chosen under `max_images` is reproducible;
    # os.listdir() returns entries in arbitrary order.
    xml_files = sorted(
        f for f in os.listdir(xml_dir)
        if f.lower().endswith('.xml') and isfile(join(xml_dir, f)))

    kept_images = 0
    with open(list_path, 'w') as listfile:
        for filename in xml_files:
            if max_images is not None and kept_images >= max_images:
                break

            tree = ET.parse(join(xml_dir, filename))
            root = tree.getroot()

            kept_objects = 0
            # findall() returns a list, so removing children while looping is safe.
            for obj in root.findall('object'):
                name_el = obj.find('name')
                label = name_el.text if name_el is not None else None
                if label in file_dict:
                    file_dict[label].append(filename)
                    kept_objects += 1
                else:
                    root.remove(obj)

            if kept_objects == 0:
                continue  # drop the file, there are no objects of interest

            jpeg_name = os.path.splitext(filename)[0] + '.jpg'
            # Copy the image first so a missing JPEG never leaves an orphan XML.
            copyfile(join(jpeg_dir, jpeg_name), join(new_jpeg_dir, jpeg_name))
            # Passing a path lets ElementTree open the file in the right mode
            # on both Python 2 and Python 3.
            tree.write(join(new_xml_dir, filename))
            # make entry in the list file required for LMDB
            listfile.write(list_image_prefix + jpeg_name + ' '
                           + list_xml_prefix + filename + '\n')
            kept_images += 1

    return file_dict


if __name__ == '__main__':
    fileDict = filter_voc_dataset(
        classesINeed, xmlDirectory, modifiedXmlDir, JPEGdirectory,
        modifiedJPEGdir, listFile, labelMap,
        'VOC2012/newJPEGImages/', 'VOC2012/newAnnotations/',
        max_images=maxImages)
    print(len(fileDict.get('car', [])))
import os
import xml.etree.ElementTree as ET
from os.path import isfile, join
from shutil import copyfile

# The 20 PASCAL VOC object classes plus the 'none' entry (kept for reference).
PASCAL_CLASSES = [
    'none',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor'
]

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2007/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2007/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2007/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2007/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2007/test.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2007/labelmap_voc.prototxt'

# Maximum number of images to keep for testing.
# Set to None to keep every image that contains a wanted class.
maxImages = 21


def filter_voc_dataset(classes, xml_dir, new_xml_dir, jpeg_dir, new_jpeg_dir,
                       list_path, labelmap_path, list_image_prefix,
                       list_xml_prefix, max_images=None):
    """Build a reduced copy of a VOC-style dataset holding only `classes`.

    Every ``*.xml`` annotation in `xml_dir` is parsed; ``<object>`` elements
    whose ``<name>`` is not in `classes` are removed. Annotations that still
    contain at least one object are written to `new_xml_dir`, the JPEG with
    the same base name is copied from `jpeg_dir` to `new_jpeg_dir`, and a
    line ``<list_image_prefix><stem>.jpg <list_xml_prefix><file>.xml`` is
    appended to the list file at `list_path`. A label map with one ``item``
    per entry of `classes` (labels numbered from 0 in list order) is written
    to `labelmap_path`.

    Args:
        classes: class names to retain, in label order.
        xml_dir: directory holding the source XML annotations.
        new_xml_dir: output directory for filtered annotations (created if
            missing).
        jpeg_dir: directory holding the source JPEG images.
        new_jpeg_dir: output directory for copied images (created if
            missing).
        list_path: path of the image/annotation list file to write.
        labelmap_path: path of the label map file to write.
        list_image_prefix: text placed before each image name in the list.
        list_xml_prefix: text placed before each annotation name in the list.
        max_images: stop after this many images were kept; None = no limit.

    Returns:
        dict mapping each class name to the annotation file names in which it
        occurs, one entry per matching object (a file with two cars is listed
        twice).

    Raises:
        IOError/OSError: if an annotation's JPEG is missing or a path is not
            writable.
        xml.etree.ElementTree.ParseError: if an annotation is not valid XML.
    """
    for out_dir in (new_xml_dir, new_jpeg_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    file_dict = {name: [] for name in classes}

    with open(labelmap_path, 'w') as labelmap:
        for label, name in enumerate(classes):
            labelmap.write('item {\n name: "' + name + '"\n label: ' + str(label)
                           + '\n display_name: "' + name + '"\n}\n')

    # Sorted so that the subset chosen under `max_images` is reproducible;
    # os.listdir() returns entries in arbitrary order.
    xml_files = sorted(
        f for f in os.listdir(xml_dir)
        if f.lower().endswith('.xml') and isfile(join(xml_dir, f)))

    kept_images = 0
    with open(list_path, 'w') as listfile:
        for filename in xml_files:
            if max_images is not None and kept_images >= max_images:
                break

            tree = ET.parse(join(xml_dir, filename))
            root = tree.getroot()

            kept_objects = 0
            # findall() returns a list, so removing children while looping is safe.
            for obj in root.findall('object'):
                name_el = obj.find('name')
                label = name_el.text if name_el is not None else None
                if label in file_dict:
                    file_dict[label].append(filename)
                    kept_objects += 1
                else:
                    root.remove(obj)

            if kept_objects == 0:
                continue  # drop the file, there are no objects of interest

            jpeg_name = os.path.splitext(filename)[0] + '.jpg'
            # Copy the image first so a missing JPEG never leaves an orphan XML.
            copyfile(join(jpeg_dir, jpeg_name), join(new_jpeg_dir, jpeg_name))
            # Passing a path lets ElementTree open the file in the right mode
            # on both Python 2 and Python 3.
            tree.write(join(new_xml_dir, filename))
            # make entry in the list file required for LMDB
            listfile.write(list_image_prefix + jpeg_name + ' '
                           + list_xml_prefix + filename + '\n')
            kept_images += 1

    return file_dict


if __name__ == '__main__':
    fileDict = filter_voc_dataset(
        classesINeed, xmlDirectory, modifiedXmlDir, JPEGdirectory,
        modifiedJPEGdir, listFile, labelMap,
        'VOC2007/newJPEGImages/', 'VOC2007/newAnnotations/',
        max_images=maxImages)
    print(len(fileDict.get('car', [])))

# Run in a terminal, in the caffe root, to generate the file size list:
# ./build/tools/get_image_size /home/ajmal/data/VOCdevkit/ data/VOC0712/test.txt data/VOC0712/test_name_size.txt
did you fill in the classesINeed array with extra classes you need ?
Did you fill in the `classesINeed` array with the extra classes you need?
Yes i did, and it only generated like 100 images and their annotations.
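Change the image limit value (101 for the training list, 21 for the test list) - the script stops once that many images have been copied.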
Reading this code I wrote several years back, I realise the whole thing is put together in a hacky way. 😅
I'm creating a file 'trainval.txt' to keep a list of the training images I finally intend to use (https://gist.github.com/cooliscool/6739d9ad27014c25f7816c40fc7cce60#file-create_dataset-py-L99). This is not the trainval_2007.txt file that is part of the dataset. Rather than using the 'trainval' file shipped with the dataset - which has the image-to-class mapping - I'm parsing this information from the XML annotations in 'VOC2012/Annotations/'. I'm not sure this is the best way to do it, because 'trainval_2012.txt' itself has this image-to-class mapping; using it would avoid the need to parse multiple XML files.
Hello. I execute the code but only a very small number of images/annotations are copied to the new files. Any idea why it is happening?