-
-
Save terasakisatoshi/627ed051f01be150bc33e65405f59c31 to your computer and use it in GitHub Desktop.
MPI-INF-3DHP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import copy\n", | |
| "import cv2 as cv\n", | |
| "import math\n", | |
| "import numpy as np\n", | |
| "import random\n", | |
| "import imageio\n", | |
| "import scipy.io\n", | |
| "import skimage.transform\n", | |
| "from tqdm import tqdm\n", | |
| "from itertools import product\n", | |
| "from vectormath import Vector2" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "root = 'D:/data/MPI-INF-3DHP'\n", | |
| "\n", | |
| "available_subject = [1, 2, 3, 4, 5, 6, 7, 8, ]\n", | |
| "available_sequence = [1, 2, ]\n", | |
| "available_camera = [camera for camera in range(14)]\n", | |
| "\n", | |
| "# available_segment[subject][sequence]\n", | |
| "# segmented sequence = np.squeeze(np.where(available_segment[subject])) + 1\n", | |
| "available_segment = [\n", | |
| " [False, True], # subject 1\n", | |
| " [False, True], # subject 2\n", | |
| " [False, True],\n", | |
| " [False, True],\n", | |
| " [False, True],\n", | |
| " [False, True],\n", | |
| " [True, False],\n", | |
| " [True, False], # subject 8\n", | |
| "]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "class SequentialDictionary:\n", | |
| " '''The custom dictionary class\n", | |
| " \n", | |
| " You can use a dictionary with multiple indices, i.e. x['1st dim']['2nd dim'] = 2.\n", | |
| " '''\n", | |
| " \n", | |
| " def __init__(self):\n", | |
| " self.data = dict()\n", | |
| " \n", | |
| " def __getitem__(self, index):\n", | |
| " if index not in self.data.keys():\n", | |
| " self.data[index] = SequentialDictionary()\n", | |
| " return self.data[index]\n", | |
| " \n", | |
| " def __setitem__(self, index, value):\n", | |
| " self.data[index] = value\n", | |
| " \n", | |
| " def __len__(self):\n", | |
| " length = 0\n", | |
| " for key, value in self.data.items():\n", | |
| " if type(value) is SequentialDictionary:\n", | |
| " length = length + len(value)\n", | |
| " else:\n", | |
| " length = length + 1\n", | |
| " return length" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 672/672 [00:34<00:00, 20.23it/s]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Video\n", | |
| "VIDEO_RGB = 'imageSequence'\n", | |
| "VIDEO_MASK_HUMAN_AND_CHAIR = 'FGmasks'\n", | |
| "VIDEO_MASK_CHAIR = 'ChairMasks'\n", | |
| "\n", | |
| "available_format = [\n", | |
| " VIDEO_RGB,\n", | |
| " VIDEO_MASK_HUMAN_AND_CHAIR,\n", | |
| " VIDEO_MASK_CHAIR,\n", | |
| "]\n", | |
| "\n", | |
| "video_path = '{root}/{subject}/{sequence}/{format}/video_{camera}.avi'\n", | |
| "video = SequentialDictionary()\n", | |
| "\n", | |
| "available_video = product(*[\n", | |
| " available_subject, \n", | |
| " available_sequence, \n", | |
| " available_format, \n", | |
| " available_camera,\n", | |
| "])\n", | |
| "total = len(available_subject) * len(available_sequence) * len(available_format) * len(available_camera)\n", | |
| "\n", | |
| "for subject, sequence, format, camera in tqdm(available_video, total=total):\n", | |
| " video[subject][sequence][format][camera] = cv.VideoCapture(video_path.format(\n", | |
| " root=root,\n", | |
| " subject='S%d' % subject,\n", | |
| " sequence='Seq%d' % sequence,\n", | |
| " format=format,\n", | |
| " camera=camera,\n", | |
| " ))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:17<00:00, 1.14it/s]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# annotation\n", | |
| "ANNOT_CAMERA_2D = 'annot2'\n", | |
| "ANNOT_CAMERA_3D = 'annot3'\n", | |
| "ANNOT_WORLD_3D = 'univ_annot3'\n", | |
| "ANNOT_CAMERA_CALI = 'cameras'\n", | |
| "\n", | |
| "annot_path = '{root}/{subject}/{sequence}/annot.mat'\n", | |
| "annot = SequentialDictionary()\n", | |
| "\n", | |
| "available_annot = product(*[\n", | |
| " available_subject, \n", | |
| " available_sequence,\n", | |
| "])\n", | |
| "total = len(available_subject) * len(available_sequence)\n", | |
| "\n", | |
| "for subject, sequence, in tqdm(available_annot, total=total):\n", | |
| " annot[subject][sequence] = scipy.io.loadmat(annot_path.format(\n", | |
| " root=root,\n", | |
| " subject='S%d' % subject,\n", | |
| " sequence='Seq%d' % sequence,\n", | |
| " ))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 144.42it/s]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# camera parameters\n", | |
| "CAMERA_INTRINSIC = 'intrinsic'\n", | |
| "CAMERA_EXTRINSIC = 'extrinsic'\n", | |
| "\n", | |
| "camera_path = '{root}/{subject}/{sequence}/camera.calibration'\n", | |
| "camera_parameter = SequentialDictionary()\n", | |
| "\n", | |
| "available_camera_parameter = product(*[\n", | |
| " available_subject, \n", | |
| " available_sequence,\n", | |
| "])\n", | |
| "total = len(available_subject) * len(available_sequence)\n", | |
| "\n", | |
| "for subject, sequence, in tqdm(available_camera_parameter, total=total):\n", | |
| " camera_index = -1\n", | |
| " with open(camera_path.format(\n", | |
| " root=root,\n", | |
| " subject='S%d' % subject,\n", | |
| " sequence='Seq%d' % sequence,\n", | |
| " ), 'r') as file:\n", | |
| " for line in file:\n", | |
| " word = line.strip().split() # remove whilespace\n", | |
| "\n", | |
| " if word[0] == 'name':\n", | |
| " camera_index = int(word[-1])\n", | |
| "\n", | |
| " elif word[0] == CAMERA_INTRINSIC:\n", | |
| " mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n", | |
| " mat = mat[0:3, 0:3]\n", | |
| " camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] = mat\n", | |
| " elif word[0] == CAMERA_EXTRINSIC:\n", | |
| " mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n", | |
| " mat = mat[0:3, 0:4]\n", | |
| " camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] = mat\n", | |
| " else:\n", | |
| " continue" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def rotate_bound(image, angle):\n", | |
| " height, width, channel = image.shape\n", | |
| " \n", | |
| " mat = cv.getRotationMatrix2D((width/2, height/2), -angle, 1)\n", | |
| " return cv.warpAffine(image, mat, (width, height))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def crop_image(image, center, scale, rotate, resolution):\n", | |
| " center = Vector2(center) # assign new array\n", | |
| " height, width, channel = image.shape\n", | |
| " crop_ratio = 200 * scale / resolution\n", | |
| " \n", | |
| " if crop_ratio >= 2: # if box size is greater than two time of resolution px\n", | |
| " # scale down image\n", | |
| " height = math.floor(height / crop_ratio)\n", | |
| " width = math.floor(width / crop_ratio)\n", | |
| "\n", | |
| " if max([height, width]) < 2:\n", | |
| " # Zoomed out so much that the image is now a single pixel or less\n", | |
| " raise ValueError(\"Width or height is invalid!\")\n", | |
| "\n", | |
| "# image = skimage.transform.resize(image, (height, width), mode='constant')\n", | |
| "# image = image.resize(image, (height, width), mode='constant')\n", | |
| " image = cv.resize(image, (height, width))\n", | |
| " center /= crop_ratio\n", | |
| " scale /= crop_ratio\n", | |
| "\n", | |
| " ul = (center - 200 * scale / 2).astype(int)\n", | |
| " br = (center + 200 * scale / 2).astype(int) # Vector2\n", | |
| "\n", | |
| " if crop_ratio >= 2: # force image size 256 x 256\n", | |
| " br -= (br - ul - resolution)\n", | |
| "\n", | |
| " pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n", | |
| "\n", | |
| " if rotate != 0:\n", | |
| " ul -= pad_length\n", | |
| " br += pad_length\n", | |
| "\n", | |
| " src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n", | |
| " dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n", | |
| "\n", | |
| " new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.uint8)\n", | |
| " new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n", | |
| "\n", | |
| " if rotate != 0:\n", | |
| " # new_image = skimage.transform.rotate(new_image, rotate)\n", | |
| " new_image = rotate_bound(new_image, rotate)\n", | |
| " new_height, new_width, _ = new_image.shape\n", | |
| " new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n", | |
| "\n", | |
| " if crop_ratio < 2:\n", | |
| " new_image = cv.resize(new_image, (resolution, resolution))\n", | |
| "# new_image = skimage.transform.resize(new_image, (resolution, resolution), mode='constant')\n", | |
| "# new_image = Image.resize(new_image, (resolution, resolution), mode='constant')\n", | |
| "\n", | |
| " return new_image" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "subject = 3\n", | |
| "sequence = np.squeeze(np.where(available_segment[subject-1])) + 1\n", | |
| "camera = 8\n", | |
| "frame = 4748\n", | |
| "\n", | |
| "image = SequentialDictionary()\n", | |
| "for format in available_format:\n", | |
| " video[subject][sequence][format][camera].set(cv.CAP_PROP_POS_FRAMES, frame)\n", | |
| " success, image[format] = video[subject][sequence][format][camera].read()\n", | |
| " assert success\n", | |
| "\n", | |
| "height, width, channel = np.asarray([\n", | |
| " video[subject][sequence][VIDEO_RGB][camera].get(cv.CAP_PROP_FRAME_HEIGHT),\n", | |
| " video[subject][sequence][VIDEO_RGB][camera].get(cv.CAP_PROP_FRAME_WIDTH),\n", | |
| " 3,\n", | |
| "]).astype(np.int)\n", | |
| " \n", | |
| "for image_name in ['checker', 'room', 'flower', ]:\n", | |
| " image[image_name] = cv.imread('{image_name}.jpg'.format(image_name=image_name))\n", | |
| " image[image_name] = cv.resize(image[image_name], (height, width))\n", | |
| "\n", | |
| "gitter = 0.4 + 0.8 * random.random()\n", | |
| "\n", | |
| "background = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 2] < 200\n", | |
| "chair = image[VIDEO_MASK_CHAIR][:, :, 2] < 200\n", | |
| "pants = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 0] < 200\n", | |
| "shirts = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 1] < 200\n", | |
| "\n", | |
| "image[VIDEO_RGB][shirts] = image['checker'][shirts]\n", | |
| "image[VIDEO_RGB][pants] = image[VIDEO_RGB][pants] * gitter\n", | |
| "image[VIDEO_RGB][background] = image['room'][background]\n", | |
| "image[VIDEO_RGB][chair] = image['flower'][chair]\n", | |
| "\n", | |
| "in_3D = np.reshape(annot[subject][sequence][ANNOT_CAMERA_3D][camera, 0][frame], newshape=(-1, 3))\n", | |
| "\n", | |
| "num_keypoints = len(in_3D)\n", | |
| "\n", | |
| "# reshape for easy matrix multiplication\n", | |
| "in_3D = np.concatenate((in_3D, np.ones(shape=(num_keypoints, 1))), axis=1).transpose(1, 0)\n", | |
| "identity_transform = np.concatenate((np.eye(3), np.ones(shape=(3, 1))), axis=1)\n", | |
| "\n", | |
| "projected = np.matmul(identity_transform, in_3D)\n", | |
| "projected = np.matmul(camera_parameter[subject][sequence][camera][CAMERA_INTRINSIC], projected)\n", | |
| "projected = projected / projected[-1, :]\n", | |
| "projected = projected.transpose(1, 0)\n", | |
| "\n", | |
| "pad = np.asarray([50, 50], dtype=np.int)\n", | |
| "ul = np.asarray([np.min(projected[:, 0]), np.min(projected[:, 1])], dtype=np.int) - pad\n", | |
| "br = np.asarray([np.max(projected[:, 0]), np.max(projected[:, 1])], dtype=np.int) + pad\n", | |
| "\n", | |
| "center = (ul + br) * 0.5\n", | |
| "center = center.astype(np.int)\n", | |
| "scale = np.max(br - ul) / 200\n", | |
| "\n", | |
| "tmp = crop_image(image[VIDEO_RGB], center, scale, 30, 256)\n", | |
| "cv.imwrite('crop.jpg', tmp)\n", | |
| "\n", | |
| "image[VIDEO_MASK_HUMAN_AND_CHAIR][ul[1]:br[1], ul[0]:br[0], :] = [255, 255, 255]\n", | |
| "\n", | |
| "for keypoint in projected:\n", | |
| " x, y, _ = keypoint\n", | |
| " \n", | |
| " for tx in range(-10, 10):\n", | |
| " for ty in range(-10, 10):\n", | |
| " xx = x + tx\n", | |
| " yy = y + ty\n", | |
| " \n", | |
| " if xx < 0 or image[VIDEO_RGB].shape[1] <= xx \\\n", | |
| " or yy < 0 or image[VIDEO_RGB].shape[0] <= yy:\n", | |
| " continue\n", | |
| " \n", | |
| " image[VIDEO_RGB][int(yy), int(xx), :] = [0, 0, 255]\n", | |
| " \n", | |
| "for format in available_format:\n", | |
| " success = cv.imwrite('{format}.jpg'.format(format=format), image[format])\n", | |
| " assert success" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for subject, sequence, format, camera in available_video:\n", | |
| " video[subject][sequence][format][camera].release()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# print('Video:', video_path)\n", | |
| "# print('Open:', video.isOpened())\n", | |
| "# print('Resolution:', '%dx%d' % (video.get(cv.CAP_PROP_FRAME_WIDTH), video.get(cv.CAP_PROP_FRAME_HEIGHT)))\n", | |
| "# print('Total frames:', video.get(cv.CAP_PROP_FRAME_COUNT))\n", | |
| "# print('Frame-rate:', video.get(cv.CAP_PROP_FPS))\n", | |
| "# print('OpenCV:', cv.__version__)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment