Parse Omnidata's camera intrinsics and pose, convert them to OpenCV format, and provide an example of back-projecting an RGB-D image to a 3D point cloud.
import json
import math

import numpy as np
import cv2
import torch
from plyfile import PlyData, PlyElement
from pytorch3d.transforms import euler_angles_to_matrix
from pytorch3d.renderer import FoVPerspectiveCameras
# Copied from omnidata; the name is misleading: the returned R/T are actually world_to_camera
def _get_cam_to_world_R_T_K(point_info):
    EULER_X_OFFSET_RADS = math.radians(90.0)

    location = point_info['camera_location']
    rotation = point_info['camera_rotation_final']
    fov = point_info['field_of_view_rads']

    # Recover cam -> world
    ex, ey, ez = rotation
    R = euler_angles_to_matrix(torch.tensor(
        [(ex - EULER_X_OFFSET_RADS, -ey, -ez)],
        dtype=torch.double), 'XZY')
    Tx, Ty, Tz = location
    T = torch.tensor([[-Tx, Tz, Ty]], dtype=torch.double)

    # PyTorch3D expects world -> cam
    R_inv = R.transpose(1, 2)
    T_inv = -R.bmm(T.unsqueeze(-1)).squeeze(-1)

    K = FoVPerspectiveCameras(R=R_inv, T=T_inv, fov=fov, degrees=False).compute_projection_matrix(
        znear=0.001, zfar=512.0, fov=fov, aspect_ratio=1.0, degrees=False)

    return dict(
        cam_to_world_R=R_inv.squeeze(0).float(),
        cam_to_world_T=T_inv.squeeze(0).float(),
        proj_K=K.squeeze(0).float(),
        proj_K_inv=K[:, :3, :3].inverse().squeeze(0).float())
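# The helper below is an illustrative addition (not in the original gist): assuming the dict keys
# above, despite their names, hold the world->camera R/T in PyTorch3D's row-vector convention
# (X_cam = X_world @ R + T), it assembles the equivalent column-vector 4x4 world->camera matrix,
# matching the transpose done later in unproject_omni. `_world_to_cam_4x4` is a hypothetical helper.
def _world_to_cam_4x4(rtk):
    R = rtk['cam_to_world_R'].numpy().T  # row-vector convention -> column-vector convention
    T = rtk['cam_to_world_T'].numpy()
    M = np.eye(4, dtype=np.float32)
    M[:3, :3] = R
    M[:3, 3] = T
    return M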
def project_rgb_to_point_cloud(rgb_image, depth_map, K, R, t):
    """
    Unproject an RGB-D image to a colored point cloud.

    Camera coordinate system: x-right, y-down, z-forward.
    3D world coordinates project to 2D homogeneous pixel coordinates by: x' = K * [R|t] * X

    Parameters:
    - rgb_image: RGB image
    - depth_map: metric depth map
    - K: camera intrinsic matrix, i.e. [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
    - R: camera rotation matrix (world to camera)
    - t: camera translation vector (world to camera)

    Returns:
    - point_cloud: colored point cloud, Nx6 array; each row is [x, y, z, r, g, b]
    """
    h, w = depth_map.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')

    # construct homogeneous pixel coordinates (pixel centers)
    uv1 = np.stack([i.ravel() + 0.5, j.ravel() + 0.5, np.ones_like(i).ravel()], axis=1)

    # pixel coordinates -> camera coordinates
    K_inv = np.linalg.inv(K)
    xyz_camera = K_inv @ (uv1.T * depth_map.ravel())

    # camera coordinates -> world coordinates
    R_inv = R.T         # camera to world
    t_inv = -R_inv @ t  # camera to world
    xyz_world = R_inv @ xyz_camera + t_inv[:, None]
    xyz_world = xyz_world.T

    # point cloud with color
    colors = rgb_image.reshape(-1, 3)
    point_cloud = np.hstack([xyz_world, colors])

    # filter out invalid points
    valid_mask = (depth_map.ravel() > 0) & (depth_map.ravel() < 100)
    point_cloud = point_cloud[valid_mask]

    return point_cloud
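# Optional round-trip check (an added sketch, not part of the original gist): world points returned by
# project_rgb_to_point_cloud (columns 0:3) should reproject back onto their source pixels via
# x' = K * [R|t] * X. `reproject_to_pixels` is a hypothetical helper for that check.
def reproject_to_pixels(points_xyz, K, R, t):
    # points_xyz: Nx3 world coordinates; K, R, t as in project_rgb_to_point_cloud
    xyz_cam = R @ points_xyz.T + t[:, None]  # world -> camera coordinates, 3xN
    uvw = K @ xyz_cam                        # camera -> homogeneous pixel coordinates, 3xN
    return (uvw[:2] / uvw[2:]).T             # Nx2 pixel coordinates (u, v)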
def save_point_cloud(points_3d, save_path, binary=True):
    """
    Save a colored point cloud to disk as a PLY file.

    Parameters:
    - points_3d: colored point cloud, Nx6 array; each row is [x, y, z, r, g, b]
    - save_path: path to save the point cloud
    - binary: write binary PLY via plyfile if True, otherwise ASCII PLY via numpy
    """
    python_types = (float, float, float, int, int, int)
    npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'),
                 ('green', 'u1'), ('blue', 'u1')]
    if binary is True:
        # Format into a numpy structured array
        vertices = []
        for row_idx in range(points_3d.shape[0]):
            cur_point = points_3d[row_idx]
            vertices.append(
                tuple(
                    dtype(point)
                    for dtype, point in zip(python_types, cur_point)))
        vertices_array = np.array(vertices, dtype=npy_types)
        el = PlyElement.describe(vertices_array, 'vertex')
        # write
        PlyData([el]).write(save_path)
    else:
        x = np.squeeze(points_3d[:, 0])
        y = np.squeeze(points_3d[:, 1])
        z = np.squeeze(points_3d[:, 2])
        r = np.squeeze(points_3d[:, 3])
        g = np.squeeze(points_3d[:, 4])
        b = np.squeeze(points_3d[:, 5])
        ply_head = 'ply\n' \
                   'format ascii 1.0\n' \
                   'element vertex %d\n' \
                   'property float x\n' \
                   'property float y\n' \
                   'property float z\n' \
                   'property uchar red\n' \
                   'property uchar green\n' \
                   'property uchar blue\n' \
                   'end_header' % r.shape[0]
        # ---- Save ply data to disk
        np.savetxt(save_path, np.column_stack([x, y, z, r, g, b]),
                   fmt='%f %f %f %d %d %d', header=ply_head, comments='')
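# Read-back sketch (added for illustration; assumes the binary PLY layout written above): loads a saved
# cloud with plyfile so the unprojection can be inspected numerically. `load_point_cloud` is a
# hypothetical helper, not part of the original gist.
def load_point_cloud(ply_path):
    vertex = PlyData.read(ply_path)['vertex']
    xyz = np.stack([vertex['x'], vertex['y'], vertex['z']], axis=1)
    rgb = np.stack([vertex['red'], vertex['green'], vertex['blue']], axis=1)
    return np.hstack([xyz, rgb.astype(np.float32)])  # Nx6, matching save_point_cloud's input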
def unproject_omni(rgb_path, pc_path):
    depth_path = rgb_path.replace('/images/', '/depth/')[:-7] + "depth_zbuffer.png"
    point_info_path = rgb_path.replace('/images/', '/point_info/')[:-7] + "point_info.json"

    rgb_image = cv2.imread(rgb_path)[:, :, ::-1]  # BGR -> RGB
    depth_map = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 512.0

    # camera intrinsics and pose
    with open(point_info_path, 'r') as f:
        omnidata = json.load(f)

    # load R, T, K; despite the key names, the function actually returns world_to_camera
    rtk = _get_cam_to_world_R_T_K(omnidata)

    # camera intrinsics, convert to OpenCV format
    P = rtk['proj_K'].numpy()  # OpenGL-style projection matrix
    w = h = 512  # omnidata images should all be 512x512
    fx, fy, cx, cy = P[0, 0] * w / 2, P[1, 1] * h / 2, (w - P[0, 2] * w) / 2, (P[1, 2] * h + h) / 2
    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])

    # camera extrinsics/pose, convert to OpenCV format
    world_to_cam_R_r = rtk['cam_to_world_R'].numpy()  # for right multiplication (row vectors)
    world_to_cam_R = world_to_cam_R_r.T               # for left multiplication (column vectors)
    world_to_cam_T = rtk['cam_to_world_T'].numpy()

    # Coordinate system transformation, i.e. pose in PyTorch3D -> pose in OpenCV
    # PyTorch3D: right-handed, x-left, y-up, z-forward
    # OpenCV:    right-handed, x-right, y-down, z-forward
    pytorch3d_to_opencv = np.array([[-1, 0, 0], [0, -1, 0], [0, 0, 1]])
    world_to_cam_R = pytorch3d_to_opencv @ world_to_cam_R
    world_to_cam_T = pytorch3d_to_opencv @ world_to_cam_T

    # Now we have R, T, K in OpenCV format
    pc = project_rgb_to_point_cloud(rgb_image, depth_map, K, world_to_cam_R, world_to_cam_T)
    save_point_cloud(pc, pc_path)
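# Sanity check for the intrinsics conversion above (illustrative assumption: square 512x512 images with
# a centered principal point, i.e. P[0, 2] == P[1, 2] == 0): the recovered values should then reduce to
# fx = fy = w / (2 * tan(fov / 2)) and cx = cy = w / 2. `_expected_fx` is a hypothetical helper.
def _expected_fx(fov_rads, width=512):
    return width / (2.0 * math.tan(fov_rads / 2.0))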
if __name__ == '__main__':
    # read omnidata
    rgb_path_1 = '/home/dzc/workspace/stereo/gaussian-splatting/data/taskonomy/onaga/images/point_0_view_0_domain_rgb.png'
    rgb_path_2 = '/home/dzc/workspace/stereo/gaussian-splatting/data/taskonomy/onaga/images/point_0_view_1_domain_rgb.png'
    unproject_omni(rgb_path_1, 'point_cloud_1.ply')
    unproject_omni(rgb_path_2, 'point_cloud_2.ply')
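# Possible follow-up (hypothetical, not in the original gist): both views are unprojected into the same
# world frame, so their clouds can simply be concatenated to eyeball the alignment, e.g.
#   merged = np.vstack([pc_view_0, pc_view_1])
#   save_point_cloud(merged, 'point_cloud_merged.ply')
# where pc_view_0 / pc_view_1 are the Nx6 arrays returned by project_rgb_to_point_cloud for each view.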