Compare commits: a30f1a5ad1...master (16 commits)
Author | SHA1 | Date |
---|---|---|
otthorn | 67bab05cc3 | 3 years ago |
otthorn | f4602b5b7a | 3 years ago |
otthorn | 19a8e41621 | 3 years ago |
otthorn | ee030ce624 | 3 years ago |
otthorn | 67c023daba | 3 years ago |
otthorn | 33adb79bf9 | 3 years ago |
otthorn | adf7d25d37 | 3 years ago |
otthorn | 48f59c2557 | 3 years ago |
otthorn | 394fbb1ba8 | 3 years ago |
otthorn | 851c2e42a3 | 3 years ago |
otthorn | 286d69250f | 3 years ago |
otthorn | 83588aec0f | 3 years ago |
otthorn | cc9eb2cb4e | 3 years ago |
otthorn | 0acee99124 | 3 years ago |
otthorn | 365767a6ae | 3 years ago |
otthorn | 90d053705a | 3 years ago |
@@ -0,0 +1,81 @@

# Why this fork

This is a fork of the original NeRF++ implementation. The original code has
some issues I needed to fix in order to get the algorithm working and to be
able to reproduce the authors' work.

Hopefully this version is better explained, comes with the necessary scripts,
and is free of those bugs, so that anyone can train on a dataset from start to
finish without having to delve into the code if they do not want to.
## How to install

Create a virtual env if you want to

```
python3 -m venv env
source env/bin/activate
```
Install the needed dependencies

```
pip3 install -r requirements.txt
```

You will also need [COLMAP](https://colmap.github.io/).
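On Debian/Ubuntu it is often available from the package manager (an assumption about your platform; otherwise build it from source following the COLMAP documentation):

```
sudo apt install colmap
```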
## How to run

### Dataset

First you need to create or find a dataset: a large set of images (at least 30,
more if you want a 360 degree reconstruction).
In order to maximise the quality of the reconstruction it is recommended to
take pictures under the same illumination, from different angles around the
same subject (take a step between each picture, do not only rotate).
Remember that higher-quality pictures can always be resized later if needed
with tools like ImageMagick (mogrify or convert):

```
mogrify -resize 800 *.jpg
```
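Note that `mogrify` resizes in place; with `convert`, the equivalent writes a copy instead:

```
convert input.jpg -resize 800 resized.jpg
```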
### Camera pose estimation

Then use the COLMAP wrapper `colmap_runner/run_colmap.py`.

First change the two lines corresponding to input and output at the end of the
script: `img_dir` is your dataset and `output_dir` is the output. If your
COLMAP binary is not located at `/usr/local/bin/colmap`, also change that.
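The end of the script contains assignments roughly like the following (the paths are placeholders to adapt to your setup):

```
img_dir = "/path/to/your/images"    # your dataset
output_dir = "/path/to/output"      # where posed_images/ will be written
```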
```
cd colmap_runner
python3 run_colmap.py
```
Then you will need to use the `format_dataset.py` script to transform the
COLMAP wrapper's output into the data structure required by NeRF++.

You again need to change the `input_path` and `output_path`.

```
python3 format_dataset.py
```
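Afterwards, `output_path` has the layout NeRF++ expects (derived from `format_dataset.py`, shown later in this diff): intrinsics and poses are stored as space-separated flattened 4x4 matrices, and 10 randomly sampled images are held out for testing.

```
output_path/
├── train/
│   ├── intrinsics/   # one <image>.txt per image, flattened 4x4 K
│   ├── pose/         # one <image>.txt per image, flattened 4x4 W2C
│   └── rgb/          # the images themselves
├── test/             # same structure, 10 randomly sampled images
└── validation -> ./test
```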
Before training you can visualise your camera poses using the
`camera_visualizer/visualize_cameras.py` script.

Note: the `cam_dict_norm.json` file it expects is the `kai_cameras_normalized.json`
created by the COLMAP wrapper. Alternatively, you can use the `vizu.py` script.
Both hardcode their input paths, so edit those first and run the script
directly, as shown below.
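For example (assuming `vizu.py` sits at the repository root):

```
python3 vizu.py
```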
### Training the model

You then need to create the configuration file; copying the example in
`configs` and tweaking the values to your needs is recommended. Refer to the
help inside `ddp_train_nerf.py` if you need to understand a parameter.
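For illustration, such a config might start like this; the names below follow the NeRF++ config style but are assumptions here, so cross-check them against the help in `ddp_train_nerf.py`:

```
### hypothetical excerpt of configs/lupo/training_lupo.txt
expname = lupo       # experiment name, used for logs and checkpoints
basedir = ./logs     # where logs and checkpoints are written
datadir = ./data     # parent directory of your formatted dataset
scene = lupo         # the dataset folder produced by format_dataset.py
N_rand = 1024        # number of random rays per gradient step
```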
```
python3 ddp_train_nerf.py --config configs/lupo/training_lupo.txt
```

Your training should now be running. If you want to visualise the training in
real time with tensorboard, you can use:

```
tensorboard --logdir logs --host localhost
```

and open the address it prints (`ip:port`) in your browser; it should be
`localhost:6006`.
@@ -0,0 +1,19 @@
"""Visualise the cameras using the output from run_colmap.py."""
from camera_visualizer import visualize_cameras

import open3d as o3d
import json
import os

sphere_radius = 1.0
camera_size = 0.1

base_dir = "./lupo_output_dir_4/"

# Load the normalized camera dictionary written by the COLMAP wrapper
cam_dict_path = os.path.join(base_dir, "posed_images/kai_cameras_normalized.json")
with open(cam_dict_path) as fd:
    cam_dict = json.load(fd)

# A single group of cameras, drawn in green (RGB [0, 1, 0])
colored_camera_dicts = [([0, 1, 0], cam_dict)]

visualize_cameras.visualize_cameras(colored_camera_dicts, sphere_radius, camera_size=camera_size)
@@ -0,0 +1,94 @@
"""Given the output of the run_colmap.py script, create a usable dataset
compatible with the NeRF++ format. Made for Unix-like systems (POSIX-style
paths); has not been tested on Windows."""

import os
import json
import numpy as np
import shutil

input_path = "./lupo_output_dir_4"
output_path = "./lupo_output_data_npp_3"

# Read the JSON file containing all the data
cam_dict_norm_path = os.path.join(input_path, "posed_images/kai_cameras_normalized.json")
with open(cam_dict_norm_path) as fd:
    cam_dict_norm = json.load(fd)

# Make the train directories
train_path = os.path.join(output_path, "train")
train_int_path = os.path.join(train_path, "intrinsics")
train_pose_path = os.path.join(train_path, "pose")
train_rgb_path = os.path.join(train_path, "rgb")

os.makedirs(train_path, exist_ok=True)
os.makedirs(train_int_path, exist_ok=True)
os.makedirs(train_pose_path, exist_ok=True)
os.makedirs(train_rgb_path, exist_ok=True)

# Make the test directories
test_path = os.path.join(output_path, "test")
test_int_path = os.path.join(test_path, "intrinsics")
test_pose_path = os.path.join(test_path, "pose")
test_rgb_path = os.path.join(test_path, "rgb")

os.makedirs(test_path, exist_ok=True)
os.makedirs(test_int_path, exist_ok=True)
os.makedirs(test_pose_path, exist_ok=True)
os.makedirs(test_rgb_path, exist_ok=True)

# Sample N images for the test set (random, so each run gives a new split)
N = 10
image_list = sorted(cam_dict_norm.keys())
sampled = np.random.choice(image_list, N, replace=False)

# Write the files with the corresponding data
for img_name in sorted(cam_dict_norm.keys()):
    # Retrieve the data; K and W2C are flattened 4x4 matrices (16 values)
    K = np.array(cam_dict_norm[img_name]['K'])
    W2C = np.array(cam_dict_norm[img_name]['W2C'])

    img_name_path = os.path.splitext(img_name)[0]  # strip the extension

    # training set
    if img_name not in sampled:
        # Create the paths
        train_int_img_path = os.path.join(train_int_path, img_name_path + ".txt")
        train_pose_img_path = os.path.join(train_pose_path, img_name_path + ".txt")
        train_rgb_img_path = os.path.join(train_rgb_path, img_name)

        # Write intrinsics
        with open(train_int_img_path, "w") as fd:
            fd.write(" ".join(map(str, K)))

        # Write poses
        with open(train_pose_img_path, "w") as fd:
            fd.write(" ".join(map(str, W2C)))

        # Copy image
        source_image_path = os.path.join(input_path, "posed_images/images", img_name)
        shutil.copy(source_image_path, train_rgb_img_path)

    # testing set
    else:
        # Create the paths
        test_int_img_path = os.path.join(test_int_path, img_name_path + ".txt")
        test_pose_img_path = os.path.join(test_pose_path, img_name_path + ".txt")
        test_rgb_img_path = os.path.join(test_rgb_path, img_name)

        # Write intrinsics
        with open(test_int_img_path, "w") as fd:
            fd.write(" ".join(map(str, K)))

        # Write poses
        with open(test_pose_img_path, "w") as fd:
            fd.write(" ".join(map(str, W2C)))

        # Copy image
        source_image_path = os.path.join(input_path, "posed_images/images", img_name)
        shutil.copy(source_image_path, test_rgb_img_path)


# Create the validation dataset as a symlink to the test set
validation_path = os.path.join(output_path, "validation")
os.symlink("./test", validation_path)
File diff suppressed because it is too large.
@@ -0,0 +1,81 @@
"""
Visualise the camera positions for a non-JSON-format dataset.
"""

import numpy as np
import open3d as o3d

from camera_visualizer.visualize_cameras import (
    frustums2lineset,
    get_camera_frustum,
)


def format_str_to_array(input_str):
    """Parse a whitespace-separated string of 16 floats into a 4x4 matrix."""
    output = input_str.split(" ")
    output = np.array(list(map(float, output)))
    output = output.reshape((4, 4))

    return output


# Parameters
basepath = "/home/user/Downloads/lf_data/ship/camera_path"
number_of_pose = 199
img_size = [1008, 548]
camera_size = 0.1
camera_color = (0, 0.5, 1)
sphere_radius = 1.0

# Get intrinsics
with open(basepath + "/intrinsics/000000.txt", "r") as fd:
    K = fd.read()

# Format intrinsics
K = format_str_to_array(K)
# print("K = ", K)

# Get poses: each file stores a W2C matrix; invert it to get camera-to-world
list_of_poses = []

for k in range(number_of_pose):
    with open(basepath + f"/pose/{k:06}.txt", "r") as fd:
        W2C = fd.read()

    W2C = format_str_to_array(W2C)
    C2W = np.linalg.inv(W2C)
    list_of_poses.append(C2W)

    # print("W2C = ", W2C)


# Draw everything
sphere = o3d.geometry.TriangleMesh.create_sphere(
    radius=sphere_radius, resolution=25
)
sphere = o3d.geometry.LineSet.create_from_triangle_mesh(sphere)
sphere.paint_uniform_color((0.9, 0.9, 0.9))

coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
    size=0.5, origin=[0.0, 0.0, 0.0]
)
things_to_draw = [sphere, coord_frame]


frustums = []

for pose in list_of_poses:
    camera_frustum = get_camera_frustum(
        img_size, K, pose, frustum_length=camera_size, color=camera_color
    )
    frustums.append(camera_frustum)

cameras = frustums2lineset(frustums)
things_to_draw.append(cameras)

o3d.visualization.draw_geometries(things_to_draw)
@@ -0,0 +1,179 @@
import os
import pickle

import mcubes
import numpy as np
import torch
import torch.distributed
import trimesh
from tqdm import tqdm

from ddp_train_nerf import (
    cleanup,
    config_parser,
    create_nerf,
    setup,
    setup_logger,
)

parser = config_parser()
args = parser.parse_args()

# hardcode settings
args.world_size = 1
args.rank = 0

# setup
setup(args.rank, args.world_size)
start, models = create_nerf(args.rank, args)

net_0 = models["net_0"]

fg_far_depth = 1

# Weird way to pick the submodules out of the network; should be changed if
# something better exists.
for idx, m in enumerate(net_0.modules()):
    # print(idx, "->", m)

    # foreground
    if idx == 3:
        fg_embedder_position = m
    if idx == 4:
        fg_embedder_viewdir = m
    if idx == 5:
        fg_mlp_net = m

    # background
    # if idx == 40:
    #     bg_embedder_position = m
    # if idx == 41:
    #     bg_embedder_viewdir = m
    # if idx == 42:
    #     bg_mlp_net = m

# put everything on GPU
device = "cuda"


def query_occupancy(
    position, embedder_position, embedder_viewdir, mlp_net, device="cuda"
):
    """
    Given a position, return the occupancy (density) predicted by the network.

    Given a position, the appropriate embedders and the MLPNet, return the
    corresponding occupancy.

    Parameters
    ----------
    position : torch.Tensor
        An (x, y, z) tensor of the position to query
    embedder_position, embedder_viewdir : nerf_network.Embedder
        Positional and view-direction embedders
    mlp_net : nerf_network.MLPNet
        A simple MLP implementation written for NeRF
    device : str, optional
        The torch device, can be either `cpu` or `cuda`

    Returns
    -------
    sigma : float
        The occupancy at the given position.
    """
    # take a random ray direction as it does not matter for sigma
    ray_d = torch.rand(3, device=device)

    # normalize ray direction
    ray_d_norm = torch.norm(ray_d)
    ray_d = ray_d / ray_d_norm

    # forge the input
    nn_input = torch.cat(
        (embedder_position(position), embedder_viewdir(ray_d)), dim=-1
    )

    # forward the NN
    nn_raw = mlp_net(nn_input)
    sigma = float(nn_raw["sigma"])

    return sigma


# anonymous function (convenience wrapper; not used in the loop below)
f = lambda x, y, z: query_occupancy(
    torch.tensor([x, y, z], dtype=torch.float32, device=device),
    fg_embedder_position,
    fg_embedder_viewdir,
    fg_mlp_net,
)


def marching_cube_and_render(sigma_list, threshold):
    vertices, triangles = mcubes.marching_cubes(sigma_list, threshold)
    mesh = trimesh.Trimesh(vertices / N - 0.5, triangles)
    mesh.show()


# position = torch.rand(3, device=device)
# position = torch.tensor([0.1, 0.1, 0.1], device=device)

ray_d = torch.rand(3, device=device)
# normalize ray direction
ray_d_norm = torch.norm(ray_d)
ray_d = ray_d / ray_d_norm


N = 256
t = np.linspace(-2, 2, N + 1)
# A cube of size 2x2x2 is necessary to contain a sphere of radius 1.0

query_pts = np.stack(np.meshgrid(t, t, t), -1).astype(np.float32)
# print(query_pts.shape)
sh = query_pts.shape
flat = query_pts.reshape([-1, 3])

# raw_voxel = torch.zeros(N+1, N+1, N+1, 4)  # N, D, H, W
fg_raw_voxel = torch.zeros(N + 1, N + 1, N + 1)
# bg_raw_voxel = torch.zeros(N+1, N+1, N+1)

i = 0
for x, y, z in tqdm(flat):
    position = torch.tensor([x, y, z], device=device)
    # bg_position = torch.cat((position, torch.tensor([1], device=device)))

    # concat the output of the embedding
    fg_input = torch.cat(
        (fg_embedder_position(position), fg_embedder_viewdir(ray_d)), dim=-1
    )
    # bg_input = torch.cat((bg_embedder_position(bg_position), bg_embedder_viewdir(ray_d)), dim=-1)

    # forward
    fg_raw = fg_mlp_net(fg_input)
    # bg_raw = bg_mlp_net(bg_input)

    # raw_voxel.append(position + float(nn_raw['sigma']))
    fg_sigma = float(fg_raw["sigma"])
    # bg_sigma = float(bg_raw["sigma"])

    nx, ny, nz = np.unravel_index(i, (N + 1, N + 1, N + 1))
    i += 1  # update index
    # raw_voxel[unraveled_index] = torch.tensor([sigma, x, y, z])
    fg_raw_voxel[nx, ny, nz] = fg_sigma
    # bg_raw_voxel[nx, ny, nz] = bg_sigma


fg_sigma = np.array(fg_raw_voxel)
# bg_sigma = np.array(bg_raw_voxel)
threshold = 0.5

# save the raw voxel grid in pickle format
with open("raw_voxel_256.pkl", "wb") as fd:
    pickle.dump(fg_sigma, fd)

# vertices, triangles = mcubes.marching_cubes(sigma, threshold)
# mesh = trimesh.Trimesh(vertices / N - .5, triangles)
# mesh.show()
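The commented-out lines at the end hint at the intended post-processing. A minimal sketch of that step as a separate snippet, loading the saved voxel grid and extracting a mesh with the same `mcubes`/`trimesh` calls (file name, `N` and `threshold` taken from the script above):

```
import pickle

import mcubes
import trimesh

N = 256
threshold = 0.5

# Load the density grid saved by the script above
with open("raw_voxel_256.pkl", "rb") as fd:
    sigma = pickle.load(fd)

# Extract the isosurface and rescale vertices from voxel indices to [-0.5, 0.5]
vertices, triangles = mcubes.marching_cubes(sigma, threshold)
mesh = trimesh.Trimesh(vertices / N - 0.5, triangles)
mesh.show()
```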