Compare commits


@ -0,0 +1,81 @@
# Why this fork
This is a fork of the original NeRF++ implementation. The original code has
some issues I needed to fix in order to get the algorithm working and to be
able to reproduce the authors' work.
Hopefully this version is better explained, ships with the necessary scripts,
and is free of those bugs, so that someone can train on a dataset from start
to finish without having to delve into the code if they do not want to.
## How to install
Create a virtual env if you want to
```
python3 -m venv env
source env/bin/activate
```
Install the needed dependencies
```
pip3 install -r requirements.txt
```
You will also need [COLMAP](https://colmap.github.io/).
## How to run
### Dataset
First you need to create or find a dataset: a large set of images (at least
30, more if you want a 360-degree reconstruction).
To maximise the quality of the reconstruction, it is recommended to take the
pictures under the same illumination and from different angles around the same
subject (take a step between each picture, do not only rotate).
Remember that higher-quality pictures can always be resized later if needed
with tools like ImageMagick (mogrify or convert):
```
mogrify -resize 800 *.jpg
```
### Camera pose estimation
Then use the COLMAP wrapper `colmap_runner/run_colmap.py`.
First change the two lines at the end of the script that set the input and
output: `img_dir` is your dataset and `out_dir` is the output. If your COLMAP
binary is not located at `/usr/local/bin/colmap`, also change `colmap_bin`.
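For reference, the lines to edit look like this (the paths below are placeholders for your own directories):
```
# near the top of run_colmap.py, inside bash_run()
colmap_bin = "/usr/local/bin/colmap"  # adjust if COLMAP is installed elsewhere

# at the end of run_colmap.py
img_dir = "/path/to/my_dataset/images"
out_dir = "/path/to/colmap_output"
run_mvs = False
main(img_dir, out_dir, run_mvs=run_mvs)
```
Then run the wrapper: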
```
cd colmap_runner
python3 run_colmap.py
```
Then you will need to use the `format_dataset.py` script to transform the
COLMAP output produced by the wrapper into the directory structure required by
NeRF++. You again need to change `input_path` and `output_path`.
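The two lines near the top of `format_dataset.py` look like this (placeholder paths shown):
```
input_path = "/path/to/colmap_output"  # the out_dir used with run_colmap.py
output_path = "/path/to/nerfpp_dataset"  # where the NeRF++-formatted dataset is written
```
Then run: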
```
python3 format_dataset.py
```
Before training you can visualise your camera poses using the
`camera_visualizer/visualize_cameras.py` script.
Note: the `cam_dict_norm.json` file it expects is the `kai_cameras_normalized.json`
created by the COLMAP wrapper. Alternatively, you can use the `vizu.py` script.
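A minimal driver, mirroring the small helper script added in this fork, looks like this (`base_dir` is a placeholder for the output directory of `run_colmap.py`):
```
import json
import os

from camera_visualizer import visualize_cameras

base_dir = "/path/to/colmap_output"
cam_dict_path = os.path.join(base_dir, "posed_images/kai_cameras_normalized.json")
with open(cam_dict_path) as fd:
    cam_dict = json.load(fd)

# draw the cameras in green inside the unit sphere
colored_camera_dicts = [([0, 1, 0], cam_dict)]
visualize_cameras.visualize_cameras(colored_camera_dicts, 1.0, camera_size=0.1)
```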
### Training the model
You then need to create a configuration file; copying an example from
`configs` and tweaking the values to your needs is recommended. Refer to the
help inside `ddp_train_nerf.py` if you need to understand a parameter.
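The exact contents depend on your dataset, but a minimal sketch looks roughly like the following (parameter names follow the example configs; the paths and values are placeholders, check them against the help in `ddp_train_nerf.py`):
```
### INPUT
# parent folder containing the dataset created by format_dataset.py
datadir = ./data
scene = my_scene
expname = my_scene_training
basedir = ./logs

### TRAINING
N_iters = 500001
N_rand = 1024
lrate = 0.0005

### CASCADE
cascade_level = 2
cascade_samples = 64,128
```
Then launch the training: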
```
python3 ddp_train_nerf.py --config configs/lupo/training_lupo.txt
```
Your training should now be running. If you want to visualise the training in
real time with TensorBoard, you can use:
```
tensorboard --logdir logs --host localhost
```
Then open the given address (`ip:port`) in your browser; it should be
`0.0.0.0:6006`.

@ -1,3 +1,6 @@
Notice: this is a fork of [the original NeRF++
implementation](https://github.com/Kai-46/nerfplusplus).
# NeRF++
Codebase for arXiv preprint ["NeRF++: Analyzing and Improving Neural Radiance Fields"](http://arxiv.org/abs/2010.07492)
* Work with 360 capture of large-scale unbounded scenes.

@ -0,0 +1,19 @@
"""Visualise the cameras using the ouptut from run_colmap.py"""
from camera_visualizer import visualize_cameras
import open3d as o3d
import json
import os
sphere_radius = 1.
camera_size = 0.1
base_dir = "./lupo_output_dir_4/"
cam_dict_path = os.path.join(base_dir, "posed_images/kai_cameras_normalized.json")
with open(cam_dict_path) as fd:
    cam_dict = json.load(fd)
colored_camera_dicts = [([0,1,0], cam_dict)]
visualize_cameras.visualize_cameras(colored_camera_dicts, sphere_radius, camera_size=camera_size)

@ -0,0 +1,94 @@
"""Given the output of the run_colmap.py script, create a usable dataset
compatible with the NeRF++ format. Made for Unix-like (posix style path), has
not been tested for Windows."""
import os
import json
import numpy as np
import shutil
input_path = "./lupo_output_dir_4"
output_path = "./lupo_output_data_npp_3"
# Read the JSON file containing all the data
cam_dict_norm_path = os.path.join(input_path, "posed_images/kai_cameras_normalized.json")
with open(cam_dict_norm_path) as fd:
    cam_dict_norm = json.load(fd)
# Make the train directories
train_path = os.path.join(output_path, "train")
train_int_path = os.path.join(train_path, "intrinsics")
train_pose_path = os.path.join(train_path, "pose")
train_rgb_path = os.path.join(train_path, "rgb")
os.makedirs(train_path, exist_ok=True)
os.makedirs(train_int_path, exist_ok=True)
os.makedirs(train_pose_path, exist_ok=True)
os.makedirs(train_rgb_path, exist_ok=True)
# Make the test directories
test_path = os.path.join(output_path, "test")
test_int_path = os.path.join(test_path, "intrinsics")
test_pose_path = os.path.join(test_path, "pose")
test_rgb_path = os.path.join(test_path, "rgb")
os.makedirs(test_path, exist_ok=True)
os.makedirs(test_int_path, exist_ok=True)
os.makedirs(test_pose_path, exist_ok=True)
os.makedirs(test_rgb_path, exist_ok=True)
# Sample images for the test set
N = 10
image_list = sorted(cam_dict_norm.keys())
sampled = np.random.choice(image_list, N, replace=False)
# Write the files with the corresponding data
for img_name in sorted(cam_dict_norm.keys()):
    # Retrieve the data
    K = np.array(cam_dict_norm[img_name]['K'])
    W2C = np.array(cam_dict_norm[img_name]['W2C'])
    img_name_path = os.path.splitext(img_name)[0]  # strip the file extension
    # training set
    if img_name not in sampled:
        # Create the paths
        train_int_img_path = os.path.join(train_int_path, img_name_path + ".txt")
        train_pose_img_path = os.path.join(train_pose_path, img_name_path + ".txt")
        train_rgb_img_path = os.path.join(train_rgb_path, img_name)
        # Write intrinsics
        with open(train_int_img_path, "w") as fd:
            fd.write(" ".join(map(str, K)))
        # Write poses
        with open(train_pose_img_path, "w") as fd:
            fd.write(" ".join(map(str, W2C)))
        # Copy image
        source_image_path = os.path.join(input_path, "posed_images/images", img_name)
        shutil.copy(source_image_path, train_rgb_img_path)
    # testing set
    else:
        # Create the paths
        test_int_img_path = os.path.join(test_int_path, img_name_path + ".txt")
        test_pose_img_path = os.path.join(test_pose_path, img_name_path + ".txt")
        test_rgb_img_path = os.path.join(test_rgb_path, img_name)
        # Write intrinsics
        with open(test_int_img_path, "w") as fd:
            fd.write(" ".join(map(str, K)))
        # Write poses
        with open(test_pose_img_path, "w") as fd:
            fd.write(" ".join(map(str, W2C)))
        # Copy image
        source_image_path = os.path.join(input_path, "posed_images/images", img_name)
        shutil.copy(source_image_path, test_rgb_img_path)
# Create the validation dataset
validation_path = os.path.join(output_path, "validation")
os.symlink("./test", validation_path)

@ -47,7 +47,7 @@ def normalize_cam_dict(in_cam_dict_file, out_cam_dict_file, target_radius=1., in
geometry_norm = geometry.transform(tf)
o3d.io.write_triangle_mesh(out_geometry_file, geometry_norm)
mesh_norm = mesh.transform(tf)
#mesh_norm = mesh.transform(tf)
def transform_pose(W2C, translate, scale):
C2W = np.linalg.inv(W2C)
cam_center = C2W[:3, 3]

@ -7,108 +7,104 @@ from normalize_cam_dict import normalize_cam_dict
# Note: configure the colmap_bin to the colmap executable on your machine
#########################################################################
def bash_run(cmd):
colmap_bin = '/home/zhangka2/code/colmap/build/__install__/bin/colmap'
cmd = colmap_bin + ' ' + cmd
print('\nRunning cmd: ', cmd)
colmap_bin = "/usr/local/bin/colmap"
cmd = colmap_bin + " " + cmd
print("\nRunning cmd: ", cmd)
subprocess.check_call(['/bin/bash', '-c', cmd])
subprocess.check_call(["/bin/bash", "-c", cmd])
gpu_index = '-1'
gpu_index = "-1"
def run_sift_matching(img_dir, db_file, remove_exist=False):
print('Running sift matching...')
print("Running sift matching...")
if remove_exist and os.path.exists(db_file):
os.remove(db_file) # otherwise colmap will skip sift matching
os.remove(db_file) # otherwise colmap will skip sift matching
# feature extraction
# if there's no attached display, cannot use feature extractor with GPU
cmd = ' feature_extractor --database_path {} \
--image_path {} \
--ImageReader.single_camera 1 \
--ImageReader.camera_model SIMPLE_RADIAL \
--SiftExtraction.max_image_size 5000 \
--SiftExtraction.estimate_affine_shape 0 \
--SiftExtraction.domain_size_pooling 1 \
--SiftExtraction.use_gpu 1 \
--SiftExtraction.max_num_features 16384 \
--SiftExtraction.gpu_index {}'.format(db_file, img_dir, gpu_index)
cmd = f" feature_extractor --database_path {db_file}"\
f" --image_path {img_dir}"\
f" --ImageReader.single_camera 1"\
f" --ImageReader.camera_model SIMPLE_RADIAL"\
f" --SiftExtraction.max_image_size 5000 "\
f" --SiftExtraction.estimate_affine_shape 0"\
f" --SiftExtraction.domain_size_pooling 1"\
f" --SiftExtraction.use_gpu 1"\
f" --SiftExtraction.max_num_features 16384"\
f" --SiftExtraction.gpu_index {gpu_index}"
bash_run(cmd)
# feature matching
cmd = ' exhaustive_matcher --database_path {} \
--SiftMatching.guided_matching 1 \
--SiftMatching.use_gpu 1 \
--SiftMatching.max_num_matches 65536 \
--SiftMatching.max_error 3 \
--SiftMatching.gpu_index {}'.format(db_file, gpu_index)
cmd = f" exhaustive_matcher --database_path {db_file}"\
f" --SiftMatching.guided_matching 1"\
f" --SiftMatching.use_gpu 1"\
f" --SiftMatching.max_num_matches 65536"\
f" --SiftMatching.max_error 3"\
f" --SiftMatching.gpu_index {gpu_index}"
bash_run(cmd)
def run_sfm(img_dir, db_file, out_dir):
print('Running SfM...')
cmd = ' mapper \
--database_path {} \
--image_path {} \
--output_path {} \
--Mapper.tri_min_angle 3.0 \
--Mapper.filter_min_tri_angle 3.0'.format(db_file, img_dir, out_dir)
print("Running SfM...")
cmd = f" mapper"\
f" --database_path {db_file}"\
f" --image_path {img_dir}"\
f" --output_path {out_dir}"\
f" --Mapper.tri_min_angle 3.0"\
f" --Mapper.filter_min_tri_angle 3.0"
bash_run(cmd)
def prepare_mvs(img_dir, sparse_dir, mvs_dir):
print('Preparing for MVS...')
cmd = ' image_undistorter \
--image_path {} \
--input_path {} \
--output_path {} \
--output_type COLMAP \
--max_image_size 2000'.format(img_dir, sparse_dir, mvs_dir)
print("Preparing for MVS...")
cmd = f" image_undistorter"\
f" --image_path {img_dir}"\
f" --input_path {sparse_dir}"\
f" --output_path {mvs_dir}"\
f" --output_type COLMAP"\
f" --max_image_size 2000"
bash_run(cmd)
def run_photometric_mvs(mvs_dir, window_radius):
print('Running photometric MVS...')
cmd = ' patch_match_stereo --workspace_path {} \
--PatchMatchStereo.window_radius {} \
--PatchMatchStereo.min_triangulation_angle 3.0 \
--PatchMatchStereo.filter 1 \
--PatchMatchStereo.geom_consistency 1 \
--PatchMatchStereo.gpu_index={} \
--PatchMatchStereo.num_samples 15 \
--PatchMatchStereo.num_iterations 12'.format(mvs_dir,
window_radius, gpu_index)
print("Running photometric MVS...")
cmd = f" patch_match_stereo --workspace_path {mvs_dir}"\
f" --PatchMatchStereo.window_radius {window_radius}"\
f" --PatchMatchStereo.min_triangulation_angle 3.0"\
f" --PatchMatchStereo.filter 1"\
f" --PatchMatchStereo.geom_consistency 1"\
f" --PatchMatchStereo.gpu_index={gpu_index}"\
f" --PatchMatchStereo.num_samples 15"\
f" --PatchMatchStereo.num_iterations 12"
bash_run(cmd)
def run_fuse(mvs_dir, out_ply):
print('Running depth fusion...')
cmd = ' stereo_fusion --workspace_path {} \
--output_path {} \
--input_type geometric'.format(mvs_dir, out_ply)
print("Running depth fusion...")
cmd = f"stereo_fusion"\
f" --workspace_path {mvs_dir}"\
f" --output_path {out_ply}"\
f" --input_type geometric"
bash_run(cmd)
def run_possion_mesher(in_ply, out_ply, trim):
print('Running possion mesher...')
cmd = ' poisson_mesher \
--input_path {} \
--output_path {} \
--PoissonMeshing.trim {}'.format(in_ply, out_ply, trim)
print("Running possion mesher...")
cmd = f" poisson_mesher"\
f" --input_path {in_ply}"\
f" --output_path {out_ply}"\
f" --PoissonMeshing.trim {trim}"
bash_run(cmd)
@ -116,53 +112,57 @@ def main(img_dir, out_dir, run_mvs=False):
os.makedirs(out_dir, exist_ok=True)
#### run sfm
sfm_dir = os.path.join(out_dir, 'sfm')
sfm_dir = os.path.join(out_dir, "sfm")
os.makedirs(sfm_dir, exist_ok=True)
img_dir_link = os.path.join(sfm_dir, 'images')
img_dir_link = os.path.join(sfm_dir, "images")
if os.path.exists(img_dir_link):
os.remove(img_dir_link)
os.symlink(img_dir, img_dir_link)
db_file = os.path.join(sfm_dir, 'database.db')
db_file = os.path.join(sfm_dir, "database.db")
run_sift_matching(img_dir, db_file, remove_exist=False)
sparse_dir = os.path.join(sfm_dir, 'sparse')
sparse_dir = os.path.join(sfm_dir, "sparse/0")
os.makedirs(sparse_dir, exist_ok=True)
run_sfm(img_dir, db_file, sparse_dir)
# undistort images
mvs_dir = os.path.join(out_dir, 'mvs')
mvs_dir = os.path.join(out_dir, "mvs")
os.makedirs(mvs_dir, exist_ok=True)
prepare_mvs(img_dir, sparse_dir, mvs_dir)
# extract camera parameters and undistorted images
os.makedirs(os.path.join(out_dir, 'posed_images'), exist_ok=True)
extract_all_to_dir(os.path.join(mvs_dir, 'sparse'), os.path.join(out_dir, 'posed_images'))
undistorted_img_dir = os.path.join(mvs_dir, 'images')
posed_img_dir_link = os.path.join(out_dir, 'posed_images/images')
os.makedirs(os.path.join(out_dir, "posed_images"), exist_ok=True)
extract_all_to_dir(
os.path.join(mvs_dir, "sparse"), os.path.join(out_dir, "posed_images")
)
undistorted_img_dir = os.path.join(mvs_dir, "images")
posed_img_dir_link = os.path.join(out_dir, "posed_images/images")
if os.path.exists(posed_img_dir_link):
os.remove(posed_img_dir_link)
os.symlink(undistorted_img_dir, posed_img_dir_link)
# normalize average camera center to origin, and put all cameras inside the unit sphere
normalize_cam_dict(os.path.join(out_dir, 'posed_images/kai_cameras.json'),
os.path.join(out_dir, 'posed_images/kai_cameras_normalized.json'))
normalize_cam_dict(
os.path.join(out_dir, "posed_images/kai_cameras.json"),
os.path.join(out_dir, "posed_images/kai_cameras_normalized.json"),
)
if run_mvs:
# run mvs
run_photometric_mvs(mvs_dir, window_radius=7)
out_ply = os.path.join(out_dir, 'mvs/fused.ply')
out_ply = os.path.join(out_dir, "mvs/fused.ply")
run_fuse(mvs_dir, out_ply)
out_mesh_ply = os.path.join(out_dir, 'mvs/meshed_trim_3.ply')
out_mesh_ply = os.path.join(out_dir, "mvs/meshed_trim_3.ply")
run_possion_mesher(out_ply, out_mesh_ply, trim=3)
if __name__ == '__main__':
if __name__ == "__main__":
### note: this script is intended for the case where all images are taken by the same camera, i.e., intrinsics are shared.
img_dir = ''
out_dir = ''
# CHANGE THESE TWO PATHS
img_dir = "~/code/my_awesome_dataset/"
out_dir = "~/home/code/nerfplusplus/outdir/"
run_mvs = False
main(img_dir, out_dir, run_mvs=run_mvs)

File diff suppressed because it is too large.

@ -0,0 +1,81 @@
""""
Visualise the camera position for a non-json format dataset
"""
import numpy as np
import open3d as o3d
from camera_visualizer.visualize_cameras import (
frustums2lineset,
get_camera_frustum,
)
def format_str_to_array(input_str):
output = input_str.split(" ")
output = np.array(list(map(float, output)))
output = output.reshape((4, 4))
return output
# Parameters
basepath = "/home/user/Downloads/lf_data/ship/camera_path"
number_of_pose = 199
img_size = [1008, 548]
camera_size = 0.1
camera_color = (0, 0.5, 1)
sphere_radius = 1.0
# Get intrinsics
fd = open(basepath + "/intrinsics/000000.txt", "r")
K = fd.read()
fd.close()
# Format intrinsics
K = format_str_to_array(K)
# print("K = ", K)
# Get poses
list_of_poses = []
for k in range(number_of_pose):
    fd = open(basepath + f"/pose/{k:06}.txt", "r")
    W2C = fd.read()
    fd.close()
    W2C = format_str_to_array(W2C)
    C2W = np.linalg.inv(W2C)
    list_of_poses.append(C2W)
    # print("W2C = ", W2C)
# Draw everything
sphere = o3d.geometry.TriangleMesh.create_sphere(
    radius=sphere_radius, resolution=25
)
sphere = o3d.geometry.LineSet.create_from_triangle_mesh(sphere)
sphere.paint_uniform_color((0.9, 0.9, 0.9))
coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
    size=0.5, origin=[0.0, 0.0, 0.0]
)
things_to_draw = [sphere, coord_frame]
frustums = []
for W2C in list_of_poses:
    camera_frustum = get_camera_frustum(
        img_size, K, W2C, frustum_length=camera_size, color=camera_color
    )
    frustums.append(camera_frustum)
cameras = frustums2lineset(frustums)
things_to_draw.append(cameras)
o3d.visualization.draw_geometries(things_to_draw)

@ -0,0 +1,179 @@
import os
import pickle
import mcubes
import numpy as np
import torch
import torch.distributed
import trimesh
from tqdm import tqdm
from ddp_train_nerf import (
cleanup,
config_parser,
create_nerf,
setup,
setup_logger,
)
parser = config_parser()
args = parser.parse_args()
# hardcode settings
args.world_size = 1
args.rank = 0
# setup
setup(args.rank, args.world_size)
start, models = create_nerf(args.rank, args)
net_0 = models["net_0"]
fg_far_depth = 1
# weird way to do it, should be changed if something better exists
for idx, m in enumerate(net_0.modules()):
    # print(idx, "->", m)
    # foreground
    if idx == 3:
        fg_embedder_position = m
    if idx == 4:
        fg_embedder_viewdir = m
    if idx == 5:
        fg_mlp_net = m
    # background
    # if idx == 40:
    #     bg_embedder_position = m
    # if idx == 41:
    #     bg_embedder_viewdir = m
    # if idx == 42:
    #     bg_mlp_net = m
# put everything on GPU
device = "cuda"
def query_occupancy(
    position, embedder_position, embedder_viewdir, mlp_net, device="cuda"
):
    """
    Given a position, the appropriate embedders and the MLPNet, return the
    corresponding occupancy (sigma) predicted by the network.

    Parameters
    ----------
    position : torch.Tensor
        A (x, y, z) tensor of the position to query
    embedder_position, embedder_viewdir : nerf_network.Embedder
        Positional and view-direction embedders
    mlp_net : nerf_network.MLPNet
        A simple MLP implementation written for NeRF
    device : str, optional
        The torch device, can be either `cpu` or `cuda`

    Returns
    -------
    sigma : float
        The occupancy at the given position.
    """
    # take a random ray direction as it does not matter for sigma
    ray_d = torch.rand(3, device=device)
    # normalize ray direction
    ray_d_norm = torch.norm(ray_d)
    ray_d = ray_d / ray_d_norm
    # forge the input
    nn_input = torch.cat(
        (embedder_position(position), embedder_viewdir(ray_d)), dim=-1
    )
    # forward the NN
    nn_raw = mlp_net(nn_input)
    sigma = float(nn_raw["sigma"])
    return sigma
# anonymous function to query the occupancy at a given (x, y, z) position
f = lambda x, y, z: query_occupancy(
    torch.tensor([x, y, z], dtype=torch.float32, device=device),
    fg_embedder_position,
    fg_embedder_viewdir,
    fg_mlp_net,
)
def marching_cube_and_render(sigma_list, threshold):
    vertices, triangles = mcubes.marching_cubes(sigma_list, threshold)
    mesh = trimesh.Trimesh(vertices / N - 0.5, triangles)
    mesh.show()
# position = torch.rand(3, device=device)
# position = torch.tensor([0.1, 0.1, 0.1], device=device)
ray_d = torch.rand(3, device=device)
# normalize ray direction
ray_d_norm = torch.norm(ray_d)
ray_d = ray_d / ray_d_norm
N = 256
t = np.linspace(-2, 2, N + 1)
# A cube of size 2x2x2 is necessary to contain a sphere of radius 1.0
query_pts = np.stack(np.meshgrid(t, t, t), -1).astype(np.float32)
# print(query_pts.shape)
sh = query_pts.shape
flat = query_pts.reshape([-1, 3])
# raw_voxel = torch.zeros(N+1, N+1, N+1, 4) # N, D, H, W
fg_raw_voxel = torch.zeros(N + 1, N + 1, N + 1)
# bg_raw_voxel = torch.zeros(N+1, N+1, N+1)
i = 0
for x, y, z in tqdm(flat):
    position = torch.tensor([x, y, z], device=device)
    # bg_position = torch.cat((position, torch.tensor([1], device=device)))
    # concat the output of the embedding
    fg_input = torch.cat(
        (fg_embedder_position(position), fg_embedder_viewdir(ray_d)), dim=-1
    )
    # bg_input = torch.cat((bg_embedder_position(bg_position), bg_embedder_viewdir(ray_d)), dim=-1)
    # forward
    fg_raw = fg_mlp_net(fg_input)
    # bg_raw = bg_mlp_net(bg_input)
    # raw_voxel.append(position + float(nn_raw['sigma']))
    fg_sigma = float(fg_raw["sigma"])
    # bg_sigma = float(bg_raw["sigma"])
    nx, ny, nz = np.unravel_index(i, (N + 1, N + 1, N + 1))
    i += 1  # update index
    # raw_voxel[unraveled_index] = torch.tensor([sigma, x, y, z])
    fg_raw_voxel[nx, ny, nz] = fg_sigma
    # bg_raw_voxel[nx, ny, nz] = bg_sigma
fg_sigma = np.array(fg_raw_voxel)
# bg_sigma = np.array(bg_raw_voxel)
threshold = 0.5
# save the raw_voxel in pickle format
fd = open("raw_voxel_256.pkl", "wb")
pickle.dump(fg_sigma, fd)
fd.close()
# vertices, triangles = mcubes.marching_cubes(sigma, threshold)
# mesh = trimesh.Trimesh(vertices / N - .5, triangles)
# mesh.show()