# nerf_plus_plus/ddp_train_nerf.py

import json
import logging
import os
import time
from collections import OrderedDict
import numpy as np
import torch
import torch.distributed
import torch.multiprocessing
import torch.nn as nn
import torch.optim
from tensorboardX import SummaryWriter
from torch.nn.parallel import DistributedDataParallel as DDP
from tqdm import tqdm
from data_loader_split import load_data_split
from ddp_model import NerfNetWithAutoExpo
from utils import TINY_NUMBER, colorize, img2mse, img_HWC2CHW, mse2psnr

logger = logging.getLogger(__package__)


def setup_logger():
# create logger
logger = logging.getLogger(__package__)
# logger.setLevel(logging.DEBUG)
logger.setLevel(logging.INFO)
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
# create formatter
formatter = logging.Formatter(
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
# add formatter to ch
ch.setFormatter(formatter)
# add ch to logger
logger.addHandler(ch)


def intersect_sphere(ray_o, ray_d):
    """
    ray_o, ray_d: [..., 3]
    compute the depth at which the ray exits the unit sphere, i.e. the larger
    root of ||ray_o + t * ray_d|| = 1
    """
# note: d1 becomes negative if this mid point is behind camera
d1 = -torch.sum(ray_d * ray_o, dim=-1) / torch.sum(ray_d * ray_d, dim=-1)
p = ray_o + d1.unsqueeze(-1) * ray_d
# consider the case where the ray does not intersect the sphere
ray_d_cos = 1.0 / torch.norm(ray_d, dim=-1)
p_norm_sq = torch.sum(p * p, dim=-1)
if (p_norm_sq >= 1.0).any():
raise Exception(
"Not all your cameras are bounded by the unit sphere; "
"please make sure the cameras are normalized properly!"
)
d2 = torch.sqrt(1.0 - p_norm_sq) * ray_d_cos
return d1 + d2
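

# Illustrative sketch (added for exposition; not called by the training code):
# intersect_sphere() returns the depth t at which a ray exits the unit sphere,
# i.e. the larger root of ||ray_o + t * ray_d||^2 = 1. A quick sanity check for
# a camera at the origin looking along +z:
def _example_intersect_sphere():
    ray_o = torch.tensor([[0.0, 0.0, 0.0]])  # camera center inside the sphere
    ray_d = torch.tensor([[0.0, 0.0, 1.0]])  # unit-length viewing direction
    depth = intersect_sphere(ray_o, ray_d)   # expected: tensor([1.0])
    assert torch.allclose(depth, torch.ones(1))
    return depth

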
def perturb_samples(z_vals):
# get intervals between samples
mids = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1])
upper = torch.cat([mids, z_vals[..., -1:]], dim=-1)
lower = torch.cat([z_vals[..., 0:1], mids], dim=-1)
# uniform samples in those intervals
t_rand = torch.rand_like(z_vals)
z_vals = lower + (upper - lower) * t_rand # [N_rays, N_samples]
return z_vals
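

# Illustrative sketch (added for exposition; not called by the training code):
# perturb_samples() jitters each depth uniformly within the interval bounded by
# the midpoints of its neighbours, so the perturbed depths stay ordered and
# stratified over the same overall range.
def _example_perturb_samples():
    z_vals = torch.linspace(0.0, 1.0, 5).unsqueeze(0)  # [1, 5], bin width 0.25
    z_perturbed = perturb_samples(z_vals)
    # each perturbed depth moves by at most half a bin from the regular grid
    assert torch.all((z_perturbed - z_vals).abs() <= 0.125 + 1e-6)
    return z_perturbed

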
def sample_pdf(bins, weights, N_samples, det=False):
"""
:param bins: tensor of shape [..., M+1], M is the number of bins
:param weights: tensor of shape [..., M]
:param N_samples: number of samples along each ray
:param det: if True, will perform deterministic sampling
:return: [..., N_samples]
"""
# Get pdf
weights = weights + TINY_NUMBER # prevent nans
pdf = weights / torch.sum(weights, dim=-1, keepdim=True) # [..., M]
cdf = torch.cumsum(pdf, dim=-1) # [..., M]
cdf = torch.cat(
[torch.zeros_like(cdf[..., 0:1]), cdf], dim=-1
) # [..., M+1]
# Take uniform samples
dots_sh = list(weights.shape[:-1])
M = weights.shape[-1]
min_cdf = 0.00
max_cdf = 1.00 # prevent outlier samples
if det:
u = torch.linspace(min_cdf, max_cdf, N_samples, device=bins.device)
        u = u.view([1] * len(dots_sh) + [N_samples]).expand(
            dots_sh + [N_samples]
        )  # [..., N_samples]
else:
sh = dots_sh + [N_samples]
u = (
torch.rand(*sh, device=bins.device) * (max_cdf - min_cdf) + min_cdf
) # [..., N_samples]
# Invert CDF
# [..., N_samples, 1] >= [..., 1, M] ----> [..., N_samples, M] ----> [..., N_samples,]
above_inds = torch.sum(
u.unsqueeze(-1) >= cdf[..., :M].unsqueeze(-2), dim=-1
).long()
# random sample inside each bin
below_inds = torch.clamp(above_inds - 1, min=0)
inds_g = torch.stack(
(below_inds, above_inds), dim=-1
) # [..., N_samples, 2]
cdf = cdf.unsqueeze(-2).expand(
dots_sh + [N_samples, M + 1]
) # [..., N_samples, M+1]
cdf_g = torch.gather(
input=cdf, dim=-1, index=inds_g
) # [..., N_samples, 2]
bins = bins.unsqueeze(-2).expand(
dots_sh + [N_samples, M + 1]
) # [..., N_samples, M+1]
bins_g = torch.gather(
input=bins, dim=-1, index=inds_g
) # [..., N_samples, 2]
# fix numeric issue
denom = cdf_g[..., 1] - cdf_g[..., 0] # [..., N_samples]
denom = torch.where(denom < TINY_NUMBER, torch.ones_like(denom), denom)
t = (u - cdf_g[..., 0]) / denom
samples = bins_g[..., 0] + t * (
bins_g[..., 1] - bins_g[..., 0] + TINY_NUMBER
)
return samples
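

# Illustrative sketch (added for exposition; not called by the training code):
# sample_pdf() performs inverse-transform sampling of the piecewise-constant
# PDF defined by `weights` over `bins`, so new depths concentrate in bins with
# large weights. The fine level of the cascade uses exactly this to place extra
# samples where the coarse level found high density.
def _example_sample_pdf():
    bins = torch.linspace(0.0, 1.0, 5).unsqueeze(0)  # [1, 5] -> 4 bins of width 0.25
    weights = torch.tensor([[0.1, 4.0, 0.1, 0.1]])   # most mass in the bin [0.25, 0.5]
    samples = sample_pdf(bins, weights, N_samples=16, det=True)
    # most of the 16 samples fall inside [0.25, 0.5]
    frac_in_peak = ((samples >= 0.25) & (samples <= 0.5)).float().mean()
    return samples, frac_in_peak

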
def render_single_image(rank, world_size, models, ray_sampler, chunk_size):
##### parallel rendering of a single image
ray_batch = ray_sampler.get_all()
if (ray_batch["ray_d"].shape[0] // world_size) * world_size != ray_batch[
"ray_d"
].shape[0]:
raise Exception(
"Number of pixels in the image is not divisible by the number of GPUs!\n\t# pixels: {}\n\t# GPUs: {}".format(
ray_batch["ray_d"].shape[0], world_size
)
)
# split into ranks; make sure different processes don't overlap
rank_split_sizes = [
ray_batch["ray_d"].shape[0] // world_size,
] * world_size
rank_split_sizes[-1] = ray_batch["ray_d"].shape[0] - sum(
rank_split_sizes[:-1]
)
for key in ray_batch:
if torch.is_tensor(ray_batch[key]):
ray_batch[key] = torch.split(ray_batch[key], rank_split_sizes)[
rank
].to(rank)
# split into chunks and render inside each process
ray_batch_split = OrderedDict()
for key in ray_batch:
if torch.is_tensor(ray_batch[key]):
ray_batch_split[key] = torch.split(ray_batch[key], chunk_size)
    # render each chunk of rays in this process (inference only, no gradients)
ret_merge_chunk = [OrderedDict() for _ in range(models["cascade_level"])]
for s in range(len(ray_batch_split["ray_d"])):
ray_o = ray_batch_split["ray_o"][s]
ray_d = ray_batch_split["ray_d"][s]
min_depth = ray_batch_split["min_depth"][s]
dots_sh = list(ray_d.shape[:-1])
for m in range(models["cascade_level"]):
net = models["net_{}".format(m)]
# sample depths
N_samples = models["cascade_samples"][m]
if m == 0:
# foreground depth
fg_far_depth = intersect_sphere(ray_o, ray_d) # [...,]
fg_near_depth = min_depth # [..., ]
step = (fg_far_depth - fg_near_depth) / (N_samples - 1)
fg_depth = torch.stack(
[fg_near_depth + i * step for i in range(N_samples)],
dim=-1,
) # [..., N_samples]
# background depth
                bg_depth = (
                    torch.linspace(0.0, 1.0, N_samples)
                    .view([1] * len(dots_sh) + [N_samples])
                    .expand(dots_sh + [N_samples])
                    .to(rank)
                )
# delete unused memory
del fg_near_depth
del step
torch.cuda.empty_cache()
else:
# sample pdf and concat with earlier samples
fg_weights = ret["fg_weights"].clone().detach()
fg_depth_mid = 0.5 * (
fg_depth[..., 1:] + fg_depth[..., :-1]
) # [..., N_samples-1]
fg_weights = fg_weights[..., 1:-1] # [..., N_samples-2]
fg_depth_samples = sample_pdf(
bins=fg_depth_mid,
weights=fg_weights,
N_samples=N_samples,
det=True,
) # [..., N_samples]
fg_depth, _ = torch.sort(
torch.cat((fg_depth, fg_depth_samples), dim=-1)
)
# sample pdf and concat with earlier samples
bg_weights = ret["bg_weights"].clone().detach()
bg_depth_mid = 0.5 * (bg_depth[..., 1:] + bg_depth[..., :-1])
bg_weights = bg_weights[..., 1:-1] # [..., N_samples-2]
bg_depth_samples = sample_pdf(
bins=bg_depth_mid,
weights=bg_weights,
N_samples=N_samples,
det=True,
) # [..., N_samples]
bg_depth, _ = torch.sort(
torch.cat((bg_depth, bg_depth_samples), dim=-1)
)
# delete unused memory
del fg_weights
del fg_depth_mid
del fg_depth_samples
del bg_weights
del bg_depth_mid
del bg_depth_samples
torch.cuda.empty_cache()
with torch.no_grad():
ret = net(ray_o, ray_d, fg_far_depth, fg_depth, bg_depth)
for key in ret:
if key not in ["fg_weights", "bg_weights"]:
if torch.is_tensor(ret[key]):
if key not in ret_merge_chunk[m]:
ret_merge_chunk[m][key] = [
ret[key].cpu(),
]
else:
ret_merge_chunk[m][key].append(ret[key].cpu())
ret[key] = None
# clean unused memory
torch.cuda.empty_cache()
# merge results from different chunks
for m in range(len(ret_merge_chunk)):
for key in ret_merge_chunk[m]:
ret_merge_chunk[m][key] = torch.cat(ret_merge_chunk[m][key], dim=0)
# merge results from different processes
if rank == 0:
ret_merge_rank = [OrderedDict() for _ in range(len(ret_merge_chunk))]
for m in range(len(ret_merge_chunk)):
for key in ret_merge_chunk[m]:
# generate tensors to store results from other processes
sh = list(ret_merge_chunk[m][key].shape[1:])
                ret_merge_rank[m][key] = [
                    torch.zeros(size, *sh, dtype=torch.float32)
                    for size in rank_split_sizes
                ]
torch.distributed.gather(
ret_merge_chunk[m][key], ret_merge_rank[m][key]
)
ret_merge_rank[m][key] = (
torch.cat(ret_merge_rank[m][key], dim=0)
.reshape((ray_sampler.H, ray_sampler.W, -1))
.squeeze()
)
# print(m, key, ret_merge_rank[m][key].shape)
else: # send results to main process
for m in range(len(ret_merge_chunk)):
for key in ret_merge_chunk[m]:
torch.distributed.gather(ret_merge_chunk[m][key])
# only rank 0 program returns
if rank == 0:
return ret_merge_rank
else:
return None
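

# Illustrative sketch (simplified for exposition; not used by the code above):
# the merge step in render_single_image() relies on torch.distributed.gather,
# where the destination rank (rank 0 by default) supplies a gather_list of
# per-rank buffers and all other ranks only send their local tensor. This
# assumes every rank holds an equally sized chunk, as enforced above.
def _example_gather_pattern(rank, world_size, local_result):
    if rank == 0:
        buffers = [torch.zeros_like(local_result) for _ in range(world_size)]
        torch.distributed.gather(local_result, buffers)  # dst defaults to rank 0
        return torch.cat(buffers, dim=0)  # results merged in rank order
    torch.distributed.gather(local_result)  # non-destination ranks just send
    return None

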
def log_view_to_tb(writer, global_step, log_data, gt_img, mask, prefix=""):
rgb_im = img_HWC2CHW(torch.from_numpy(gt_img))
writer.add_image(prefix + "rgb_gt", rgb_im, global_step)
for m in range(len(log_data)):
rgb_im = img_HWC2CHW(log_data[m]["rgb"])
rgb_im = torch.clamp(
rgb_im, min=0.0, max=1.0
) # just in case diffuse+specular>1
writer.add_image(
prefix + "level_{}/rgb".format(m), rgb_im, global_step
)
rgb_im = img_HWC2CHW(log_data[m]["fg_rgb"])
rgb_im = torch.clamp(
rgb_im, min=0.0, max=1.0
) # just in case diffuse+specular>1
writer.add_image(
prefix + "level_{}/fg_rgb".format(m), rgb_im, global_step
)
depth = log_data[m]["fg_depth"]
depth_im = img_HWC2CHW(
colorize(depth, cmap_name="jet", append_cbar=True, mask=mask)
)
writer.add_image(
prefix + "level_{}/fg_depth".format(m), depth_im, global_step
)
rgb_im = img_HWC2CHW(log_data[m]["bg_rgb"])
rgb_im = torch.clamp(
rgb_im, min=0.0, max=1.0
) # just in case diffuse+specular>1
writer.add_image(
prefix + "level_{}/bg_rgb".format(m), rgb_im, global_step
)
depth = log_data[m]["bg_depth"]
depth_im = img_HWC2CHW(
colorize(depth, cmap_name="jet", append_cbar=True, mask=mask)
)
writer.add_image(
prefix + "level_{}/bg_depth".format(m), depth_im, global_step
)
bg_lambda = log_data[m]["bg_lambda"]
bg_lambda_im = img_HWC2CHW(
colorize(bg_lambda, cmap_name="hot", append_cbar=True, mask=mask)
)
writer.add_image(
prefix + "level_{}/bg_lambda".format(m), bg_lambda_im, global_step
)


def setup(rank, world_size):
os.environ["MASTER_ADDR"] = "localhost"
# port = np.random.randint(12355, 12399)
# os.environ['MASTER_PORT'] = '{}'.format(port)
os.environ["MASTER_PORT"] = "12355"
# initialize the process group
torch.distributed.init_process_group(
"gloo", rank=rank, world_size=world_size
)


def cleanup():
    torch.distributed.destroy_process_group()


def create_nerf(rank, args):
###### create network and wrap in ddp; each process should do this
# fix random seed just to make sure the network is initialized with same weights at different processes
torch.manual_seed(777)
    # very important!!! otherwise extra memory may get allocated on the rank 0 gpu
torch.cuda.set_device(rank)
models = OrderedDict()
models["cascade_level"] = args.cascade_level
models["cascade_samples"] = [
int(x.strip()) for x in args.cascade_samples.split(",")
]
for m in range(models["cascade_level"]):
img_names = None
if args.optim_autoexpo:
# load training image names for autoexposure
f = os.path.join(args.basedir, args.expname, "train_images.json")
with open(f) as file:
img_names = json.load(file)
net = NerfNetWithAutoExpo(
args, optim_autoexpo=args.optim_autoexpo, img_names=img_names
).to(rank)
net = DDP(
net,
device_ids=[rank],
output_device=rank,
find_unused_parameters=True,
)
# net = DDP(net, device_ids=[rank], output_device=rank)
optim = torch.optim.Adam(net.parameters(), lr=args.lrate)
models["net_{}".format(m)] = net
models["optim_{}".format(m)] = optim
start = -1
###### load pretrained weights; each process should do this
if (args.ckpt_path is not None) and (os.path.isfile(args.ckpt_path)):
ckpts = [args.ckpt_path]
else:
ckpts = [
os.path.join(args.basedir, args.expname, f)
for f in sorted(
os.listdir(os.path.join(args.basedir, args.expname))
)
if f.endswith(".pth")
]
def path2iter(path):
tmp = os.path.basename(path)[:-4]
idx = tmp.rfind("_")
return int(tmp[idx + 1 :])
ckpts = sorted(ckpts, key=path2iter)
logger.info("Found ckpts: {}".format(ckpts))
if len(ckpts) > 0 and not args.no_reload:
fpath = ckpts[-1]
logger.info("Reloading from: {}".format(fpath))
start = path2iter(fpath)
# configure map_location properly for different processes
map_location = {"cuda:%d" % 0: "cuda:%d" % rank}
to_load = torch.load(fpath, map_location=map_location)
for m in range(models["cascade_level"]):
for name in ["net_{}".format(m), "optim_{}".format(m)]:
models[name].load_state_dict(to_load[name])
return start, models
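

# Illustrative sketch (exposition only; not called anywhere): checkpoints are
# saved from rank 0, so the tensors inside them are tagged with "cuda:0".
# create_nerf() remaps them onto each process's own GPU via map_location so
# that every rank does not silently load its weights onto GPU 0.
def _example_load_ckpt_on_rank(fpath, rank):
    map_location = {"cuda:0": "cuda:%d" % rank}
    return torch.load(fpath, map_location=map_location)

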
def ddp_train_nerf(rank, args):
###### set up multi-processing
setup(rank, args.world_size)
###### set up logger
logger = logging.getLogger(__package__)
setup_logger()
###### decide chunk size according to gpu memory
logger.info(
"gpu_mem: {}".format(
torch.cuda.get_device_properties(rank).total_memory
)
)
if torch.cuda.get_device_properties(rank).total_memory / 1e9 > 14:
logger.info("setting batch size according to 24G gpu")
args.N_rand = 1024
args.chunk_size = 8192
else:
logger.info("setting batch size according to 12G gpu")
args.N_rand = 512
args.chunk_size = 4096
###### Create log dir and copy the config file
if rank == 0:
os.makedirs(os.path.join(args.basedir, args.expname), exist_ok=True)
f = os.path.join(args.basedir, args.expname, "args.txt")
with open(f, "w") as file:
for arg in sorted(vars(args)):
attr = getattr(args, arg)
file.write("{} = {}\n".format(arg, attr))
if args.config is not None:
f = os.path.join(args.basedir, args.expname, "config.txt")
with open(f, "w") as file:
                with open(args.config, "r") as src:
                    file.write(src.read())
torch.distributed.barrier()
ray_samplers = load_data_split(
args.datadir,
args.scene,
split="train",
try_load_min_depth=args.load_min_depth,
)
val_ray_samplers = load_data_split(
args.datadir,
args.scene,
split="validation",
try_load_min_depth=args.load_min_depth,
skip=args.testskip,
)
# write training image names for autoexposure
if args.optim_autoexpo:
f = os.path.join(args.basedir, args.expname, "train_images.json")
with open(f, "w") as file:
img_names = [
ray_samplers[i].img_path for i in range(len(ray_samplers))
]
json.dump(img_names, file, indent=2)
###### create network and wrap in ddp; each process should do this
start, models = create_nerf(rank, args)
##### important!!!
# make sure different processes sample different rays
np.random.seed((rank + 1) * 777)
# make sure different processes have different perturbations in depth samples
torch.manual_seed((rank + 1) * 777)
##### only main process should do the logging
if rank == 0:
writer = SummaryWriter(
os.path.join(args.basedir, "summaries", args.expname)
)
# start training
    what_val_to_log = 0  # helper variable for parallel rendering of an image
what_train_to_log = 0
for global_step in tqdm(range(start + 1, start + 1 + args.N_iters)):
time0 = time.time()
scalars_to_log = OrderedDict()
### Start of core optimization loop
scalars_to_log["resolution"] = ray_samplers[0].resolution_level
# randomly sample rays and move to device
i = np.random.randint(low=0, high=len(ray_samplers))
ray_batch = ray_samplers[i].random_sample(
args.N_rand, center_crop=False
)
for key in ray_batch:
if torch.is_tensor(ray_batch[key]):
ray_batch[key] = ray_batch[key].to(rank)
# forward and backward
dots_sh = list(ray_batch["ray_d"].shape[:-1]) # number of rays
all_rets = [] # results on different cascade levels
for m in range(models["cascade_level"]):
optim = models["optim_{}".format(m)]
net = models["net_{}".format(m)]
# sample depths
N_samples = models["cascade_samples"][m]
if m == 0:
# foreground depth
fg_far_depth = intersect_sphere(
ray_batch["ray_o"], ray_batch["ray_d"]
) # [...,]
fg_near_depth = ray_batch["min_depth"] # [..., ]
step = (fg_far_depth - fg_near_depth) / (N_samples - 1)
fg_depth = torch.stack(
[fg_near_depth + i * step for i in range(N_samples)],
dim=-1,
) # [..., N_samples]
fg_depth = perturb_samples(
fg_depth
) # random perturbation during training
# background depth
                bg_depth = (
                    torch.linspace(0.0, 1.0, N_samples)
                    .view([1] * len(dots_sh) + [N_samples])
                    .expand(dots_sh + [N_samples])
                    .to(rank)
                )
bg_depth = perturb_samples(
bg_depth
) # random perturbation during training
else:
# sample pdf and concat with earlier samples
fg_weights = ret["fg_weights"].clone().detach()
fg_depth_mid = 0.5 * (
fg_depth[..., 1:] + fg_depth[..., :-1]
) # [..., N_samples-1]
fg_weights = fg_weights[..., 1:-1] # [..., N_samples-2]
fg_depth_samples = sample_pdf(
bins=fg_depth_mid,
weights=fg_weights,
N_samples=N_samples,
det=False,
) # [..., N_samples]
fg_depth, _ = torch.sort(
torch.cat((fg_depth, fg_depth_samples), dim=-1)
)
# sample pdf and concat with earlier samples
bg_weights = ret["bg_weights"].clone().detach()
bg_depth_mid = 0.5 * (bg_depth[..., 1:] + bg_depth[..., :-1])
bg_weights = bg_weights[..., 1:-1] # [..., N_samples-2]
bg_depth_samples = sample_pdf(
bins=bg_depth_mid,
weights=bg_weights,
N_samples=N_samples,
det=False,
) # [..., N_samples]
bg_depth, _ = torch.sort(
torch.cat((bg_depth, bg_depth_samples), dim=-1)
)
optim.zero_grad()
ret = net(
ray_batch["ray_o"],
ray_batch["ray_d"],
fg_far_depth,
fg_depth,
bg_depth,
img_name=ray_batch["img_name"],
)
all_rets.append(ret)
rgb_gt = ray_batch["rgb"].to(rank)
if "autoexpo" in ret:
scale, shift = ret["autoexpo"]
scalars_to_log[
"level_{}/autoexpo_scale".format(m)
] = scale.item()
scalars_to_log[
"level_{}/autoexpo_shift".format(m)
] = shift.item()
# rgb_gt = scale * rgb_gt + shift
rgb_pred = (ret["rgb"] - shift) / scale
rgb_loss = img2mse(rgb_pred, rgb_gt)
loss = rgb_loss + args.lambda_autoexpo * (
torch.abs(scale - 1.0) + torch.abs(shift)
)
else:
rgb_loss = img2mse(ret["rgb"], rgb_gt)
loss = rgb_loss
scalars_to_log["level_{}/loss".format(m)] = rgb_loss.item()
scalars_to_log["level_{}/pnsr".format(m)] = mse2psnr(
rgb_loss.item()
)
loss.backward()
optim.step()
# # clean unused memory
# torch.cuda.empty_cache()
### end of core optimization loop
dt = time.time() - time0
scalars_to_log["iter_time"] = dt
### only main process should do the logging
if rank == 0 and (global_step % args.i_print == 0 or global_step < 10):
logstr = "{} step: {} ".format(args.expname, global_step)
for k in scalars_to_log:
logstr += " {}: {:.6f}".format(k, scalars_to_log[k])
writer.add_scalar(k, scalars_to_log[k], global_step)
logger.info(logstr)
### each process should do this; but only main process merges the results
if global_step % args.i_img == 0 or global_step == start + 1:
#### critical: make sure each process is working on the same random image
time0 = time.time()
idx = what_val_to_log % len(val_ray_samplers)
log_data = render_single_image(
rank,
args.world_size,
models,
val_ray_samplers[idx],
args.chunk_size,
)
what_val_to_log += 1
dt = time.time() - time0
if rank == 0: # only main process should do this
logger.info(
"Logged a random validation view in {} seconds".format(dt)
)
log_view_to_tb(
writer,
global_step,
log_data,
gt_img=val_ray_samplers[idx].get_img(),
mask=None,
prefix="val/",
)
time0 = time.time()
idx = what_train_to_log % len(ray_samplers)
log_data = render_single_image(
rank,
args.world_size,
models,
ray_samplers[idx],
args.chunk_size,
)
what_train_to_log += 1
dt = time.time() - time0
if rank == 0: # only main process should do this
logger.info(
"Logged a random training view in {} seconds".format(dt)
)
log_view_to_tb(
writer,
global_step,
log_data,
gt_img=ray_samplers[idx].get_img(),
mask=None,
prefix="train/",
)
del log_data
torch.cuda.empty_cache()
if rank == 0 and (
global_step % args.i_weights == 0 and global_step > 0
):
# saving checkpoints and logging
fpath = os.path.join(
args.basedir,
args.expname,
"model_{:06d}.pth".format(global_step),
)
to_save = OrderedDict()
for m in range(models["cascade_level"]):
name = "net_{}".format(m)
to_save[name] = models[name].state_dict()
name = "optim_{}".format(m)
to_save[name] = models[name].state_dict()
torch.save(to_save, fpath)
# clean up for multi-processing
cleanup()


def config_parser():
import configargparse
parser = configargparse.ArgumentParser()
parser.add_argument(
"--config",
is_config_file=True,
help="config file path",
)
parser.add_argument(
"--expname",
type=str,
help="experiment name",
)
parser.add_argument(
"--basedir",
type=str,
default="./logs/",
help="where to store ckpts and logs",
)
# dataset options
parser.add_argument(
"--datadir",
type=str,
default=None,
help="input data directory",
)
parser.add_argument(
"--scene",
type=str,
default=None,
help="scene name",
)
parser.add_argument(
"--testskip",
type=int,
default=8,
help="will load 1/N images from test/val sets, useful for large datasets like deepvoxels",
)
# model size
parser.add_argument(
"--netdepth",
type=int,
default=8,
help="layers in coarse network",
)
parser.add_argument(
"--netwidth",
type=int,
default=256,
help="channels per layer in coarse network",
)
parser.add_argument(
"--use_viewdirs",
action="store_true",
help="use full 5D input instead of 3D",
)
# checkpoints
parser.add_argument(
"--no_reload",
action="store_true",
help="do not reload weights from saved ckpt",
)
    parser.add_argument(
        "--ckpt_path",
        type=str,
        default=None,
        help="specific checkpoint (.pth) file to reload",
    )
# batch size
parser.add_argument(
"--N_rand",
type=int,
default=32 * 32 * 2,
help="batch size (number of random rays per gradient step)",
)
parser.add_argument(
"--chunk_size",
type=int,
default=1024 * 8,
help="number of rays processed in parallel, decrease if running out of memory",
)
# iterations
parser.add_argument(
"--N_iters",
type=int,
default=250001,
help="number of iterations",
)
# render only
parser.add_argument(
"--render_splits",
type=str,
default="test",
help="splits to render",
)
# cascade training
parser.add_argument(
"--cascade_level",
type=int,
default=2,
help="number of cascade levels",
)
parser.add_argument(
"--cascade_samples",
type=str,
default="64,64",
help="samples at each level",
)
# multiprocess learning
    parser.add_argument(
        "--world_size",
        type=int,
        default=-1,
        help="number of processes (GPUs); -1 uses all available GPUs",
    )
# optimize autoexposure
parser.add_argument(
"--optim_autoexpo",
action="store_true",
help="optimize autoexposure parameters",
)
parser.add_argument(
"--lambda_autoexpo",
type=float,
default=1.0,
help="regularization weight for autoexposure",
)
# learning rate options
parser.add_argument(
"--lrate",
type=float,
default=5e-4,
help="learning rate",
)
    parser.add_argument(
        "--lrate_decay_factor",
        type=float,
        default=0.1,
        help="factor by which to decay the learning rate",
    )
parser.add_argument(
"--lrate_decay_steps",
type=int,
default=5000,
help="decay learning rate by a factor every specified number of steps",
)
# rendering options
parser.add_argument(
"--det",
action="store_true",
help="deterministic sampling for coarse and fine samples",
)
parser.add_argument(
"--max_freq_log2",
type=int,
default=10,
help="log2 of max freq for positional encoding (3D location)",
)
parser.add_argument(
"--max_freq_log2_viewdirs",
type=int,
default=4,
help="log2 of max freq for positional encoding (2D direction)",
)
parser.add_argument(
"--load_min_depth",
action="store_true",
help="whether to load min depth",
)
# logging/saving options
    parser.add_argument(
        "--i_print",
        type=int,
        default=100,
        help="frequency of console printout and metric logging",
    )
parser.add_argument(
"--i_img",
type=int,
default=500,
help="frequency of tensorboard image logging",
)
parser.add_argument(
"--i_weights",
type=int,
default=10000,
help="frequency of weight ckpt saving",
)
return parser
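

# Illustrative sketch of a config file passed via --config (configargparse
# "key = value" syntax; the values below are made-up placeholders, not settings
# shipped with the repository):
#
#   expname = my_scene
#   basedir = ./logs/
#   datadir = ./data
#   scene = my_scene
#   cascade_level = 2
#   cascade_samples = 64,128
#   N_iters = 250001
#   use_viewdirs = True
#   load_min_depth = True

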
def train():
parser = config_parser()
args = parser.parse_args()
logger.info(parser.format_values())
if args.world_size == -1:
args.world_size = torch.cuda.device_count()
logger.info("Using # gpus: {}".format(args.world_size))
torch.multiprocessing.spawn(
ddp_train_nerf, args=(args,), nprocs=args.world_size, join=True
)


if __name__ == "__main__":
setup_logger()
train()