diff --git a/colmap_runner/format_dataset.py b/colmap_runner/format_dataset.py
new file mode 100644
index 0000000..4c40007
--- /dev/null
+++ b/colmap_runner/format_dataset.py
@@ -0,0 +1,94 @@
+"""Given the output of the run_colmap.py script, create a usable dataset
+compatible with the NeRF++ format. Written for Unix-like systems (POSIX-style
+paths); it has not been tested on Windows."""
+
+import os
+import json
+import numpy as np
+import shutil
+
+# Input: run_colmap.py output directory. Output: root of the NeRF++ dataset.
+input_path = "./lupo_output_dir_4"
+output_path = "./lupo_output_data_npp_3"
+
+cam_dict_norm_path = os.path.join(input_path, "posed_images/kai_cameras_normalized.json")
+with open(cam_dict_norm_path) as cam_file:  # JSON keyed by image file name
+    cam_dict_norm = json.loads(cam_file.read())
+
+# Train split: <output_path>/train/{intrinsics,pose,rgb}
+train_path = os.path.join(output_path, "train")
+train_int_path = os.path.join(train_path, "intrinsics")
+train_pose_path = os.path.join(train_path, "pose")
+train_rgb_path = os.path.join(train_path, "rgb")
+
+# Test split: <output_path>/test/{intrinsics,pose,rgb}
+test_path = os.path.join(output_path, "test")
+test_int_path = os.path.join(test_path, "intrinsics")
+test_pose_path = os.path.join(test_path, "pose")
+test_rgb_path = os.path.join(test_path, "rgb")
+
+# Create all the directories; exist_ok=True leaves the ones that are already
+# there untouched instead of raising.
+split_dirs = (
+    (train_path, train_int_path, train_pose_path, train_rgb_path),
+    (test_path, test_int_path, test_pose_path, test_rgb_path),
+)
+for directories in split_dirs:
+    for directory in directories:
+        os.makedirs(directory, exist_ok=True)
+
+# Sample images for the test set: N images are drawn at random, without
+# replacement; every image that is not drawn goes to the training set.
+# NOTE: the draw is not seeded, so the split differs from run to run, and
+# np.random.choice raises ValueError when fewer than N images are available.
+N = 10
+image_list = sorted(cam_dict_norm.keys())
+# Keep the draw as a set of plain strings for the membership test in the loop.
+sampled = set(np.random.choice(image_list, N, replace=False).tolist())
+
+
+def _write_matrix(txt_path, values):
+    """Write *values* to *txt_path* as space-separated numbers on one line.
+
+    Args:
+        txt_path: path of the text file to create (overwritten if present).
+        values: numbers to write, either flat or nested row by row; nested
+            input is flattened in row-major order before writing.
+    """
+    flat_values = np.array(values).ravel()
+    with open(txt_path, "w") as fd:
+        fd.write(" ".join(map(str, flat_values)))
+
+
+# Write the files with the corresponding data
+for img_name in image_list:
+    # Retrieve the data
+    K = cam_dict_norm[img_name]['K']
+    W2C = cam_dict_norm[img_name]['W2C']
+
+    # Intrinsics and pose files are named after the image, minus its extension
+    img_name_path = os.path.splitext(img_name)[0]
+
+    # Pick the directories of the split this image belongs to
+    if img_name in sampled:
+        # testing set
+        int_path, pose_path, rgb_path = test_int_path, test_pose_path, test_rgb_path
+    else:
+        # training set
+        int_path, pose_path, rgb_path = train_int_path, train_pose_path, train_rgb_path
+
+    # Write intrinsics
+    _write_matrix(os.path.join(int_path, img_name_path + ".txt"), K)
+
+    # Write poses
+    _write_matrix(os.path.join(pose_path, img_name_path + ".txt"), W2C)
+
+    # Copy image
+    source_image_path = os.path.join(input_path, "posed_images/images", img_name)
+    rgb_img_path = os.path.join(rgb_path, img_name)
+    shutil.copy(source_image_path, rgb_img_path)
+
+# Create the validation dataset (a link to ./test), unless it already exists
+validation_path = os.path.join(output_path, "validation")
+if not os.path.lexists(validation_path):
+    os.symlink("./test", validation_path)