import os
import json
import os.path as osp
import decimal
import argparse
import math
from bisect import bisect_left
from multiprocessing import Pool

import cv2
import numpy as np
import quaternion
from PIL import Image
from scipy import interpolate
from tqdm import tqdm


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--arkitscenes_dir",
        default="",
    )
    parser.add_argument(
        "--output_dir",
        default="data/dust3r_data/processed_arkitscenes_highres",
    )
    return parser


def value_to_decimal(value, decimal_places):
    decimal.getcontext().rounding = decimal.ROUND_HALF_UP  # define rounding method
    return decimal.Decimal(str(float(value))).quantize(
        decimal.Decimal("1e-{}".format(decimal_places))
    )


def closest(value, sorted_list):
    index = bisect_left(sorted_list, value)
    if index == 0:
        return sorted_list[0]
    elif index == len(sorted_list):
        return sorted_list[-1]
    else:
        value_before = sorted_list[index - 1]
        value_after = sorted_list[index]
        if value_after - value < value - value_before:
            return value_after
        else:
            return value_before


def get_up_vectors(pose_device_to_world):
    return np.matmul(pose_device_to_world, np.array([[0.0], [-1.0], [0.0], [0.0]]))


def get_right_vectors(pose_device_to_world):
    return np.matmul(pose_device_to_world, np.array([[1.0], [0.0], [0.0], [0.0]]))


def read_traj(traj_path):
    quaternions = []
    poses = []
    timestamps = []
    poses_p_to_w = []
    with open(traj_path) as f:
        traj_lines = f.readlines()
        for line in traj_lines:
            tokens = line.split()
            assert len(tokens) == 7
            traj_timestamp = float(tokens[0])

            timestamps_decimal_value = value_to_decimal(traj_timestamp, 3)
            # store as float for spline interpolation
            timestamps.append(float(timestamps_decimal_value))

            # tokens 1-3 are an axis-angle rotation, tokens 4-6 a translation,
            # both mapping world to device
            angle_axis = [float(tokens[1]), float(tokens[2]), float(tokens[3])]
            r_w_to_p, _ = cv2.Rodrigues(np.asarray(angle_axis))
            t_w_to_p = np.asarray(
                [float(tokens[4]), float(tokens[5]), float(tokens[6])]
            )

            pose_w_to_p = np.eye(4)
            pose_w_to_p[:3, :3] = r_w_to_p
            pose_w_to_p[:3, 3] = t_w_to_p

            # invert to get device-to-world
            pose_p_to_w = np.linalg.inv(pose_w_to_p)

            r_p_to_w_as_quat = quaternion.from_rotation_matrix(pose_p_to_w[:3, :3])
            t_p_to_w = pose_p_to_w[:3, 3]
            poses_p_to_w.append(pose_p_to_w)
            poses.append(t_p_to_w)
            quaternions.append(r_p_to_w_as_quat)
    return timestamps, poses, quaternions, poses_p_to_w


def main(rootdir, outdir):
    os.makedirs(outdir, exist_ok=True)

    subdirs = ["Validation", "Training"]
    for subdir in subdirs:
        outsubdir = osp.join(outdir, subdir)
        # make sure the output subdir exists even if no scene turns out valid,
        # since scene_list.json is written there unconditionally
        os.makedirs(outsubdir, exist_ok=True)
        scene_dirs = sorted(
            [
                d
                for d in os.listdir(osp.join(rootdir, subdir))
                if osp.isdir(osp.join(rootdir, subdir, d))
            ]
        )
        with Pool() as pool:
            results = list(
                tqdm(
                    pool.imap(
                        process_scene,
                        [
                            (rootdir, outdir, subdir, scene_subdir)
                            for scene_subdir in scene_dirs
                        ],
                    ),
                    total=len(scene_dirs),
                )
            )

        # filter out scenes that were skipped (process_scene returned None)
        valid_scenes = [result for result in results if result is not None]
        outlistfile = osp.join(outsubdir, "scene_list.json")
        with open(outlistfile, "w") as f:
            json.dump(valid_scenes, f)


def process_scene(args):
    rootdir, outdir, subdir, scene_subdir = args

    # unpack paths
    scene_dir = osp.join(rootdir, subdir, scene_subdir)
    outsubdir = osp.join(outdir, subdir)
    out_scene_subdir = osp.join(outsubdir, scene_subdir)

    # skip the scene if any of the required resources is missing
    if (
        not osp.exists(osp.join(scene_dir, "highres_depth"))
        or not osp.exists(osp.join(scene_dir, "vga_wide"))
        or not osp.exists(osp.join(scene_dir, "vga_wide_intrinsics"))
        or not osp.exists(osp.join(scene_dir, "lowres_wide.traj"))
    ):
        return None

    depth_dir = osp.join(scene_dir, "highres_depth")
    rgb_dir = osp.join(scene_dir, "vga_wide")
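    # Frames in both directories are expected to follow the
    # "<scene_id>_<timestamp>.png" naming scheme, with timestamps in seconds
    # at millisecond precision; the parsing and lookups below rely on this.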
    intrinsics_dir = osp.join(scene_dir, "vga_wide_intrinsics")
    traj_path = osp.join(scene_dir, "lowres_wide.traj")

    depth_files = sorted(os.listdir(depth_dir))
    img_files = sorted(os.listdir(rgb_dir))

    # parse the scene, export the list of valid (K, pose, rgb, depth) and convert images
    scene_metadata_path = osp.join(out_scene_subdir, "scene_metadata.npz")
    if osp.isfile(scene_metadata_path):
        print(f"Skipping {scene_subdir}")
    else:
        print(f"Parsing {scene_subdir}")
        # load the trajectory
        timestamps, poses, quaternions, poses_cam_to_world = read_traj(traj_path)

        poses = np.array(poses)
        quaternions = np.array(quaternions, dtype=np.quaternion)
        quaternions = quaternion.unflip_rotors(quaternions)
        timestamps = np.array(timestamps)

        all_depths = sorted(
            [
                (basename, basename.split(".png")[0].split("_")[1])
                for basename in depth_files
            ],
            key=lambda x: float(x[1]),
        )

        # keep only the depth frames whose timestamp falls inside the trajectory
        selected_depths = []
        timestamps_selected = []
        timestamp_min = timestamps.min()
        timestamp_max = timestamps.max()
        for basename, frame_id in all_depths:
            frame_id = float(frame_id)
            if frame_id < timestamp_min or frame_id > timestamp_max:
                continue
            selected_depths.append((basename, frame_id))
            timestamps_selected.append(frame_id)

        sky_direction_scene, trajectories, intrinsics, images, depths = (
            convert_scene_metadata(
                scene_subdir,
                intrinsics_dir,
                timestamps,
                quaternions,
                poses,
                poses_cam_to_world,
                img_files,
                selected_depths,
                timestamps_selected,
            )
        )

        if len(images) == 0:
            print(f"Skipping {scene_subdir}")
            return None

        os.makedirs(out_scene_subdir, exist_ok=True)
        os.makedirs(osp.join(out_scene_subdir, "vga_wide"), exist_ok=True)
        os.makedirs(osp.join(out_scene_subdir, "highres_depth"), exist_ok=True)
        assert isinstance(sky_direction_scene, str)
        for image_path, depth_path in zip(images, depths):
            img_out = osp.join(
                out_scene_subdir, "vga_wide", image_path.replace(".png", ".jpg")
            )
            depth_out = osp.join(out_scene_subdir, "highres_depth", depth_path)
            if osp.isfile(img_out) and osp.isfile(depth_out):
                continue

            vga_wide_path = osp.join(rgb_dir, image_path)
            depth_path = osp.join(depth_dir, depth_path)
            if not osp.isfile(vga_wide_path) or not osp.isfile(depth_path):
                continue

            img = Image.open(vga_wide_path)
            depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)

            # rotate the image so that the sky points up
            # (newer Pillow exposes the constants under Image.Transpose,
            # hence the fallback for older versions)
            if sky_direction_scene == "RIGHT":
                try:
                    img = img.transpose(Image.Transpose.ROTATE_90)
                except AttributeError:
                    img = img.transpose(Image.ROTATE_90)
                depth = cv2.rotate(depth, cv2.ROTATE_90_COUNTERCLOCKWISE)
            elif sky_direction_scene == "LEFT":
                try:
                    img = img.transpose(Image.Transpose.ROTATE_270)
                except AttributeError:
                    img = img.transpose(Image.ROTATE_270)
                depth = cv2.rotate(depth, cv2.ROTATE_90_CLOCKWISE)
            elif sky_direction_scene == "DOWN":
                try:
                    img = img.transpose(Image.Transpose.ROTATE_180)
                except AttributeError:
                    img = img.transpose(Image.ROTATE_180)
                depth = cv2.rotate(depth, cv2.ROTATE_180)

            W, H = img.size
            if not osp.isfile(img_out):
                img.save(img_out)

            depth = cv2.resize(depth, (W, H), interpolation=cv2.INTER_NEAREST)
            # avoid destroying the base dataset when you mess up the paths
            if not osp.isfile(depth_out):
                cv2.imwrite(depth_out, depth)

        # save metadata at the end
        np.savez(
            scene_metadata_path,
            trajectories=trajectories,
            intrinsics=intrinsics,
            images=images,
        )

    # report the scene as valid so that main() can include it in scene_list.json
    return scene_subdir


def convert_scene_metadata(
    scene_subdir,
    intrinsics_dir,
    timestamps,
    quaternions,
    poses,
    poses_cam_to_world,
    all_images,
    selected_depths,
    timestamps_selected,
):
    # find scene orientation
    sky_direction_scene, rotated_to_cam = find_scene_orientation(poses_cam_to_world)

    # find/compute pose for selected timestamps
    # most images have a valid timestamp / exact pose associated
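    # Rotations are interpolated with quaternion SQUAD (spherical spline) and
    # translations with a linear spline, both evaluated at the selected
    # depth-frame timestamps; this fills in poses for frames that fall
    # between two trajectory samples.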
    timestamps_selected = np.array(timestamps_selected)
    spline = interpolate.interp1d(timestamps, poses, kind="linear", axis=0)
    interpolated_rotations = quaternion.squad(
        quaternions, timestamps, timestamps_selected
    )
    interpolated_positions = spline(timestamps_selected)

    trajectories = []
    intrinsics = []
    images = []
    depths = []
    for i, (basename, frame_id) in enumerate(selected_depths):
        # look for the intrinsics file, tolerating up to +/- 100 ms of
        # mismatch between the depth and intrinsics timestamps;
        # format with 3 decimals to match the on-disk naming
        intrinsic_fn = osp.join(
            intrinsics_dir, f"{scene_subdir}_{frame_id:.3f}.pincam"
        )
        search_interval = int(0.1 / 0.001)
        for offset in range(-search_interval, search_interval + 1):
            if osp.exists(intrinsic_fn):
                break
            intrinsic_fn = osp.join(
                intrinsics_dir,
                f"{scene_subdir}_{float(frame_id) + offset * 0.001:.3f}.pincam",
            )
        if not osp.exists(intrinsic_fn):
            print(f"Skipping {intrinsic_fn}")
            continue

        # look for the rgb image, tolerating up to +/- 1 ms of mismatch
        image_path = f"{scene_subdir}_{frame_id:.3f}.png"
        search_interval = int(0.001 / 0.001)
        for offset in range(-search_interval, search_interval + 1):
            if image_path in all_images:
                break
            image_path = f"{scene_subdir}_{float(frame_id) + offset * 0.001:.3f}.png"
        if image_path not in all_images:
            print(f"Skipping {scene_subdir} {frame_id}")
            continue

        w, h, fx, fy, hw, hh = np.loadtxt(intrinsic_fn)  # PINHOLE

        pose = np.eye(4)
        pose[:3, :3] = quaternion.as_rotation_matrix(interpolated_rotations[i])
        pose[:3, 3] = interpolated_positions[i]

        images.append(image_path)  # the rgb name actually found on disk
        depths.append(basename)
        if sky_direction_scene == "RIGHT" or sky_direction_scene == "LEFT":
            intrinsics.append([h, w, fy, fx, hh, hw])  # swapped intrinsics
        else:
            intrinsics.append([w, h, fx, fy, hw, hh])
        trajectories.append(
            pose @ rotated_to_cam
        )  # pose_cam_to_world @ rotated_to_cam = rotated(cam) to world

    return sky_direction_scene, trajectories, intrinsics, images, depths


def find_scene_orientation(poses_cam_to_world):
    if len(poses_cam_to_world) > 0:
        up_vector = sum(get_up_vectors(p) for p in poses_cam_to_world) / len(
            poses_cam_to_world
        )
        right_vector = sum(get_right_vectors(p) for p in poses_cam_to_world) / len(
            poses_cam_to_world
        )
        up_world = np.array([[0.0], [0.0], [1.0], [0.0]])
    else:
        up_vector = np.array([[0.0], [-1.0], [0.0], [0.0]])
        right_vector = np.array([[1.0], [0.0], [0.0], [0.0]])
        up_world = np.array([[0.0], [0.0], [1.0], [0.0]])

    # angles between the average device axes and the world up, in [0, 180]
    device_up_to_world_up_angle = (
        np.arccos(np.clip(np.dot(np.transpose(up_world), up_vector), -1.0, 1.0)).item()
        * 180.0
        / np.pi
    )
    device_right_to_world_up_angle = (
        np.arccos(
            np.clip(np.dot(np.transpose(up_world), right_vector), -1.0, 1.0)
        ).item()
        * 180.0
        / np.pi
    )
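    # Pick whichever device axis is closer to horizontal (i.e., closer to 90
    # degrees from the world up): if it is the device's up axis, the sky lies
    # to the LEFT or RIGHT of the image and a 90-degree rotation is needed;
    # if it is the right axis, the sky is UP or DOWN.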
    up_closest_to_90 = abs(device_up_to_world_up_angle - 90.0) < abs(
        device_right_to_world_up_angle - 90.0
    )
    if up_closest_to_90:
        assert abs(device_up_to_world_up_angle - 90.0) < 45.0
        # LEFT
        if device_right_to_world_up_angle > 90.0:
            sky_direction_scene = "LEFT"
            cam_to_rotated_q = quaternion.from_rotation_vector(
                [0.0, 0.0, math.pi / 2.0]
            )
        else:
            # note that in metadata.csv RIGHT does not exist, but again it's not accurate...
            # well, turns out there are scenes oriented like this,
            # for example Training/41124801
            sky_direction_scene = "RIGHT"
            cam_to_rotated_q = quaternion.from_rotation_vector(
                [0.0, 0.0, -math.pi / 2.0]
            )
    else:
        # right is close to 90
        assert abs(device_right_to_world_up_angle - 90.0) < 45.0
        if device_up_to_world_up_angle > 90.0:
            sky_direction_scene = "DOWN"
            cam_to_rotated_q = quaternion.from_rotation_vector([0.0, 0.0, math.pi])
        else:
            sky_direction_scene = "UP"
            cam_to_rotated_q = quaternion.quaternion(1, 0, 0, 0)

    cam_to_rotated = np.eye(4)
    cam_to_rotated[:3, :3] = quaternion.as_rotation_matrix(cam_to_rotated_q)
    rotated_to_cam = np.linalg.inv(cam_to_rotated)
    return sky_direction_scene, rotated_to_cam


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    main(args.arkitscenes_dir, args.output_dir)
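# Example invocation (the script name and input path below are illustrative
# and depend on where the raw ARKitScenes download lives):
#
#   python preprocess_arkitscenes_highres.py \
#       --arkitscenes_dir data/arkitscenes \
#       --output_dir data/dust3r_data/processed_arkitscenes_highres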