#include <fcntl.h>
#include <grp.h>
#include <pwd.h>
#include <sched.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <new>
#include <string>
#include <vector>

// Everything needed to launch one container. Passed by pointer to
// container_function() through clone().
//
// The limit fields default to "no limit" as interpreted by main(): a
// default-constructed config therefore never carries indeterminate values.
struct ContainerConfig {
    std::string name;               // used as the hostname and as the cgroup directory name
    std::string rootfs;             // directory the container chroot()s into
    std::string command;            // program handed to execvp()
    std::vector<std::string> args;  // arguments following the command (argv[1..])

    int memory_limit_mb = 0;   // > 0: written to memory.max (converted to bytes); otherwise not enforced
    int cpu_shares      = 0;   // > 0: converted to a cpu.max quota (1024 == one full period); otherwise not enforced
    int swap_limit_mb   = -1;  // >= 0: written to memory.swap.max (0 disables swap); negative: not enforced

    std::string cgroup_path; // full path: /sys/fs/cgroup/socker/<name>
};

// Parent cgroup directory; each container gets a child directory named after it.
const std::string cgroup_base_path{"/sys/fs/cgroup/socker"};

// Size in bytes (1 MiB) of the stack handed to the cloned container process.
constexpr int CONTAINER_STACK_SIZE = 1 << 20;

// Writes a string value to a cgroup interface file.
//
// @param path   file to open for writing (truncated/created like any std::ofstream)
// @param value  text written verbatim, without a trailing newline
// @return true only if the file was opened AND the value was actually flushed
//         to it; on any failure an error is printed to stderr and false is
//         returned.
bool write_cgroup_file(const std::string& path, const std::string& value) {
    std::ofstream file(path);
    if (!file.is_open()) {
        std::cerr << "Failed to write to " << path << ": " << strerror(errno) << std::endl;
        return false;
    }

    file << value;
    // The stream is buffered, so the underlying write only happens on flush.
    // Flush here (instead of silently in the destructor) so that a rejected
    // value is reported to the caller rather than being treated as success.
    file.flush();
    if (!file) {
        std::cerr << "Failed to write to " << path << ": " << strerror(errno) << std::endl;
        return false;
    }
    return true;
}

// Polls until this process sees itself as uid 0 and gid 0, i.e. until the
// parent has written the "0 <host id> 1" uid_map/gid_map entries for it.
// Gives up after roughly two seconds. Returns true if the mapping is in place.
static bool wait_for_root_id_mapping() {
    constexpr int poll_interval_us = 1000;
    constexpr int max_polls        = 2000;

    for (int attempt = 0; attempt < max_polls; ++attempt) {
        if (getuid() == 0 && getgid() == 0) {
            return true;
        }
        usleep(poll_interval_us);
    }
    return getuid() == 0 && getgid() == 0;
}

// Entry point of the container process started by clone().
//
// @param arg  pointer to the ContainerConfig describing the container
// @return only returns on failure (always 1, after printing the reason to
//         stderr); on success the process image is replaced by execvp().
//
// Steps, in order: wait for the uid/gid mapping, join the container's cgroup,
// set the hostname, make mounts private, mount /proc, /sys and /dev into the
// rootfs, chroot into it, and exec the configured command.
int container_function(void* arg) {
    ContainerConfig& config = *static_cast<ContainerConfig*>(arg);

    // The parent writes uid_map/gid_map only after clone() returns, so this
    // process may start running before the mapping exists. Wait (bounded) for
    // it instead of racing ahead; on timeout keep going so a failed mapping
    // does not hang the container forever.
    if (!wait_for_root_id_mapping()) {
        std::cerr << "Warning: uid/gid mapping not established, continuing without it" << std::endl;
    }

    // Move this process into the container's cgroup so the limits apply to it
    // and to everything it executes.
    if (!write_cgroup_file(config.cgroup_path + "/cgroup.procs", std::to_string(getpid()))) {
        std::cerr << "Failed to move process into cgroup" << std::endl;
        return 1;
    }

    // Set the container hostname to the container name.
    if (sethostname(config.name.c_str(), config.name.length()) != 0) {
        std::cerr << "sethostname failed: " << strerror(errno) << std::endl;
        return 1;
    }

    // Make the mount namespace private so our mounts don't propagate to the host.
    if (mount(nullptr, "/", nullptr, MS_PRIVATE | MS_REC, nullptr) != 0) {
        std::cerr << "mount MS_PRIVATE failed: " << strerror(errno) << std::endl;
        return 1;
    }

    // Set up the container filesystem: fresh procfs and sysfs, plus a
    // recursive bind mount of the host's /dev.
    if (mount("proc", (config.rootfs + "/proc").c_str(), "proc", 0, "") != 0) {
        std::cerr << "mount /proc failed: " << strerror(errno) << std::endl;
        return 1;
    }

    if (mount("sysfs", (config.rootfs + "/sys").c_str(), "sysfs", 0, "") != 0) {
        std::cerr << "mount /sys failed: " << strerror(errno) << std::endl;
        return 1;
    }

    if (mount("/dev", (config.rootfs + "/dev").c_str(), nullptr, MS_BIND | MS_REC, "") != 0) {
        std::cerr << "mount /dev failed: " << strerror(errno) << std::endl;
        return 1;
    }

    // Confine the process to the rootfs and make "/" the working directory so
    // no directory handle outside the new root stays reachable via cwd.
    if (chroot(config.rootfs.c_str()) != 0) {
        std::cerr << "chroot failed: " << strerror(errno) << std::endl;
        return 1;
    }

    if (chdir("/") != 0) {
        std::cerr << "chdir failed: " << strerror(errno) << std::endl;
        return 1;
    }

    // Build the null-terminated argv for execvp: command first, then its
    // arguments. execvp takes char* const[], hence the const_casts; the
    // strings themselves are not modified.
    std::vector<char*> args_ptrs;
    args_ptrs.reserve(config.args.size() + 2);
    args_ptrs.push_back(const_cast<char*>(config.command.c_str()));
    for (const auto& arg : config.args) {
        args_ptrs.push_back(const_cast<char*>(arg.c_str()));
    }
    args_ptrs.push_back(nullptr);

    // Actually run the command inside the container
    execvp(config.command.c_str(), args_ptrs.data());

    // Only reached if execvp failed.
    std::cerr << "execvp failed: " << strerror(errno) << std::endl;
    return 1;
}

int main(int argc, char* argv[]) {
    if (argc < 4) {
        std::cerr << "Usage: " << argv[0] << " <container_name> <rootfs_path> <command> [args...]" << std::endl;
        return 1;
    }

    ContainerConfig config;
    config.name        = argv[1];
    config.rootfs      = argv[2];
    config.command     = argv[3];
    config.cgroup_path = cgroup_base_path + "/" + config.name;

    for (int i = 4; i < argc; i++) {
        config.args.push_back(argv[i]);
    }

    config.memory_limit_mb = 10;
    config.cpu_shares      = 1024;
    config.swap_limit_mb   = 0;  // 0 = disable swap so memory limit is observable

    // TODO: create the cgroup directory for this container
    try {
        std::filesystem::create_directories(config.cgroup_path);
    } catch (const std::filesystem::filesystem_error& e) {
        std::cerr << "Failed to create cgroup directory: " << e.what() << std::endl;
        return 1;
    }

    // TODO: enable the memory and cpu controllers in the parent cgroup
    if (!write_cgroup_file(cgroup_base_path + "/cgroup.subtree_control", "+memory +cpu")) {
        return 1;
    }

    if (config.memory_limit_mb > 0) {
        // TODO: enforce the memory limit
        if (!write_cgroup_file(config.cgroup_path + "/memory.max",
                               std::to_string((long)config.memory_limit_mb * 1024 * 1024))) {
            return 1;
        }
    }

    if (config.swap_limit_mb >= 0) {
        // TODO: enforce the swap limit
        if (!write_cgroup_file(config.cgroup_path + "/memory.swap.max",
                               std::to_string((long)config.swap_limit_mb * 1024 * 1024))) {
            return 1;
        }
    }

    if (config.cpu_shares > 0) {
        // TODO: enforce the CPU limit
        // cpu.max format: "<quota> <period>", period = 100000 microseconds
        // quota = (cpu_shares * period) / 1024
        int64_t quota = ((int64_t)config.cpu_shares * 100000) / 1024;
        if (!write_cgroup_file(config.cgroup_path + "/cpu.max",
                               std::to_string(quota) + " 100000")) {
            return 1;
        }
    }

    char* container_stack = (char*)malloc(CONTAINER_STACK_SIZE);
    if (!container_stack) {
        std::cerr << "allocation failed: " << strerror(errno) << std::endl;
        return 1;
    }

    // TODO: start the container in new namespaces using clone()
    int clone_flags = CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWNET | CLONE_NEWIPC;
    pid_t container_pid = clone(
        container_function,
        container_stack + CONTAINER_STACK_SIZE,
        clone_flags | SIGCHLD,
        &config
    );

    if (container_pid == -1) {
        std::cerr << "clone failed: " << strerror(errno) << std::endl;
        free(container_stack);
        return 1;
    }

    // Ask us about this :)
    std::ofstream("/proc/" + std::to_string(container_pid) + "/uid_map") << "0 " << getuid() << " 1\n";
    std::ofstream("/proc/" + std::to_string(container_pid) + "/setgroups") << "deny\n";
    std::ofstream("/proc/" + std::to_string(container_pid) + "/gid_map") << "0 " << getgid() << " 1\n";

    int status;
    waitpid(container_pid, &status, 0);

    // TODO: unmount in reverse order
    umount2((config.rootfs + "/dev").c_str(), MNT_DETACH);
    umount2((config.rootfs + "/sys").c_str(), MNT_DETACH);
    umount2((config.rootfs + "/proc").c_str(), MNT_DETACH);

    // TODO: remove the container's cgroup directory
    try {
        std::filesystem::remove(config.cgroup_path);
    } catch (const std::filesystem::filesystem_error& e) {
        std::cerr << "Failed to remove cgroup directory: " << e.what() << std::endl;
    }

    free(container_stack);

    return 0;
}