#include <l4/cxx/iostream>
#include <l4/re/env>
#include <l4/re/error_helper>
#include <l4/re/util/cap_alloc>
#include <l4/re/util/kumem_alloc>
#include <l4/sys/debugger.h>
#include <l4/sys/factory>
#include <l4/sys/ipc.h>
#include <l4/sys/ipc_gate>
#include <l4/sys/kdebug.h>
#include <l4/sys/scheduler>
#include <l4/sys/task>
#include <l4/sys/thread>
#include <l4/sys/utcb.h>
#include <l4/util/util.h>

#include <cstdio>
#include <cstring>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/**
 * Load the binary "rom/faulty" into a freshly allocated dataspace, create a
 * new task and a thread for it, start the thread, and then serve the
 * messages arriving at the IPC gate that is registered as the thread's pager
 * and exception handler.
 *
 * The entry point, stack address and the set of pages handed out are
 * hard-coded for this one binary.
 *
 * The function returns once more than 10 messages have been handled; an
 * unknown fault address puts the calling thread to sleep forever.
 *
 * Throws L4::Runtime_error if the binary cannot be opened, inspected, mapped
 * or does not fit into the local buffer; the chksys()/chkcap() helpers report
 * failures of the L4Re calls.
 */
static void execute_faulty()
{
    printf("Hello, I am your executor!\n");

    using L4Re::chksys;
    using L4Re::chkcap;
    auto const &env = L4Re::Env::env();

    // allocate memory for faulty
    const size_t Mem_size = 8 * L4_PAGESIZE;

    L4::Cap<L4Re::Dataspace> mem_cap =
        chkcap(L4Re::Util::cap_alloc.alloc<L4Re::Dataspace>(),
               "Dataspace cap alloc");
    chksys(env->mem_alloc()->alloc(Mem_size, mem_cap, 0),
           "Dataspace alloc");

    l4_addr_t mem_addr = 0;
    chksys(env->rm()->attach(&mem_addr, Mem_size,
                             L4Re::Rm::F::Search_addr | L4Re::Rm::F::RWX,
                             mem_cap, 0),
           "attach Dataspace");
    printf("%zu bytes of memory at mem_addr=%lx\n", Mem_size, mem_addr);

    // map faulty's binary into our address space
    int fd = open("rom/faulty", O_RDONLY);
    if (fd < 0)
        throw L4::Runtime_error(-errno, "open() failed");

    struct stat st;
    if (fstat(fd, &st) != 0) {
        // remember errno before close() gets a chance to overwrite it
        int const saved_errno = errno;
        close(fd);
        throw L4::Runtime_error(-saved_errno, "fstat() failed");
    }

    // the binary is copied into the local buffer below, so it must not be
    // larger than that buffer
    if (st.st_size < 0 || static_cast<size_t>(st.st_size) > Mem_size) {
        close(fd);
        throw L4::Runtime_error(-ENOMEM, "faulty binary does not fit into buffer");
    }
    size_t const bin_size = static_cast<size_t>(st.st_size);

    void *faulty_bin = mmap(nullptr, bin_size, PROT_READ | PROT_EXEC,
                            MAP_SHARED, fd, 0);
    if (faulty_bin == MAP_FAILED) {
        int const saved_errno = errno;
        close(fd);
        throw L4::Runtime_error(-saved_errno, "mmap() failed");
    }
    printf("faulty_bin=%p, size=%zu\n", faulty_bin, bin_size);

    // memset() is not necessary to zero out the memory we just got, but it
    // does ensure that the whole dataspace is mapped writable in this address
    // space
    char *mem_ptr = reinterpret_cast<char*>(mem_addr);
    memset(mem_ptr, 0, Mem_size);
    memcpy(mem_ptr, faulty_bin, bin_size);

    // the private copy is all we need from here on; release the file mapping
    // and the descriptor
    munmap(faulty_bin, bin_size);
    close(fd);

    // create new task for faulty
    L4::Cap<L4::Task> task_cap =
        chkcap(L4Re::Util::cap_alloc.alloc<L4::Task>(), "Task cap alloc");
    auto utcb_area = env->utcb_area();
    chksys(env->factory()->create_task(task_cap, &utcb_area),
           "Create task");

    // create new thread to execute in faulty's new task
    L4::Cap<L4::Thread> thread_cap = chkcap(L4Re::Util::cap_alloc.alloc<L4::Thread>(),
                                            "Thread cap alloc");
    chksys(env->factory()->create(thread_cap), "Create thread");

    // we tell the kernel debugger that the newly created thread shall be named
    // "faulty thread", so that we can easily identify it when we check the thread
    // list in the debugger; this is not mandatory and just for debugging and
    // demonstration purposes
    l4_debugger_set_object_name(thread_cap.cap(), "faulty thread");

    // create new IPC gate, which we will set as pager and exception handler
    // for faulty's main thread; the gate is bound to our main thread, so the
    // messages sent to it are received by the loop at the end of this function
    L4::Cap<L4::Ipc_gate> gate_cap = chkcap(L4Re::Util::cap_alloc.alloc<L4::Ipc_gate>(),
                                            "Ipc_gate cap alloc");
    l4_umword_t protocol_id = l4_umword_t(0x1000);
    chksys(env->factory()->create_gate(gate_cap, env->main_thread(), protocol_id),
           "Create IPC gate");

    // map initial capabilities into faulty's address space
    chksys(task_cap->map(L4Re::This_task, gate_cap.fpage(), gate_cap.snd_base()));
    chksys(task_cap->map(L4Re::This_task, env->log().fpage(), env->log().snd_base()));

    // configure faulty's main thread
    // the entry point (address of the first instruction to be executed) is
    // hard-coded and must be obtained by extracting it from the ELF header of
    // faulty's binary file; the stack pointer is arbitrarily chosen, but
    // should not collide with any other sections used by the program
    l4_addr_t  thread_entry = 0x004001cc;

    // the initial stack pointer lies inside the single stack page at
    // 0xb0000000 that the pager below hands out, so the pager only needs to
    // support that one stack page
    l4_addr_t  thread_stack = 0xb0000800;

    // put the UTCB of faulty's main thread at the beginning of the UTCB area
    l4_utcb_t *thread_utcb =
        reinterpret_cast<l4_utcb_t*>(l4_fpage_page(env->utcb_area())
                                                    << L4_PAGESHIFT);

    printf("thread_entry=%lx, thread_stack=%lx, thread_utcb=%p\n",
           thread_entry, thread_stack, thread_utcb);

    L4::Thread::Attr thread_attr;
    thread_attr.pager(gate_cap);
    thread_attr.exc_handler(gate_cap);
    thread_attr.bind(thread_utcb, task_cap);

    chksys(thread_cap->control(thread_attr));

    // start faulty's main thread
    chksys(thread_cap->ex_regs(thread_entry, thread_stack, 0), "Thread ex_regs");
    chksys(env->scheduler()->run_thread(thread_cap, l4_sched_param(2)));

    // start handling pagefaults in faulty
    l4_utcb_t *utcb = l4_utcb();
    int num_pagefaults = 0;
    l4_umword_t label;
    l4_msgtag_t tag = l4_ipc_wait(utcb, &label, L4_IPC_NEVER);
    while (true) {
        // after a failed IPC the message registers hold no valid request, so
        // do not interpret them; go back to waiting for the next message
        l4_umword_t const ipc_err = l4_ipc_error(tag, utcb);
        if (ipc_err) {
            printf("IPC error %lx, waiting for next message\n", ipc_err);
            tag = l4_ipc_wait(utcb, &label, L4_IPC_NEVER);
            continue;
        }

        printf("label=%lx\n", label);

        l4_msg_regs_t *mr = l4_utcb_mr_u(utcb);
        for (unsigned i = 0; i < 2; i++)
            printf("mr[%u]=%lx\n", i, mr->mr[i]);

        // handle pagefault
        l4_addr_t fault_address = mr->mr[0];
        l4_addr_t page_base = fault_address & L4_PAGEMASK;
        l4_addr_t send_base;

        // the switch arms below are hardcoded for the 'faulty' binary; the
        // magic numbers have been obtained by manual inspection of the
        // readelf output; a real pager would learn about the program's
        // address space structure by parsing the ELF header of the binary;
        // but since we only want to demonstrate the basic mechanisms behind
        // constructing a new task and running a program in it, this is
        // sufficient
        //
        // NOTE(review): earlier comments on the second and third arm
        // described mapping the first resp. second 4 KiB of the binary, which
        // does not match the offsets used by the code; the comments below
        // describe what the code does -- confirm the offsets against the
        // readelf output of the binary
        printf("page_base=%lx\n", page_base);
        switch (page_base) {
            case 0x00400000:
                // this is the first page of the program's address space
                // containing readonly sections such as .text, etc.; it is
                // backed by the first page of our copy of the binary
                send_base = mem_addr;
                break;
            case 0x00401000:
                // this is the second page, containing sections such as
                // .init_array, etc.; it is backed by the second page of our
                // copy
                send_base = mem_addr + 1 * L4_PAGESIZE;
                break;
            case 0x00402000:
                // this page contains .bss and .data; .bss has to be
                // initialized with zeros (and be writable); it is backed by
                // the third page of our copy, whose bytes beyond the end of
                // the binary were zeroed by the memset() above
                send_base = mem_addr + 2 * L4_PAGESIZE;
                break;
            case 0xb0000000:
                // this is the stack page, backed by the fourth page of our
                // buffer
                send_base = mem_addr + 3 * L4_PAGESIZE;
                break;
            default:
                // if we end up here, we did something wrong :)
                printf("Unknown page base %lx\n", page_base);
                printf("Sleeping now, because I don't know what to do!\n");
                l4_sleep_forever();
                break;
        }

        // we have to resolve at least four pagefaults to start faulty; if we catch more than 10, we
        // probably did something wrong
        if (++num_pagefaults > 10)
            break;

        // a description of which page (in the local address space) should be mapped
        // to the other task (at the page base of the faulting address) is made available
        // to the kernel by populating message registers in the UTCB: mr[0] describes
        // the location in the target address space, mr[1] describes the page in the
        // local address space that shall be mapped; see documentation for flex pages
        // in the respective header files
        mr->mr[0] = (page_base | L4_ITEM_MAP);
        // the flexpage order is L4_PAGESHIFT so that it always matches the
        // L4_PAGEMASK used to compute page_base above
        mr->mr[1] = l4_fpage(send_base, L4_PAGESHIFT, L4_FPAGE_RWX).fpage;

        // the content of the message registers (and buffer registers) in UTCB are
        // described to the kernel by means of a l4_msgtag_t; it basically specifies
        // how many message registers shall be interpreted as direct words (zero in
        // this case) and how many flex page mappings are in there (1 in this case);
        // see documentation of l4_msgtag() function for further details;
        // we send the pagefault response and atomically switch to waiting for the
        // next IPC message by using l4_ipc_reply_and_wait() in this service loop
        l4_msgtag_t reply = l4_msgtag(0, 0, 1, 0);
        tag = l4_ipc_reply_and_wait(utcb, reply, &label, L4_IPC_SEND_TIMEOUT_0);
    }
}

// Program entry point: runs the executor and then blocks forever so that the
// kernel objects created for faulty stay referenced. Command line arguments
// are ignored. Exits with status 1 if an L4::Runtime_error escapes.
int main(int argc, char const *argv[])
{
    static_cast<void>(argc);
    static_cast<void>(argv);

    int exit_code = 0;

    try {
        execute_faulty();

        // block forever: we must keep a reference to faulty's kernel objects alive
        printf("Going to sleep forever ...\n");
        l4_sleep_forever();

        // not reached; exit_code stays 0
    }
    catch (L4::Runtime_error &e) {
        L4::cerr << "Error: " << e;
        exit_code = 1;
    }

    return exit_code;
}
