/* * Copyright (C) 2016-2020, 2022-2024 Kernkonzept GmbH. * Author(s): Jean Wolter * Manuel von Oltersdorff-Kalettka * * License: see LICENSE.spdx (in this directory or the directories above) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #include "options.h" #include "switch.h" #include "vlan.h" #include /** * \defgroup virtio_net_switch Virtio Net Switch * * A virtual network switch that can be used as defined in the virtio protocol. * * The abstraction of a single connection with a network device (also called * client) from the switch's perspective is a port. A client can register * multiple ports on the switch. The communication between a client and the * switch happens via IRQs, MMIO and shared memory as defined by the Virtio * protocol. The switch supports VLANs and ports can be either 'access' or * 'trunk' ports. * The optionally available monitor port receives network traffic from all * ports, and the monitor can not send. * * \{ */ /* * Registry for our server, used to register * - factory capability * - irq object for capability deletion irqs * - virtio host kick irqs */ static L4Re::Util::Registry_server server; using Ds_vector = std::vector>; static std::shared_ptr trusted_dataspaces; static bool parse_int_param(L4::Ipc::Varg const ¶m, char const *prefix, int *out) { l4_size_t headlen = strlen(prefix); if (param.length() < headlen) return false; char const *pstr = param.value(); if (strncmp(pstr, prefix, headlen) != 0) return false; std::string tail(pstr + headlen, param.length() - headlen); if (!parse_int_optstring(tail.c_str(), out)) { Err(Err::Normal).printf("Bad parameter '%s'. Invalid number specified.\n", prefix); throw L4::Runtime_error(-L4_EINVAL); } return true; } static void assign_random_mac(l4_uint8_t mac[6]) { static bool initialized = false; if (!initialized) { srandom(l4_kip_clock(l4re_kip())); initialized = true; } for (int i = 0; i < 6; i++) mac[i] = static_cast(random()); mac[0] &= ~(1U << 0); // clear multicast bit mac[0] |= 1U << 1; // set "locally administered" bit } /** * The IPC interface for creating ports. * * The Switch factory provides an IPC interface to create ports. Ports are * the only option for a client to communicate with the switch and, thus, with * other network devices. * * The `Switch_factory` gets constructed when the net switch application gets * started. It thereafter gets registered on the switch's server to serve IPC * `create` calls. */ class Switch_factory : public L4::Epiface_t { /** * Implement the generic irq related part of the port */ class Port : public L4virtio_port { // Irq used to notify the guest L4::Cap _device_notify_irq; L4::Cap device_notify_irq() const override { return _device_notify_irq; } public: Port(unsigned vq_max, unsigned num_ds, char const *name, l4_uint8_t const *mac) : L4virtio_port(vq_max, num_ds, name, mac) {} /** register the host IRQ and the port itself on the switch's server */ void register_end_points(L4Re::Util::Object_registry* registry, L4::Epiface *kick_irq) { // register virtio host kick irq _device_notify_irq = L4Re::chkcap(registry->register_irq_obj(kick_irq)); // register virtio endpoint L4Re::chkcap(registry->register_obj(this)); // decrement ref counter to get a notification when the last // external reference vanishes obj_cap()->dec_refcnt(1); } virtual ~Port() { server.registry()->unregister_obj(this); } }; /** * Implement the irq related part of a switched port */ class Switch_port : public Port { /** * IRQ endpoint on the port. * * Each port holds its own IRQ that gets triggered by the client whenever * there is a new outgoing request in the port's transmission queue or when * there is new space in the port's receive queue. * * A `Kick_irq` is constructed on port creation. At this time, it also gets * registered on the switch's server. */ class Kick_irq : public L4::Irqep_t { Virtio_switch *_switch; /**< pointer to the net switch */ L4virtio_port *_port; /**< pointer to the associated port */ public: /** * Callback for the IRQ * * This function redirects the call to `Virtio_switch::handle_l4virtio_port_tx`, * since the port cannot finish a transmission on its own. */ void handle_irq() { _switch->handle_l4virtio_port_tx(_port); } Kick_irq(Virtio_switch *virtio_switch, L4virtio_port *port) : _switch{virtio_switch}, _port{port} {} }; Kick_irq _kick_irq; /**< The IRQ to notify the client. */ Kick_irq _reschedule_tx_irq; public: Switch_port(L4Re::Util::Object_registry *registry, Virtio_switch *virtio_switch, unsigned vq_max, unsigned num_ds, char const *name, l4_uint8_t const *mac) : Port(vq_max, num_ds, name, mac), _kick_irq(virtio_switch, this), _reschedule_tx_irq(virtio_switch, this) { register_end_points(registry, &_kick_irq); _pending_tx_reschedule = L4Re::chkcap(registry->register_irq_obj(&_reschedule_tx_irq), "Register TX reschedule IRQ."); _pending_tx_reschedule->unmask(); } virtual ~Switch_port() { // We need to delete the IRQ object created in register_irq_obj() ourselves L4::Cap(L4Re::This_task) ->unmap(_kick_irq.obj_cap().fpage(), L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ); server.registry()->unregister_obj(&_kick_irq); L4::Cap(L4Re::This_task) ->unmap(_pending_tx_reschedule.fpage(), L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ); server.registry()->unregister_obj(&_reschedule_tx_irq); } }; /** * Implement the irq related part of a monitor port */ class Monitor_port : public Port { /** * Handle incoming irqs by * - handling pending outgoing requests * - dropping all incoming requests */ class Kick_irq : public L4::Irqep_t { L4virtio_port *_port; public: /** * Callback for the IRQ * * A Monitor port processes only requests on its receive queue and drops * all requests on the transmit queue since it is not supposed to send * network request. */ void handle_irq() { do { _port->tx_q()->disable_notify(); _port->rx_q()->disable_notify(); _port->drop_requests(); _port->tx_q()->enable_notify(); _port->rx_q()->enable_notify(); L4virtio::wmb(); L4virtio::rmb(); } while (_port->tx_work_pending()); } Kick_irq(L4virtio_port *port) : _port{port} {} }; Kick_irq _kick_irq; public: Monitor_port(L4Re::Util::Object_registry* registry, unsigned vq_max, unsigned num_ds, char const *name, l4_uint8_t const *mac) : Port(vq_max, num_ds, name, mac), _kick_irq(this) { register_end_points(registry, &_kick_irq); } virtual ~Monitor_port() { // We need to delete the IRQ object created in register_irq_obj() ourselves L4::Cap(L4Re::This_task) ->unmap(_kick_irq.obj_cap().fpage(), L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ); server.registry()->unregister_obj(&_kick_irq); } }; /** * Implement the handler for the statistics reader capability. */ class Stats_reader : public cxx::D_list_item, public L4::Epiface_t { L4Re::Util::Unique_cap _ds; l4_addr_t _addr; public: Stats_reader() { l4_size_t size = Switch_statistics::get_instance().size(); _ds = L4Re::Util::make_unique_cap(); L4Re::chksys(L4Re::Env::env()->mem_alloc()->alloc(size, _ds.get()), "Could not allocate shared mem ds."); L4Re::chksys(L4Re::Env::env()->rm()->attach(&_addr, _ds->size(), L4Re::Rm::F::Search_addr | L4Re::Rm::F::RW, L4::Ipc::make_cap_rw(_ds.get()))); memset(reinterpret_cast(_addr), 0, _ds->size()); } ~Stats_reader() { L4Re::Env::env()->rm()->detach(reinterpret_cast(_addr), 0); server.registry()->unregister_obj(this); } long op_get_buffer(Virtio_net_switch::Statistics_if::Rights, L4::Ipc::Cap &ds) { // We hand out the dataspace in a read only manner. Clients must not be // able to modify information as that would create an unwanted data // channel. ds = L4::Ipc::Cap(_ds.get(), L4_CAP_FPAGE_RO); return L4_EOK; } long op_sync(Virtio_net_switch::Statistics_if::Rights) { memcpy(reinterpret_cast(_addr), reinterpret_cast(Switch_statistics::get_instance().stats()), Switch_statistics::get_instance().size()); return L4_EOK; } bool is_valid() { return obj_cap() && obj_cap().validate().label(); } }; class Stats_reader_list { cxx::D_list _readers; public: void check_readers() { auto it = _readers.begin(); while (it != _readers.end()) { auto *reader = *it; if (!reader->is_valid()) { it = _readers.erase(it); delete reader; } else ++it; } } void push_back(cxx::unique_ptr reader) { _readers.push_back(reader.release()); } }; /* * Handle vanishing caps by telling the switch that a port might have gone */ struct Del_cap_irq : public L4::Irqep_t { public: void handle_irq() { _switch->check_ports(); _stats_readers->check_readers(); } Del_cap_irq(Virtio_switch *virtio_switch, Stats_reader_list *stats_readers) : _switch{virtio_switch}, _stats_readers{stats_readers} {} private: Virtio_switch *_switch; Stats_reader_list *_stats_readers; }; Virtio_switch *_virtio_switch; /**< pointer to the actual net switch object */ /** maximum number of entries in a new virtqueueue created for a port */ unsigned _vq_max_num; Stats_reader_list _stats_readers; Del_cap_irq _del_cap_irq; /** * Evaluate an optional argument * * \param opt Optional argument. * \param[out] monitor Set to true if argument is "type=monitor". * \param name Pointer to name. * \param size Size of name. * \param[out] vlan_access Id of VLAN access port if "vlan=access=" is * present. * \param[out] vlan_trunk List of VLANs if "vlan=trunk=[[, &vlan_trunk, bool *vlan_trunk_all, l4_uint8_t mac[6], bool &mac_set) { assert(opt.is_of()); unsigned len = opt.length(); const char *opt_str = opt.data(); Err err(Err::Normal); if (len > 5) { if (!strncmp("type=", opt_str, 5)) { if (!strncmp("type=monitor", opt_str, len)) { monitor = true; return true; } else if (!strncmp("type=none", opt_str, len)) return true; err.printf("Unknown type '%.*s'\n", opt.length() - 5, opt.data() + 5); return false; } else if (!strncmp("name=", opt_str, 5)) { snprintf(name, size, "%.*s", opt.length() - 5, opt.data() + 5); return true; } else if (!strncmp("vlan=", opt_str, 5)) { cxx::String str(opt_str + 5, strnlen(opt_str + 5, len - 5)); cxx::String::Index idx; if ((idx = str.starts_with("access="))) { str = str.substr(idx); l4_uint16_t vid; int next = str.from_dec(&vid); if (next && next == str.len() && vlan_valid_id(vid)) vlan_access = vid; else { err.printf("Invalid VLAN access port id '%.*s'\n", opt.length(), opt.data()); return false; } } else if ((idx = str.starts_with("trunk="))) { int next; l4_uint16_t vid; str = str.substr(idx); if (str == cxx::String("all")) { *vlan_trunk_all = true; return true; } while ((next = str.from_dec(&vid))) { if (!vlan_valid_id(vid)) break; vlan_trunk.push_back(vid); if (next < str.len() && str[next] != ',') break; str = str.substr(next+1); } if (vlan_trunk.empty() || !str.empty()) { err.printf("Invalid VLAN trunk port spec '%.*s'\n", opt.length(), opt.data()); return false; } } else { err.printf("Invalid VLAN specification..\n"); return false; } return true; } else if (!strncmp("mac=", opt_str, 4)) { size_t const OPT_LEN = 4 /* mac= */ + 6*2 /* digits */ + 5 /* : */; // expect NUL terminated string for simplicity if (len > OPT_LEN && opt_str[OPT_LEN] == '\0' && sscanf(opt_str+4, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) { mac_set = true; return true; } err.printf("Invalid mac address '%.*s'\n", len - 4, opt_str + 4); return false; } } err.printf("Unknown option '%.*s'\n", opt.length(), opt.data()); return false; } public: Switch_factory(Virtio_switch *virtio_switch, unsigned vq_max_num) : _virtio_switch{virtio_switch}, _vq_max_num{vq_max_num}, _del_cap_irq{virtio_switch, &_stats_readers} { auto c = L4Re::chkcap(server.registry()->register_irq_obj(&_del_cap_irq)); L4Re::chksys(L4Re::Env::env()->main_thread()->register_del_irq(c)); }; /** * Handle factory protocol * * This function is invoked after an incoming factory::create * request and creates a new port or statistics interface if possible. */ long op_create(L4::Factory::Rights, L4::Ipc::Cap &res, l4_umword_t type, L4::Ipc::Varg_list_ref va) { switch (type) { case 0: return create_port(res, va); case 1: return create_stats(res); default: Dbg(Dbg::Core, Dbg::Warn).printf("op_create: Invalid object type\n"); return -L4_EINVAL; } } long create_port(L4::Ipc::Cap &res, L4::Ipc::Varg_list_ref va) { Dbg warn(Dbg::Port, Dbg::Warn, "Port"); Dbg info(Dbg::Port, Dbg::Info, "Port"); info.printf("Incoming port request\n"); bool monitor = false; char name[20] = ""; unsigned arg_n = 2; l4_uint16_t vlan_access = 0; std::vector vlan_trunk; bool vlan_trunk_all = false; l4_uint8_t mac[6]; bool mac_set = false; int num_ds = 2; for (L4::Ipc::Varg opt: va) { if (!opt.is_of()) { warn.printf("Unexpected type for argument %d\n", arg_n); return -L4_EINVAL; } if (parse_int_param(opt, "ds-max=", &num_ds)) { if (num_ds <= 0 || num_ds > 80) { Err(Err::Normal).printf("warning: client requested invalid number" " of data spaces: 0 < %d <= 80\n", num_ds); return -L4_EINVAL; } } else if (!handle_opt_arg(opt, monitor, name, sizeof(name), vlan_access, vlan_trunk, &vlan_trunk_all, mac, mac_set)) return -L4_EINVAL; ++arg_n; } int port_num = _virtio_switch->port_available(monitor); if (port_num < 0) { warn.printf("No port available\n"); return -L4_ENOMEM; } if (vlan_access && (!vlan_trunk.empty() || vlan_trunk_all)) { warn.printf("VLAN port cannot be access and trunk simultaneously.\n"); return -L4_EINVAL; } if (!name[0]) snprintf(name, sizeof(name), "%s[%d]", monitor ? "monitor" : "", port_num); info.printf(" Creating port %s%s\n", name, monitor ? " as monitor port" : ""); // Assign a random MAC address if we assign one to our devices but the // user has not passed an explicit one for a port. if (!mac_set && Options::get_options()->assign_mac()) assign_random_mac(mac); l4_uint8_t *mac_ptr = (mac_set || Options::get_options()->assign_mac()) ? mac : nullptr; // create port Port *port; if (monitor) { port = new Monitor_port(server.registry(), _vq_max_num, num_ds, name, mac_ptr); port->set_monitor(); if (vlan_access) warn.printf("vlan=access= ignored on monitor ports!\n"); if (!vlan_trunk.empty()) warn.printf("vlan=trunk=... ignored on monitor ports!\n"); } else { port = new Switch_port(server.registry(), _virtio_switch, _vq_max_num, num_ds, name, mac_ptr); if (vlan_access) port->set_vlan_access(vlan_access); else if (vlan_trunk_all) port->set_vlan_trunk_all(); else if (!vlan_trunk.empty()) port->set_vlan_trunk(vlan_trunk); } port->add_trusted_dataspaces(trusted_dataspaces); if (!trusted_dataspaces->empty()) port->enable_trusted_ds_validation(); // hand port over to the switch bool added = monitor ? _virtio_switch->add_monitor_port(port) : _virtio_switch->add_port(port); if (!added) { delete port; return -L4_ENOMEM; } res = L4::Ipc::make_cap(port->obj_cap(), L4_CAP_FPAGE_RWSD); info.printf(" Created port %s\n", name); return L4_EOK; } long create_stats(L4::Ipc::Cap &res) { // Create a stats reader and throw away our reference to get a notification // when the external reference vanishes. auto reader = cxx::make_unique(); L4Re::chkcap(server.registry()->register_obj(reader.get())); reader->obj_cap()->dec_refcnt(1); res = L4::Ipc::make_cap(reader->obj_cap(), L4_CAP_FPAGE_R | L4_CAP_FPAGE_D); _stats_readers.push_back(cxx::move(reader)); return L4_EOK; } }; #if CONFIG_VNS_IXL /** * Implement the irq related part of an ixl port. */ class Ixl_hw_port : public Ixl_port { template class Port_irq : public L4::Irqep_t { public: Port_irq(Virtio_switch *virtio_switch, Ixl_port *port) : _switch{virtio_switch}, _port{port} {} protected: Virtio_switch *_switch; Ixl_port *_port; }; class Receive_irq : public Port_irq { public: using Port_irq::Port_irq; /** * Callback for the IRQ * * This function redirects the call to `Virtio_switch::handle_ixl_port_tx`, * since the port cannot finish a transmission on its own. */ void handle_irq() { if (!_port->dev()->check_recv_irq(0)) return; if (_switch->handle_ixl_port_tx(_port)) _port->dev()->ack_recv_irq(0); } }; class Reschedule_tx_irq : public Port_irq { public: using Port_irq::Port_irq; void handle_irq() { if (_switch->handle_ixl_port_tx(_port)) // Entire TX queue handled, re-enable the recv IRQ again. _port->dev()->ack_recv_irq(0); } }; Receive_irq _recv_irq; Reschedule_tx_irq _reschedule_tx_irq; public: Ixl_hw_port(L4Re::Util::Object_registry *registry, Virtio_switch *virtio_switch, Ixl::Ixl_device *dev) : Ixl_port(dev), _recv_irq(virtio_switch, this), _reschedule_tx_irq(virtio_switch, this) { L4::Cap recv_irq_cap = L4Re::chkcap(dev->get_recv_irq(0), "Get receive IRQ"); L4Re::chkcap(registry->register_obj(&_recv_irq, recv_irq_cap), "Register receive IRQ."); recv_irq_cap->unmask(); _pending_tx_reschedule = L4Re::chkcap(registry->register_irq_obj(&_reschedule_tx_irq), "Register TX reschedule IRQ."); _pending_tx_reschedule->unmask(); } ~Ixl_hw_port() override { server.registry()->unregister_obj(&_recv_irq); } }; static void discover_ixl_devices(L4::Cap vbus, Virtio_switch *virtio_switch) { struct Ixl::Dev_cfg cfg; // Configure the device in asynchronous notify mode. cfg.irq_timeout_ms = -1; // TODO: Support detecting multiple devices on a Vbus. // Setup the driver (also resets and initializes the NIC). Ixl::Ixl_device *dev = Ixl::Ixl_device::ixl_init(vbus, 0, cfg); if (!dev) // No Ixl supported device found, Ixl already printed an error message. return; Ixl_hw_port *hw_port = new Ixl_hw_port(server.registry(), virtio_switch, dev); if (!virtio_switch->add_port(hw_port)) { Err().printf("error adding ixl port\n"); delete hw_port; } } #endif int main(int argc, char *argv[]) { trusted_dataspaces = std::make_shared(); auto *opts = Options::parse_options(argc, argv, trusted_dataspaces); if (!opts) { Err().printf("Error during command line parsing.\n"); return 1; } // Show welcome message if debug level is not set to quiet if (Dbg(Dbg::Core, Dbg::Warn).is_active()) printf("Hello from l4virtio switch\n"); Virtio_switch *virtio_switch = new Virtio_switch(opts->get_max_ports()); #ifdef CONFIG_VNS_STATS Switch_statistics::get_instance().initialize(opts->get_max_ports()); #endif #if CONFIG_VNS_IXL auto vbus = L4Re::Env::env()->get_cap("vbus"); if (vbus.is_valid()) discover_ixl_devices(vbus, virtio_switch); #endif Switch_factory *factory = new Switch_factory(virtio_switch, opts->get_virtq_max_num()); L4::Cap cap = server.registry()->register_obj(factory, "svr"); if (!cap.is_valid()) { Err().printf("error registering switch\n"); return 2; } /* * server loop will handle 4 types of events * - Switch_factory * - factory protocol * - capability deletion * - delegated to Virtio_switch::check_ports() * - Switch_factory::Switch_port * - irqs triggered by clients * - delegated to Virtio_switch::handle_l4virtio_port_tx() * - Virtio_net_transfer * - timeouts for pending transfer requests added by * Port_iface::handle_request() via registered via * L4::Epiface::server_iface()->add_timeout() */ server.loop(); return 0; } /**\}*/