l4re-base-25.08.0

This commit is contained in:
2025-09-12 15:55:45 +02:00
commit d959eaab98
37938 changed files with 9382688 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
# L4Re BID build system: package directory and L4Re source tree root.
PKGDIR ?= ..
L4DIR ?= $(PKGDIR)/../..

# Name of the subdirectory to descend into.
TARGET = switch

# Delegate to the generic subdirectory rules of the BID build system.
include $(L4DIR)/mk/subdir.mk

View File

@@ -0,0 +1,13 @@
# L4Re BID build system: package directory and L4Re source tree root.
PKGDIR ?= ../..
L4DIR ?= $(PKGDIR)/../..

# Name of the binary to build.
TARGET = l4vio_switch

# Mandatory libraries; ixl (hardware NIC driver) only with CONFIG_VNS_IXL=y.
REQUIRES_LIBS = libstdc++ l4virtio
REQUIRES_LIBS-$(CONFIG_VNS_IXL) += ixl

# filter.cc is compiled only when the port-filter config option is enabled
# (expands to SRC_CC-y when CONFIG_VNS_PORT_FILTER=y, ignored otherwise).
SRC_CC-$(CONFIG_VNS_PORT_FILTER) += filter.cc
SRC_CC = main.cc switch.cc options.cc

# Generic program build rules of the BID build system.
include $(L4DIR)/mk/prog.mk

View File

@@ -0,0 +1,117 @@
/*
* (c) 2013-2014 Alexander Warg <warg@os.inf.tu-dresden.de>
* economic rights: Technische Universität Dresden (Germany)
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <l4/re/util/debug>
/**
 * Error logger of the switch; prefixes all output with "VSwitch".
 */
struct Err : L4Re::Util::Err
{
  /// Severity defaults to Fatal.
  Err(Level l = Fatal) : L4Re::Util::Err(l, "VSwitch") {}
};
/**
 * Debug logger of the switch.
 *
 * The inherited `level` bitmask is partitioned into one 4-bit group per
 * component (see \ref Component), each holding a mask of \ref Verbosity
 * bits. All debug output is prefixed with "SWI".
 *
 * With NDEBUG defined, all of this collapses to no-op stubs.
 */
class Dbg : public L4Re::Util::Dbg
{
  enum
  {
    Verbosity_shift = 4, ///< Bits per component for verbosity
    Verbosity_mask = (1UL << Verbosity_shift) - 1
  };

public:
  /// Verbosity level per component. Individual bit flags that can be or'ed.
  enum Verbosity : unsigned long
  {
    Quiet = 0,
    Warn = 1,
    Info = 2,
    Debug = 4,
    Trace = 8,
    Max_verbosity = 8
  };

  /**
   * Different components for which the verbosity can be set independently.
   */
  enum Component
  {
    Core = 0,
    Virtio,
    Port,
    Request,
    Queue,
    Packet,
    Max_component
  };

#ifndef NDEBUG
  // All component groups must fit into the inherited `level` bitmask.
  static_assert(Max_component * Verbosity_shift <= sizeof(level) * 8,
                "Too many components for level mask");
  // Every verbosity bit must survive masking with Verbosity_mask.
  static_assert((Max_verbosity & Verbosity_mask) == Max_verbosity,
                "Verbosity_shift too small for verbosity levels");

  /**
   * Set the verbosity for all components to the given levels.
   *
   * \param mask  Mask of verbosity levels.
   */
  static void set_verbosity(unsigned mask)
  {
    for (unsigned i = 0; i < Max_component; ++i)
      set_verbosity(i, mask);
  }

  /**
   * Set the verbosity of a single component to the given level.
   *
   * \param c     Component for which to set verbosity.
   * \param mask  Mask of verbosity levels.
   */
  static void set_verbosity(unsigned c, unsigned mask)
  {
    level &= ~(Verbosity_mask << (Verbosity_shift * c));
    level |= (mask & Verbosity_mask) << (Verbosity_shift * c);
  }

  /**
   * Check whether debugging is active for a component and verbosity level.
   *
   * \param c     Component for which to check verbosity.
   * \param mask  Mask of verbosity levels.
   *
   * \retval true   Debugging is active.
   * \retval false  Debugging is not active.
   */
  static bool is_active(unsigned c, unsigned mask)
  { return level & (mask & Verbosity_mask) << (Verbosity_shift * c); }

  /**
   * Check whether debugging is active for the current debug object.
   *
   * \retval true   Debugging is active.
   * \retval false  Debugging is not active.
   */
  using L4Re::Util::Dbg::is_active;

  Dbg(Component c = Core, Verbosity v = Warn, char const *subsys = "")
  : L4Re::Util::Dbg(v << (Verbosity_shift * c), "SWI", subsys)
  {}
#else
  // Release build: verbosity cannot be changed and nothing is ever active.
  static void set_verbosity(unsigned) {}
  static void set_verbosity(unsigned, unsigned) {}
  static bool is_active(unsigned, unsigned) { return false; }
  using L4Re::Util::Dbg::is_active;

  Dbg(Component c = Core, Verbosity v = Warn, char const *subsys = "")
  : L4Re::Util::Dbg(v << (Verbosity_shift * c), "", subsys)
  {}
#endif
};

View File

@@ -0,0 +1,44 @@
/*
* Copyright (C) 2016-2017, 2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#include "filter.h"
/* This is an example filter and therefore rather verbose. A real
filter would not produce any output */
/* This is an example filter and therefore rather verbose. A real
   filter would not produce any output. */
bool
filter(const uint8_t *buf, size_t size)
{
  // The EtherType lives at offsets 12/13; anything shorter is passed on.
  if (size <= 13)
    return false;

  // EtherType is transmitted in network (big endian) byte order.
  uint16_t const ether_type =
    static_cast<uint16_t>(buf[12]) << 8 | static_cast<uint16_t>(buf[13]);

  char const *protocol = nullptr;
  switch (ether_type)
    {
    case 0x0800: protocol = "IPv4"; break;
    case 0x0806: protocol = "ARP"; break;
    case 0x8100: protocol = "Vlan"; break;
    case 0x86dd: protocol = "IPv6"; break;
    case 0x8863: protocol = "PPPoE Discovery"; break;
    case 0x8864: protocol = "PPPoE Session"; break;
    default:     break;
    }

  protocol ? printf("%s\n", protocol) : printf("%04x\n", ether_type);

  // ARP is always forwarded, everything else is dropped by this example.
  if (ether_type == 0x0806)
    {
      printf("Do not filter arp\n");
      return false;
    }

  return true;
}

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2016-2017, 2023-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "request.h"
#include <l4/bid_config.h>
/**
* Decide whether a packet shall be filtered out.
*
* \param buf The buffer containing the available part of the package.
* \param size The size of the available packet data.
*
* \retval true The packet shall be filtered out.
* \retval false The packet shall be forwarded.
*
* This function looks at the available part of a packet and decides
* whether it shall be filtered.
*/
#ifdef CONFIG_VNS_PORT_FILTER
// Real filter; compiled from filter.cc only when the port-filter config
// option is enabled.
bool filter(const uint8_t *buf, size_t size);
#else
// Fallback used when CONFIG_VNS_PORT_FILTER is disabled.
inline bool filter(const uint8_t *, size_t)
{
  // default implementation filtering out no packets, see filter.cc for
  // other examples
  return false;
}
#endif
/**
* Look at a request and decide whether it shall be filtered.
*
* \param req The request to be considered for filtering.
*
* \retval true The packet shall be filtered out.
* \retval false The packet shall be forwarded.
*/
inline bool filter_request(Net_request const &req)
{
  // Ask the request for the currently available packet data and hand
  // it to the packet filter.
  size_t avail;
  uint8_t const *data = req.buffer(&avail);
  return filter(data, avail);
}

View File

@@ -0,0 +1,120 @@
/*
* Copyright (C) 2016-2017, 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <cstring>
#include <inttypes.h>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* A wrapper class around the value of a MAC address.
*/
class Mac_addr
{
public:
  enum
  {
    Addr_length = 6,
    Addr_unknown = 0ULL
  };

  explicit Mac_addr(char const *_src)
  {
    /* A MAC address is 6 bytes long and travels over the wire in big
       endian order. Internally we want a cheap broadcast/multicast
       test, so byte i of the wire format is stored at bit position
       8*i: the first (most significant) wire byte becomes the least
       significant byte of _mac. */
    unsigned char const *src = reinterpret_cast<unsigned char const *>(_src);
    _mac = 0;
    for (unsigned i = 0; i < Addr_length; ++i)
      _mac |= static_cast<uint64_t>(src[i]) << (8 * i);
  }

  /// Construct from a buffer that must be read via volatile accesses.
  static Mac_addr from_uncached(char volatile const *src)
  { return Mac_addr(src); }

  explicit Mac_addr(uint64_t mac) : _mac{mac} {}

  Mac_addr(Mac_addr const &other) : _mac{other._mac} {}

  /** Check if MAC address is a broadcast or multicast address. */
  bool is_broadcast() const
  {
    /* Broadcast (FF:FF:FF:FF:FF:FF) and multicast addresses are both
       delivered to all stations on the local network (layer 2).
       Multicast addresses carry the LSB of the first octet set, and
       the broadcast address trivially does too, so testing that one
       bit covers both. With our reversed byte order that bit is the
       least significant bit of the least significant byte. */
    return _mac & 1;
  }

  /** Check if the MAC address is not yet known. */
  bool is_unknown() const
  { return _mac == Addr_unknown; }

  bool operator == (Mac_addr const &other) const
  { return _mac == other._mac; }

  bool operator != (Mac_addr const &other) const
  { return _mac != other._mac; }

  bool operator < (Mac_addr const &other) const
  { return _mac < other._mac; }

  Mac_addr& operator = (Mac_addr const &other)
  {
    _mac = other._mac;
    return *this;
  }

  Mac_addr& operator = (uint64_t mac)
  {
    _mac = mac;
    return *this;
  }

  /// Print the address in the usual aa:bb:cc:dd:ee:ff notation.
  template<typename T>
  void print(T &stream) const
  {
    stream.cprintf("%02x:%02x:%02x:%02x:%02x:%02x",
                   static_cast<int>(_mac & 0xff),
                   static_cast<int>((_mac >> 8) & 0xff),
                   static_cast<int>((_mac >> 16) & 0xff),
                   static_cast<int>((_mac >> 24) & 0xff),
                   static_cast<int>((_mac >> 32) & 0xff),
                   static_cast<int>((_mac >> 40) & 0xff));
  }

  /// Write the address back out in wire (big endian) byte order.
  void to_array(unsigned char mac[6]) const
  {
    uint64_t rest = _mac;
    for (unsigned i = 0; i < Addr_length; ++i)
      {
        mac[i] = rest & 0xffU;
        rest >>= 8;
      }
  }

private:
  explicit Mac_addr(char volatile const *_src)
  {
    // Same byte reordering as the non-volatile constructor, but each
    // source byte is read through a volatile access.
    volatile unsigned char const *src =
      reinterpret_cast<volatile unsigned char const *>(_src);
    _mac = 0;
    for (unsigned i = 0; i < Addr_length; ++i)
      _mac |= static_cast<uint64_t>(src[i]) << (8 * i);
  }

  /// MAC addresses are 6 bytes long; we store them in 8 bytes.
  uint64_t _mac;
};
/**\}*/

View File

@@ -0,0 +1,173 @@
/*
* Copyright (C) 2016-2017, 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "mac_addr.h"
#include "port.h"
#include <array>
#include <map>
#include <tuple>
#include <algorithm>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* Mac_table manages a 1:n association between ports and MAC addresses.
*
* There are different types of devices which might be attached to a
* port. For a normal device the switch sees exactly one MAC address
* per port - the MAC address of the device attached to it. But there
* might be other devices like software bridges attached to the port
* sending packets with different MAC addresses to the port. Therefore
* the switch has to manage a 1:n association between ports and MAC
* addresses. The MAC table manages this association.
*
* When a packet comes in we need to find the destination port for the
* packet and therefore perform a lookup based on the MAC address.
*
* To prevent unbounded growth of the lookup table, the number of entries is
* limited. Replacement is done on a round-robin basis. If the capacity was
* reached, the oldest entry is evicted.
*/
template<std::size_t Size = 1024U>
class Mac_table
{
public:
  Mac_table()
  : _mac_table(),
    _entries(),
    _rr_index(0U)
  {}

  /**
   * Find the destination port for a MAC address and VLAN id.
   *
   * \param dst      MAC address
   * \param vlan_id  VLAN id
   *
   * \retval nullptr  The MAC address is not known (yet)
   * \retval other    Pointer to the destination port
   */
  Port_iface *lookup(Mac_addr dst, l4_uint16_t vlan_id) const
  {
    auto entry = _mac_table.find(std::tuple(dst, vlan_id));
    return (entry != _mac_table.end()) ? entry->second->port : nullptr;
  }

  /**
   * Learn a MAC address (add it to the MAC table).
   *
   * \param src   MAC address
   * \param port  Pointer to the port object that can be used to reach
   *              MAC address src
   * \param vlan_id
   *              VLAN id of the packet destination.
   *
   * Will evict the oldest learned address from the table if the maximum
   * capacity was reached and if the MAC address was not known yet. The source
   * port of the table entry is always updated to cope with clients that move
   * between ports.
   */
  void learn(Mac_addr src, Port_iface *port, l4_uint16_t vlan_id)
  {
    Dbg info(Dbg::Port, Dbg::Info);
    if (L4_UNLIKELY(info.is_active()))
      {
        // check whether we already know about src mac and vlan_id
        auto *p = lookup(src, vlan_id);
        if (!p || p != port)
          {
            info.printf("%s %-20s -> ", !p ? "learned " : "replaced",
                        port->get_name());
            src.print(info);
            info.cprintf("\n");
          }
      }

    // Tentatively map the key to the current round-robin slot; emplace
    // only succeeds (status.second) if the key was not present yet.
    auto status = _mac_table.emplace(std::tuple(src, vlan_id),
                                     &_entries[_rr_index]);
    if (L4_UNLIKELY(status.second))
      {
        // New key: the slot may still hold an older association that
        // must be evicted from the map before the slot is reused.
        if (_entries[_rr_index].port)
          {
            // remove old entry
            _mac_table.erase(std::tuple(_entries[_rr_index].addr,
                                        _entries[_rr_index].vlan_id));
          }

        // Set/Replace port and mac address
        _entries[_rr_index].port = port;
        _entries[_rr_index].addr = src;
        _entries[_rr_index].vlan_id = vlan_id;
        _rr_index = (_rr_index + 1U) % Size;
      }
    else
      {
        // Update port to allow for movement of client between ports
        status.first->second->port = port;
      }
  }

  /**
   * Flush all associations with a given port.
   *
   * \param port  Pointer to port that is to be flushed
   *
   * This function removes all references to a given port from the MAC
   * table. Since we manage a 1:n association between ports and MAC
   * addresses there might be more than one entry for a given port and
   * we have to iterate over the whole array to delete every reference
   * to the port.
   */
  void flush(Port_iface *port)
  {
    typedef std::pair<std::tuple<const Mac_addr, l4_uint16_t>, Entry*> TableEntry;
    auto iter = _mac_table.begin();
    // Repeatedly find the next map entry pointing at `port`, reset the
    // backing slot so it can be reused, and drop the map entry.
    while ((iter = std::find_if(iter, _mac_table.end(),
                                [port](TableEntry const &p)
                                { return p.second->port == port; }))
           != _mac_table.end())
      {
        iter->second->port = nullptr;
        iter->second->addr = Mac_addr::Addr_unknown;
        iter->second->vlan_id = 0;
        iter = _mac_table.erase(iter);
      }
    // Debug-build check: no reference to the port may survive.
    assert(std::find_if(_mac_table.begin(), _mac_table.end(),
                        [port](TableEntry const &p)
                        { return p.second->port == port; }) == _mac_table.end());
  }

private:
  /**
   * Value class for MAC table entry.
   *
   * The instances hold the actual key (addr) to know which _mac_table entry
   * points there.
   */
  struct Entry {
    Port_iface *port;    // destination port; nullptr marks a free slot
    Mac_addr addr;       // key copy used for eviction of this slot
    l4_uint16_t vlan_id; // key copy used for eviction of this slot

    Entry()
    : port(nullptr),
      addr(Mac_addr::Addr_unknown),
      vlan_id(0)
    {}
  };

  /// Lookup structure: (MAC, VLAN) -> slot in _entries.
  std::map<std::tuple<Mac_addr, l4_uint16_t>, Entry*> _mac_table;
  /// Fixed-capacity backing store; replacement is round-robin over this array.
  std::array<Entry, Size> _entries;
  /// Next slot of _entries to (re)use.
  size_t _rr_index;
};
/**\}*/

View File

@@ -0,0 +1,827 @@
/*
* Copyright (C) 2016-2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Manuel von Oltersdorff-Kalettka <manuel.kalettka@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#include <l4/re/util/meta>
#include <l4/re/util/object_registry>
#include <l4/re/util/br_manager>
#include <l4/sys/factory>
#include <l4/sys/task>
#include <l4/sys/cxx/ipc_epiface>
#include <l4/sys/cxx/ipc_varg>
#include <l4/cxx/dlist>
#include <l4/cxx/string>
#include <stdlib.h>
#include <string>
#include <terminate_handler-l4>
#include <vector>
#include "debug.h"
#include "options.h"
#include "switch.h"
#include "vlan.h"
#include <l4/virtio-net-switch/stats.h>
/**
* \defgroup virtio_net_switch Virtio Net Switch
*
* A virtual network switch that can be used as defined in the virtio protocol.
*
* The abstraction of a single connection with a network device (also called
* client) from the switch's perspective is a port. A client can register
* multiple ports on the switch. The communication between a client and the
* switch happens via IRQs, MMIO and shared memory as defined by the Virtio
* protocol. The switch supports VLANs and ports can be either 'access' or
* 'trunk' ports.
* The optionally available monitor port receives network traffic from all
* ports, and the monitor can not send.
*
* \{
*/
/*
* Registry for our server, used to register
* - factory capability
* - irq object for capability deletion irqs
* - virtio host kick irqs
*/
static L4Re::Util::Registry_server<L4Re::Util::Br_manager_hooks> server;

/// Container type for dataspace capabilities accepted as trusted.
using Ds_vector = std::vector<L4::Cap<L4Re::Dataspace>>;
// Filled during option parsing (see main()); handed to every created port.
static std::shared_ptr<Ds_vector> trusted_dataspaces;
/**
 * Try to parse an integer option of the form "<prefix><number>".
 *
 * \param      param   Varg holding the candidate option string.
 * \param      prefix  Expected option prefix, e.g. "ds-max=".
 * \param[out] out     Parsed integer value on success.
 *
 * \retval false  The Varg does not start with `prefix`; `*out` untouched.
 * \retval true   Prefix matched and the number was parsed into `*out`.
 *
 * \throws L4::Runtime_error(-L4_EINVAL) if the prefix matched but the
 *         remainder is not a valid number.
 */
static bool
parse_int_param(L4::Ipc::Varg const &param, char const *prefix, int *out)
{
  l4_size_t prefix_len = strlen(prefix);
  if (param.length() < prefix_len)
    return false;

  char const *str = param.value<char const *>();
  if (strncmp(str, prefix, prefix_len) != 0)
    return false;

  std::string number(str + prefix_len, param.length() - prefix_len);
  if (parse_int_optstring(number.c_str(), out))
    return true;

  Err(Err::Normal).printf("Bad parameter '%s'. Invalid number specified.\n",
                          prefix);
  throw L4::Runtime_error(-L4_EINVAL);
}
/**
 * Fill `mac` with a random, locally administered unicast MAC address.
 *
 * \param[out] mac  Six-byte buffer receiving the generated address.
 *
 * The PRNG is seeded once, lazily, from the KIP clock.
 */
static void
assign_random_mac(l4_uint8_t mac[6])
{
  static bool initialized = false;
  if (!initialized)
    {
      initialized = true;
      srandom(l4_kip_clock(l4re_kip()));
    }

  unsigned i = 0;
  while (i < 6)
    mac[i++] = static_cast<l4_uint8_t>(random());

  mac[0] &= ~(1U << 0); // clear multicast bit
  mac[0] |= 1U << 1;    // set "locally administered" bit
}
/**
* The IPC interface for creating ports.
*
* The Switch factory provides an IPC interface to create ports. Ports are
* the only option for a client to communicate with the switch and, thus, with
* other network devices.
*
* The `Switch_factory` gets constructed when the net switch application gets
* started. It thereafter gets registered on the switch's server to serve IPC
* `create` calls.
*/
class Switch_factory : public L4::Epiface_t<Switch_factory, L4::Factory>
{
  /**
   * Implement the generic irq related part of the port
   */
  class Port : public L4virtio_port
  {
    // Irq used to notify the guest
    L4::Cap<L4::Irq> _device_notify_irq;

    L4::Cap<L4::Irq> device_notify_irq() const override
    { return _device_notify_irq; }

  public:
    Port(unsigned vq_max, unsigned num_ds, char const *name,
         l4_uint8_t const *mac)
    : L4virtio_port(vq_max, num_ds, name, mac) {}

    /** register the host IRQ and the port itself on the switch's server */
    void register_end_points(L4Re::Util::Object_registry* registry,
                             L4::Epiface *kick_irq)
    {
      // register virtio host kick irq
      _device_notify_irq = L4Re::chkcap(registry->register_irq_obj(kick_irq));

      // register virtio endpoint
      L4Re::chkcap(registry->register_obj(this));

      // decrement ref counter to get a notification when the last
      // external reference vanishes
      obj_cap()->dec_refcnt(1);
    }

    virtual ~Port()
    { server.registry()->unregister_obj(this); }
  };

  /**
   * Implement the irq related part of a switched port
   */
  class Switch_port : public Port
  {
    /**
     * IRQ endpoint on the port.
     *
     * Each port holds its own IRQ that gets triggered by the client whenever
     * there is a new outgoing request in the port's transmission queue or when
     * there is new space in the port's receive queue.
     *
     * A `Kick_irq` is constructed on port creation. At this time, it also gets
     * registered on the switch's server.
     */
    class Kick_irq : public L4::Irqep_t<Kick_irq>
    {
      Virtio_switch *_switch; /**< pointer to the net switch */
      L4virtio_port *_port;   /**< pointer to the associated port */

    public:
      /**
       * Callback for the IRQ
       *
       * This function redirects the call to
       * `Virtio_switch::handle_l4virtio_port_tx`, since the port cannot
       * finish a transmission on its own.
       */
      void handle_irq()
      { _switch->handle_l4virtio_port_tx(_port); }

      Kick_irq(Virtio_switch *virtio_switch, L4virtio_port *port)
      : _switch{virtio_switch}, _port{port} {}
    };

    Kick_irq _kick_irq; /**< The IRQ to notify the client. */
    // Second endpoint of the same type, used to re-trigger TX processing
    // via the port's _pending_tx_reschedule capability.
    Kick_irq _reschedule_tx_irq;

  public:
    Switch_port(L4Re::Util::Object_registry *registry,
                Virtio_switch *virtio_switch, unsigned vq_max, unsigned num_ds,
                char const *name, l4_uint8_t const *mac)
    : Port(vq_max, num_ds, name, mac),
      _kick_irq(virtio_switch, this),
      _reschedule_tx_irq(virtio_switch, this)
    {
      register_end_points(registry, &_kick_irq);
      _pending_tx_reschedule =
        L4Re::chkcap(registry->register_irq_obj(&_reschedule_tx_irq),
                     "Register TX reschedule IRQ.");
      _pending_tx_reschedule->unmask();
    }

    virtual ~Switch_port()
    {
      // We need to delete the IRQ object created in register_irq_obj() ourselves
      L4::Cap<L4::Task>(L4Re::This_task)
        ->unmap(_kick_irq.obj_cap().fpage(),
                L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ);
      server.registry()->unregister_obj(&_kick_irq);

      L4::Cap<L4::Task>(L4Re::This_task)
        ->unmap(_pending_tx_reschedule.fpage(),
                L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ);
      server.registry()->unregister_obj(&_reschedule_tx_irq);
    }
  };

  /**
   * Implement the irq related part of a monitor port
   */
  class Monitor_port : public Port
  {
    /**
     * Handle incoming irqs by
     * - handling pending outgoing requests
     * - dropping all incoming requests
     */
    class Kick_irq : public L4::Irqep_t<Kick_irq>
    {
      L4virtio_port *_port;

    public:
      /**
       * Callback for the IRQ
       *
       * A Monitor port processes only requests on its receive queue and drops
       * all requests on the transmit queue since it is not supposed to send
       * network request.
       */
      void handle_irq()
      {
        // Loop with notifications disabled until no TX work is left;
        // the barriers order queue flag updates against the re-check.
        do
          {
            _port->tx_q()->disable_notify();
            _port->rx_q()->disable_notify();

            _port->drop_requests();

            _port->tx_q()->enable_notify();
            _port->rx_q()->enable_notify();

            L4virtio::wmb();
            L4virtio::rmb();
          }
        while (_port->tx_work_pending());
      }

      Kick_irq(L4virtio_port *port) : _port{port} {}
    };

    Kick_irq _kick_irq;

  public:
    Monitor_port(L4Re::Util::Object_registry* registry,
                 unsigned vq_max, unsigned num_ds, char const *name,
                 l4_uint8_t const *mac)
    : Port(vq_max, num_ds, name, mac), _kick_irq(this)
    { register_end_points(registry, &_kick_irq); }

    virtual ~Monitor_port()
    {
      // We need to delete the IRQ object created in register_irq_obj() ourselves
      L4::Cap<L4::Task>(L4Re::This_task)
        ->unmap(_kick_irq.obj_cap().fpage(),
                L4_FP_ALL_SPACES | L4_FP_DELETE_OBJ);
      server.registry()->unregister_obj(&_kick_irq);
    }
  };

  /**
   * Implement the handler for the statistics reader capability.
   */
  class Stats_reader
  : public cxx::D_list_item,
    public L4::Epiface_t<Stats_reader, Virtio_net_switch::Statistics_if>
  {
    L4Re::Util::Unique_cap<L4Re::Dataspace> _ds; ///< buffer shared with client
    l4_addr_t _addr;                             ///< local mapping of _ds

  public:
    // Allocates and zeroes a dataspace large enough for the switch
    // statistics and maps it into our address space.
    Stats_reader()
    {
      l4_size_t size = Switch_statistics::get_instance().size();
      _ds = L4Re::Util::make_unique_cap<L4Re::Dataspace>();
      L4Re::chksys(L4Re::Env::env()->mem_alloc()->alloc(size, _ds.get()),
                   "Could not allocate shared mem ds.");
      L4Re::chksys(L4Re::Env::env()->rm()->attach(&_addr, _ds->size(),
                                                  L4Re::Rm::F::Search_addr
                                                  | L4Re::Rm::F::RW,
                                                  L4::Ipc::make_cap_rw(_ds.get())));
      memset(reinterpret_cast<void*>(_addr), 0, _ds->size());
    }

    ~Stats_reader()
    {
      L4Re::Env::env()->rm()->detach(reinterpret_cast<l4_addr_t>(_addr), 0);
      server.registry()->unregister_obj(this);
    }

    long op_get_buffer(Virtio_net_switch::Statistics_if::Rights,
                       L4::Ipc::Cap<L4Re::Dataspace> &ds)
    {
      // We hand out the dataspace in a read only manner. Clients must not be
      // able to modify information as that would create an unwanted data
      // channel.
      ds = L4::Ipc::Cap<L4Re::Dataspace>(_ds.get(), L4_CAP_FPAGE_RO);
      return L4_EOK;
    }

    // Copy the current statistics snapshot into the shared buffer.
    long op_sync(Virtio_net_switch::Statistics_if::Rights)
    {
      memcpy(reinterpret_cast<void *>(_addr),
             reinterpret_cast<void *>(Switch_statistics::get_instance().stats()),
             Switch_statistics::get_instance().size());
      return L4_EOK;
    }

    // A reader is stale once the client side dropped its capability.
    bool is_valid()
    { return obj_cap() && obj_cap().validate().label(); }
  };

  /// Owning list of statistics readers; stale readers are garbage-collected.
  class Stats_reader_list
  {
    cxx::D_list<Stats_reader> _readers;

  public:
    // Delete all readers whose external capability vanished.
    void check_readers()
    {
      auto it = _readers.begin();
      while (it != _readers.end())
        {
          auto *reader = *it;
          if (!reader->is_valid())
            {
              it = _readers.erase(it);
              delete reader;
            }
          else
            ++it;
        }
    }

    void push_back(cxx::unique_ptr<Stats_reader> reader)
    {
      _readers.push_back(reader.release());
    }
  };

  /*
   * Handle vanishing caps by telling the switch that a port might have gone
   */
  struct Del_cap_irq : public L4::Irqep_t<Del_cap_irq>
  {
  public:
    void handle_irq()
    {
      _switch->check_ports();
      _stats_readers->check_readers();
    }

    Del_cap_irq(Virtio_switch *virtio_switch, Stats_reader_list *stats_readers)
    : _switch{virtio_switch},
      _stats_readers{stats_readers}
    {}

  private:
    Virtio_switch *_switch;
    Stats_reader_list *_stats_readers;
  };

  Virtio_switch *_virtio_switch; /**< pointer to the actual net switch object */
  /** maximum number of entries in a new virtqueueue created for a port */
  unsigned _vq_max_num;
  Stats_reader_list _stats_readers;
  Del_cap_irq _del_cap_irq;

  /**
   * Evaluate an optional argument
   *
   * \param opt           Optional argument.
   * \param[out] monitor  Set to true if argument is "type=monitor".
   * \param name          Pointer to name.
   * \param size          Size of name.
   * \param[out] vlan_access  Id of VLAN access port if "vlan=access=<id>" is
   *                          present.
   * \param[out] vlan_trunk   List of VLANs if "vlan=trunk=[<id>[,<id]*] is
   *                          present.
   * \param[out] vlan_trunk_all
   *                          Iff true, trunk port shall participate in all
   *                          VLANs. vlan_trunk will be ignored.
   * \param[out] mac      Parsed MAC bytes; only valid if mac_set is true.
   * \param[out] mac_set  Set to true if a "mac=..." option was parsed.
   */
  bool handle_opt_arg(L4::Ipc::Varg const &opt, bool &monitor,
                      char *name, size_t size,
                      l4_uint16_t &vlan_access,
                      std::vector<l4_uint16_t> &vlan_trunk,
                      bool *vlan_trunk_all,
                      l4_uint8_t mac[6], bool &mac_set)
  {
    assert(opt.is_of<char const *>());
    unsigned len = opt.length();
    const char *opt_str = opt.data();
    Err err(Err::Normal);

    if (len > 5)
      {
        if (!strncmp("type=", opt_str, 5))
          {
            // NOTE(review): comparing with the argument's own length makes
            // abbreviations like "type=mon" match "type=monitor" — confirm
            // this prefix matching is intended.
            if (!strncmp("type=monitor", opt_str, len))
              {
                monitor = true;
                return true;
              }
            else if (!strncmp("type=none", opt_str, len))
              return true;

            err.printf("Unknown type '%.*s'\n", opt.length() - 5, opt.data() + 5);
            return false;
          }
        else if (!strncmp("name=", opt_str, 5))
          {
            snprintf(name, size, "%.*s", opt.length() - 5, opt.data() + 5);
            return true;
          }
        else if (!strncmp("vlan=", opt_str, 5))
          {
            cxx::String str(opt_str + 5, strnlen(opt_str + 5, len - 5));
            cxx::String::Index idx;
            if ((idx = str.starts_with("access=")))
              {
                // "vlan=access=<id>": a single decimal VLAN id.
                str = str.substr(idx);
                l4_uint16_t vid;
                int next = str.from_dec(&vid);
                if (next && next == str.len() && vlan_valid_id(vid))
                  vlan_access = vid;
                else
                  {
                    err.printf("Invalid VLAN access port id '%.*s'\n",
                               opt.length(), opt.data());
                    return false;
                  }
              }
            else if ((idx = str.starts_with("trunk=")))
              {
                // "vlan=trunk=all" or a comma separated list of VLAN ids.
                int next;
                l4_uint16_t vid;
                str = str.substr(idx);
                if (str == cxx::String("all"))
                  {
                    *vlan_trunk_all = true;
                    return true;
                  }
                while ((next = str.from_dec(&vid)))
                  {
                    if (!vlan_valid_id(vid))
                      break;
                    vlan_trunk.push_back(vid);
                    if (next < str.len() && str[next] != ',')
                      break;
                    str = str.substr(next+1);
                  }
                // Reject if nothing was parsed or trailing garbage remains.
                if (vlan_trunk.empty() || !str.empty())
                  {
                    err.printf("Invalid VLAN trunk port spec '%.*s'\n",
                               opt.length(), opt.data());
                    return false;
                  }
              }
            else
              {
                err.printf("Invalid VLAN specification..\n");
                return false;
              }
            return true;
          }
        else if (!strncmp("mac=", opt_str, 4))
          {
            size_t const OPT_LEN = 4 /* mac= */ + 6*2 /* digits */ + 5 /* : */;
            // expect NUL terminated string for simplicity
            if (len > OPT_LEN && opt_str[OPT_LEN] == '\0' &&
                sscanf(opt_str+4, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &mac[0],
                       &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6)
              {
                mac_set = true;
                return true;
              }

            err.printf("Invalid mac address '%.*s'\n", len - 4, opt_str + 4);
            return false;
          }
      }

    err.printf("Unknown option '%.*s'\n", opt.length(), opt.data());
    return false;
  }

public:
  Switch_factory(Virtio_switch *virtio_switch, unsigned vq_max_num)
  : _virtio_switch{virtio_switch}, _vq_max_num{vq_max_num},
    _del_cap_irq{virtio_switch, &_stats_readers}
  {
    // Get notified (via _del_cap_irq) whenever a capability managed by
    // our main thread is deleted, so ports/readers can be cleaned up.
    auto c = L4Re::chkcap(server.registry()->register_irq_obj(&_del_cap_irq));
    L4Re::chksys(L4Re::Env::env()->main_thread()->register_del_irq(c));
  };

  /**
   * Handle factory protocol
   *
   * This function is invoked after an incoming factory::create
   * request and creates a new port or statistics interface if possible.
   */
  long op_create(L4::Factory::Rights, L4::Ipc::Cap<void> &res,
                 l4_umword_t type, L4::Ipc::Varg_list_ref va)
  {
    // type 0: network port; type 1: statistics reader.
    switch (type)
      {
      case 0:
        return create_port(res, va);
      case 1:
        return create_stats(res);
      default:
        Dbg(Dbg::Core, Dbg::Warn).printf("op_create: Invalid object type\n");
        return -L4_EINVAL;
      }
  }

  /**
   * Create a switch or monitor port from the factory arguments.
   *
   * \param[out] res  Capability of the freshly created port.
   * \param va        Remaining factory arguments (option strings).
   *
   * \return L4_EOK on success, negative error value otherwise.
   */
  long create_port(L4::Ipc::Cap<void> &res, L4::Ipc::Varg_list_ref va)
  {
    Dbg warn(Dbg::Port, Dbg::Warn, "Port");
    Dbg info(Dbg::Port, Dbg::Info, "Port");
    info.printf("Incoming port request\n");

    bool monitor = false;
    char name[20] = "";
    unsigned arg_n = 2;
    l4_uint16_t vlan_access = 0;
    std::vector<l4_uint16_t> vlan_trunk;
    bool vlan_trunk_all = false;
    // mac[] stays uninitialized unless mac_set becomes true or
    // assign_random_mac() fills it below.
    l4_uint8_t mac[6];
    bool mac_set = false;
    int num_ds = 2;

    for (L4::Ipc::Varg opt: va)
      {
        if (!opt.is_of<char const *>())
          {
            warn.printf("Unexpected type for argument %d\n", arg_n);
            return -L4_EINVAL;
          }

        if (parse_int_param(opt, "ds-max=", &num_ds))
          {
            if (num_ds <= 0 || num_ds > 80)
              {
                Err(Err::Normal).printf("warning: client requested invalid number"
                                        " of data spaces: 0 < %d <= 80\n", num_ds);
                return -L4_EINVAL;
              }
          }
        else if (!handle_opt_arg(opt, monitor, name, sizeof(name), vlan_access,
                                 vlan_trunk, &vlan_trunk_all, mac, mac_set))
          return -L4_EINVAL;
        ++arg_n;
      }

    int port_num = _virtio_switch->port_available(monitor);
    if (port_num < 0)
      {
        warn.printf("No port available\n");
        return -L4_ENOMEM;
      }

    if (vlan_access && (!vlan_trunk.empty() || vlan_trunk_all))
      {
        warn.printf("VLAN port cannot be access and trunk simultaneously.\n");
        return -L4_EINVAL;
      }

    // Default name: "[<num>]" resp. "monitor[<num>]".
    if (!name[0])
      snprintf(name, sizeof(name), "%s[%d]", monitor ? "monitor" : "",
               port_num);

    info.printf(" Creating port %s%s\n", name,
                monitor ? " as monitor port" : "");

    // Assign a random MAC address if we assign one to our devices but the
    // user has not passed an explicit one for a port.
    if (!mac_set && Options::get_options()->assign_mac())
      assign_random_mac(mac);
    l4_uint8_t *mac_ptr = (mac_set || Options::get_options()->assign_mac())
                          ? mac : nullptr;

    // create port
    Port *port;
    if (monitor)
      {
        port = new Monitor_port(server.registry(), _vq_max_num, num_ds, name,
                                mac_ptr);
        port->set_monitor();
        if (vlan_access)
          warn.printf("vlan=access=<id> ignored on monitor ports!\n");
        if (!vlan_trunk.empty())
          warn.printf("vlan=trunk=... ignored on monitor ports!\n");
      }
    else
      {
        port = new Switch_port(server.registry(), _virtio_switch, _vq_max_num,
                               num_ds, name, mac_ptr);
        if (vlan_access)
          port->set_vlan_access(vlan_access);
        else if (vlan_trunk_all)
          port->set_vlan_trunk_all();
        else if (!vlan_trunk.empty())
          port->set_vlan_trunk(vlan_trunk);
      }
    port->add_trusted_dataspaces(trusted_dataspaces);
    if (!trusted_dataspaces->empty())
      port->enable_trusted_ds_validation();

    // hand port over to the switch
    bool added = monitor ? _virtio_switch->add_monitor_port(port)
                         : _virtio_switch->add_port(port);
    if (!added)
      {
        delete port;
        return -L4_ENOMEM;
      }

    res = L4::Ipc::make_cap(port->obj_cap(), L4_CAP_FPAGE_RWSD);
    info.printf(" Created port %s\n", name);
    return L4_EOK;
  }

  /**
   * Create a statistics reader object.
   *
   * \param[out] res  Capability of the freshly created reader.
   *
   * \return L4_EOK on success (allocation failures throw).
   */
  long create_stats(L4::Ipc::Cap<void> &res)
  {
    // Create a stats reader and throw away our reference to get a notification
    // when the external reference vanishes.
    auto reader = cxx::make_unique<Stats_reader>();
    L4Re::chkcap(server.registry()->register_obj(reader.get()));
    reader->obj_cap()->dec_refcnt(1);
    res = L4::Ipc::make_cap(reader->obj_cap(),
                            L4_CAP_FPAGE_R | L4_CAP_FPAGE_D);
    _stats_readers.push_back(cxx::move(reader));
    return L4_EOK;
  }
};
#if CONFIG_VNS_IXL
/**
* Implement the irq related part of an ixl port.
*/
class Ixl_hw_port : public Ixl_port
{
template<typename Derived>
class Port_irq : public L4::Irqep_t<Derived>
{
public:
Port_irq(Virtio_switch *virtio_switch, Ixl_port *port)
: _switch{virtio_switch}, _port{port} {}
protected:
Virtio_switch *_switch;
Ixl_port *_port;
};
class Receive_irq : public Port_irq<Receive_irq>
{
public:
using Port_irq::Port_irq;
/**
* Callback for the IRQ
*
* This function redirects the call to `Virtio_switch::handle_ixl_port_tx`,
* since the port cannot finish a transmission on its own.
*/
void handle_irq()
{
if (!_port->dev()->check_recv_irq(0))
return;
if (_switch->handle_ixl_port_tx(_port))
_port->dev()->ack_recv_irq(0);
}
};
class Reschedule_tx_irq : public Port_irq<Reschedule_tx_irq>
{
public:
using Port_irq::Port_irq;
void handle_irq()
{
if (_switch->handle_ixl_port_tx(_port))
// Entire TX queue handled, re-enable the recv IRQ again.
_port->dev()->ack_recv_irq(0);
}
};
Receive_irq _recv_irq;
Reschedule_tx_irq _reschedule_tx_irq;
public:
Ixl_hw_port(L4Re::Util::Object_registry *registry,
Virtio_switch *virtio_switch, Ixl::Ixl_device *dev)
: Ixl_port(dev),
_recv_irq(virtio_switch, this),
_reschedule_tx_irq(virtio_switch, this)
{
L4::Cap<L4::Irq> recv_irq_cap = L4Re::chkcap(dev->get_recv_irq(0), "Get receive IRQ");
L4Re::chkcap(registry->register_obj(&_recv_irq, recv_irq_cap),
"Register receive IRQ.");
recv_irq_cap->unmask();
_pending_tx_reschedule =
L4Re::chkcap(registry->register_irq_obj(&_reschedule_tx_irq),
"Register TX reschedule IRQ.");
_pending_tx_reschedule->unmask();
}
~Ixl_hw_port() override
{
server.registry()->unregister_obj(&_recv_irq);
}
};
/**
 * Probe the Vbus for an Ixl-supported NIC and attach it as a hardware port.
 *
 * \param vbus           Vbus capability to scan.
 * \param virtio_switch  Switch the detected device is added to.
 *
 * Silently returns if no supported device is present.
 */
static void
discover_ixl_devices(L4::Cap<L4vbus::Vbus> vbus, Virtio_switch *virtio_switch)
{
  struct Ixl::Dev_cfg cfg;
  // Configure the device in asynchronous notify mode.
  cfg.irq_timeout_ms = -1;

  // TODO: Support detecting multiple devices on a Vbus.
  // Setup the driver (also resets and initializes the NIC).
  Ixl::Ixl_device *device = Ixl::Ixl_device::ixl_init(vbus, 0, cfg);
  if (!device)
    {
      // No Ixl supported device found, Ixl already printed an error message.
      return;
    }

  auto *port = new Ixl_hw_port(server.registry(), virtio_switch, device);
  if (virtio_switch->add_port(port))
    return;

  Err().printf("error adding ixl port\n");
  delete port;
}
#endif
/**
 * Entry point of the l4virtio switch.
 *
 * Parses the command line, optionally attaches physical NICs (Ixl), sets up
 * the port factory and runs the server loop forever.
 *
 * \return 0 on clean shutdown, 1 on command line errors, 2 if the factory
 *         object could not be registered.
 */
int main(int argc, char *argv[])
{
  trusted_dataspaces = std::make_shared<Ds_vector>();

  // Parse the command line; abort startup on malformed arguments.
  auto *opts = Options::parse_options(argc, argv, trusted_dataspaces);
  if (!opts)
    {
      Err().printf("Error during command line parsing.\n");
      return 1;
    }

  // Show welcome message if debug level is not set to quiet
  if (Dbg(Dbg::Core, Dbg::Warn).is_active())
    printf("Hello from l4virtio switch\n");

  // Central switch instance; owns the port table.
  Virtio_switch *virtio_switch = new Virtio_switch(opts->get_max_ports());

#ifdef CONFIG_VNS_STATS
  Switch_statistics::get_instance().initialize(opts->get_max_ports());
#endif

#if CONFIG_VNS_IXL
  // Optionally attach physical NICs found on the optional 'vbus' capability.
  auto vbus = L4Re::Env::env()->get_cap<L4vbus::Vbus>("vbus");
  if (vbus.is_valid())
    discover_ixl_devices(vbus, virtio_switch);
#endif

  // Factory object through which clients create new switch ports.
  Switch_factory *factory = new Switch_factory(virtio_switch,
                                               opts->get_virtq_max_num());
  L4::Cap<void> cap = server.registry()->register_obj(factory, "svr");
  if (!cap.is_valid())
    {
      Err().printf("error registering switch\n");
      return 2;
    }

  /*
   * server loop will handle 4 types of events
   * - Switch_factory
   *   - factory protocol
   *   - capability deletion
   *     - delegated to Virtio_switch::check_ports()
   * - Switch_factory::Switch_port
   *   - irqs triggered by clients
   *     - delegated to Virtio_switch::handle_l4virtio_port_tx()
   * - Virtio_net_transfer
   *   - timeouts for pending transfer requests added by
   *     Port_iface::handle_request() via registered via
   *     L4::Epiface::server_iface()->add_timeout()
   */
  server.loop();

  return 0;
}
/**\}*/

View File

@@ -0,0 +1,231 @@
/*
* Copyright (C) 2016-2017, 2019, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Manuel von Oltersdorff-Kalettka <manuel.kalettka@kernkonzept.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#include <getopt.h>
#include <stdlib.h>
#include <cstring>
#include <type_traits>
#include <l4/cxx/exceptions>
#include <l4/re/error_helper>
#include <l4/re/env>
#include "debug.h"
#include "options.h"
/**
 * Parse a decimal integer option argument.
 *
 * \param optstring   NUL-terminated string to parse.
 * \param[out] out    Receives the parsed value on success; untouched on
 *                    failure.
 *
 * \retval true   The whole string is a valid decimal number that fits into
 *                an int.
 * \retval false  Empty string, trailing garbage, or value out of range.
 */
bool
parse_int_optstring(char const *optstring, int *out)
{
  char *endp;

  errno = 0;
  long num = strtol(optstring, &endp, 10);

  // Reject empty input (endp == optstring), trailing garbage, range errors
  // reported via errno (ERANGE) and values that do not fit into an int.
  if (errno || endp == optstring || *endp != '\0'
      || num < INT_MIN || num > INT_MAX)
    return false;

  *out = static_cast<int>(num);
  return true;
}
/**
 * Translate a verbosity level name into the corresponding bit mask.
 *
 * \param str        Level name: "quiet", "warn", "info", "debug" or "trace".
 * \param[out] mask  Receives the cumulative set of enabled verbosity bits.
 *
 * \retval 0          The name was recognized and *mask is valid.
 * \retval -L4_ENOENT Unknown level name.
 */
static int
verbosity_mask_from_string(char const *str, unsigned *mask)
{
  // Each level enables its own bit plus all bits of the lower levels.
  static struct
  {
    char const *name;
    unsigned mask;
  } const levels[] =
  {
    { "quiet", Dbg::Quiet },
    { "warn",  Dbg::Warn },
    { "info",  Dbg::Warn | Dbg::Info },
    { "debug", Dbg::Warn | Dbg::Info | Dbg::Debug },
    { "trace", Dbg::Warn | Dbg::Info | Dbg::Debug | Dbg::Trace },
  };

  for (auto const &level : levels)
    if (strcmp(level.name, str) == 0)
      {
        *mask = level.mask;
        return 0;
      }

  return -L4_ENOENT;
}
/**
* Set debug level according to a verbosity string.
*
 * The string may either set a global verbosity level:
 * quiet, warn, info, debug, trace
 *
 * Or it may set the verbosity level for a component:
 *
 * <component>=<level>
 *
 * where component is one of: core, virtio, port, request, queue, packet
 * and level the same as above.
*
* To change the verbosity of multiple components repeat
* the verbosity switch.
*
* Example:
*
* <program name> -D info -D port=trace
*
* Sets verbosity for all components to info except for
* port handling which is set to trace.
*
* <program name> -D trace -D port=warn -D queue=warn
*
* Enables tracing for all components except port
* and queue.
*
*/
static void
set_verbosity(char const *str)
{
  unsigned mask;

  // First try to interpret the argument as a global verbosity level.
  if (verbosity_mask_from_string(str, &mask) == 0)
    {
      Dbg::set_verbosity(mask);
      return;
    }

  // Otherwise expect the "<component>=<level>" form.
  static char const *const components[] =
    { "core", "virtio", "port", "request", "queue", "packet" };
  static_assert(std::extent<decltype(components)>::value == Dbg::Max_component,
                "Component names must match 'enum Component'.");

  for (unsigned comp = 0; comp < Dbg::Max_component; ++comp)
    {
      char const *name = components[comp];
      auto name_len = strlen(name);

      if (strncmp(name, str, name_len) != 0 || str[name_len] != '=')
        continue;

      if (verbosity_mask_from_string(str + name_len + 1, &mask) == 0)
        {
          Dbg::set_verbosity(comp, mask);
          return;
        }
    }
}
/**
 * Parse the command line options into this Options instance.
 *
 * \param argc                Argument count as passed to main().
 * \param argv                Argument vector as passed to main().
 * \param trusted_dataspaces  Vector that receives the dataspace capabilities
 *                            named via the -d/--register-ds option.
 *
 * \retval 0   All options were parsed successfully.
 * \retval -1  An invalid option or option argument was encountered.
 *
 * \throws L4::Runtime_error (via L4Re::chkcap) if a dataspace capability
 *         named with -d cannot be found in the environment.
 */
int
Options::parse_cmd_line(int argc, char **argv,
                        std::shared_ptr<Ds_vector> trusted_dataspaces)
{
  int opt, index;
  struct option options[] =
  {
    {"size", 1, 0, 's' },        // size of in/out queue == #buffers in queue
    {"ports", 1, 0, 'p' },       // number of ports
    {"mac", 0, 0, 'm' },         // switch sets MAC address for each client
    {"debug", 1, 0, 'D' },       // configure debug levels
    {"verbose", 0, 0, 'v' },
    {"quiet", 0, 0, 'q' },
    {"register-ds", 1, 0, 'd' }, // register a trusted dataspace
    {0, 0, 0, 0}
  };

  unsigned long verbosity = Dbg::Warn;

  // Temporarily raise the core verbosity so the argument dump below is
  // visible regardless of the final verbosity configuration.
  Dbg info(Dbg::Core, Dbg::Info);
  Dbg::set_verbosity(Dbg::Core, Dbg::Info);
  info.printf("Arguments:\n");
  for (int i = 0; i < argc; ++i)
    info.printf("\t%s\n", argv[i]);
  Dbg::set_verbosity(verbosity);

  while ((opt = getopt_long(argc, argv, "s:p:mMqvD:d:", options, &index)) != -1)
    {
      switch (opt)
        {
        case 's':
          // QueueNumMax must be power of 2 between 1 and 0x8000
          if (!parse_int_optstring(optarg, &_virtq_max_num)
              || _virtq_max_num < 1 || _virtq_max_num > 32768
              || (_virtq_max_num & (_virtq_max_num - 1)))
            {
              Err().printf("Max number of virtqueue buffers must be power of 2"
                           " between 1 and 32768. Invalid value %i or argument "
                           "%s\n",
                           _virtq_max_num, optarg);
              return -1;
            }
          info.printf("Max number of buffers in virtqueue: %i\n",
                      _virtq_max_num);
          break;
        case 'p':
          if (parse_int_optstring(optarg, &_max_ports))
            info.printf("Max number of ports: %u\n", _max_ports);
          else
            {
              Err().printf("Invalid number of ports argument: %s\n", optarg);
              return -1;
            }
          break;
        case 'q':
          verbosity = Dbg::Quiet;
          Dbg::set_verbosity(verbosity);
          break;
        case 'v':
          // Each -v enables one more verbosity level for all components.
          verbosity = (verbosity << 1) | 1;
          Dbg::set_verbosity(verbosity);
          break;
        case 'D':
          set_verbosity(optarg);
          break;
        case 'm':
          // -m is the historic default behavior and therefore a no-op.
          info.printf("Option -m ignored for compatibility.\n");
          break;
        case 'M':
          _assign_mac = false;
          break;
        case 'd':
          {
            L4::Cap<L4Re::Dataspace> ds =
              L4Re::chkcap(L4Re::Env::env()->get_cap<L4Re::Dataspace>(optarg),
                           "Find a dataspace capability.\n");
            trusted_dataspaces->push_back(ds);
            break;
          }
        default:
          Err().printf("Unknown command line option '%c' (%d)\n", opt, opt);
          return -1;
        }
    }
  return 0;
}
// The single, process-wide Options instance.
static Options options;

// Return the global options; only meaningful after parse_options() ran.
Options const *
Options::get_options()
{ return &options; }
/**
 * Parse the command line into the global options instance.
 *
 * \return Pointer to the parsed options, or nullptr on parse errors.
 */
Options const *
Options::parse_options(int argc, char **argv,
                       std::shared_ptr<Ds_vector> trusted_dataspaces)
{
  int const ret = options.parse_cmd_line(argc, argv, trusted_dataspaces);
  return ret < 0 ? nullptr : &options;
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2016-2017, 2022, 2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Manuel von Oltersdorff-Kalettka <manuel.kalettka@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <memory>
#include <vector>
#include <cerrno>
#include <climits>
#include <l4/re/dataspace>
bool
parse_int_optstring(char const *optstring, int *out);
/**
 * Command line configuration of the switch.
 *
 * A single global instance is filled by parse_options() and queried via
 * get_options().
 */
class Options
{
  using Ds_vector = std::vector<L4::Cap<L4Re::Dataspace>>;

public:
  /// Maximum number of switch ports (-p/--ports).
  int get_max_ports() const
  { return _max_ports; }

  /// Maximum number of buffers in a data virtqueue (-s/--size).
  int get_virtq_max_num() const
  { return _virtq_max_num; }

  /// Maximum number of buffers in a port queue.
  int get_portq_max_num() const
  { return _portq_max_num; }

  /// Timeout for pending transfer requests.
  int get_request_timeout() const
  { return _request_timeout; }

  /// Whether the switch assigns a MAC address to each client (-M disables).
  int assign_mac() const
  { return _assign_mac; }

  /**
   * Parse the command line into the global options instance.
   *
   * \return Pointer to the parsed options, or nullptr on parse errors.
   */
  static Options const *
  parse_options(int argc, char **argv,
                std::shared_ptr<Ds_vector> trusted_dataspaces);

  /// Access the global options instance without re-parsing.
  static Options const *get_options();

private:
  int _max_ports = 5;
  int _virtq_max_num = 0x100;   // default value for data queues
  int _portq_max_num = 50;      // default value for port queues
  int _request_timeout = 1 * 1000 * 1000; // default packet timeout 1 second
  bool _assign_mac = true;

  // Worker for parse_options(); returns 0 on success, -1 on error.
  int parse_cmd_line(int argc, char **argv,
                     std::shared_ptr<Ds_vector> trusted_dataspaces);
};

View File

@@ -0,0 +1,283 @@
/*
* Copyright (C) 2016-2018, 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "request.h"
#include "mac_addr.h"
#include "vlan.h"
#include "stats.h"
#include <cassert>
#include <set>
#include <vector>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
 * Abstract interface of a switch port.
 *
 * Implementations exist for l4virtio client ports and Ixl hardware ports.
 * The class also keeps the per-port VLAN configuration and the optional
 * per-port statistics.
 *
 * Fixes relative to the previous revision: `_vlan_all` is now initialized
 * (it was read uninitialized in match_vlan()), and the VLAN setters reset it
 * so a port reconfigured after set_vlan_trunk_all() no longer matches every
 * VLAN.
 */
class Port_iface
{
protected:
  Virtio_net_switch::Port_statistics *_stats; ///< Statistics slot of this port.

public:
  /**
   * \param name  Debug name of the port; truncated to fit `_name`.
   *
   * \throws L4::Runtime_error(-L4_ENOMEM) if no statistics slot is available
   *         (only with CONFIG_VNS_STATS).
   */
  Port_iface(char const *name)
  {
    strncpy(_name, name, sizeof(_name));
    _name[sizeof(_name) - 1] = '\0';

#ifdef CONFIG_VNS_STATS
    _stats = Switch_statistics::get_instance().allocate_port_statistics(name);
    if (!_stats)
      throw L4::Runtime_error(-L4_ENOMEM,
                              "Could not allocate port statistics.\n");
#endif
  }

  virtual ~Port_iface()
  {
#ifdef CONFIG_VNS_STATS
    // Release the statistics slot for reuse by future ports.
    _stats->in_use = false;
#endif
  }

  // delete copy and assignment
  Port_iface(Port_iface const &) = delete;
  Port_iface &operator = (Port_iface const &) = delete;

  /// Debug name of the port.
  char const *get_name() const
  { return _name; }

  /// VLAN id of a native/access port (VLAN_ID_TRUNK for trunk ports).
  l4_uint16_t get_vlan() const
  { return _vlan_id; }

  inline bool is_trunk() const
  { return _vlan_id == VLAN_ID_TRUNK; }

  inline bool is_native() const
  { return _vlan_id == VLAN_ID_NATIVE; }

  inline bool is_access() const
  { return !is_trunk() && !is_native(); }

  /**
   * Set port as access port for a certain VLAN.
   *
   * \param id The VLAN id for traffic on this port (0 < id < 0xfff)
   *
   * The port does not see VLAN tags but belongs to the given VLAN.
   */
  void set_vlan_access(l4_uint16_t id)
  {
    assert(vlan_valid_id(id));
    _vlan_id = id;
    _vlan_all = false; // forget a previous set_vlan_trunk_all()
    _vlan_bloom_filter = 0;
    _vlan_ids.clear();
  }

  /**
   * Set port as trunk port.
   *
   * \param ids List of VLAN ids that are switched on this port
   *
   * Incoming traffic on this port is expected to have a VLAN tag that matches
   * one in \a ids. Outgoing traffic will be tagged it if there is no tag in
   * the Ethernet header yet.
   */
  void set_vlan_trunk(const std::vector<l4_uint16_t> &ids)
  {
    // bloom filter to quickly reject packets that do not belong to this port
    l4_uint32_t filter = 0;
    _vlan_ids.clear();
    for (const auto id : ids)
      {
        assert(vlan_valid_id(id));
        filter |= vlan_bloom_hash(id);
        _vlan_ids.insert(id);
      }
    _vlan_id = VLAN_ID_TRUNK;
    _vlan_all = false; // restrict to the explicitly listed VLANs
    _vlan_bloom_filter = filter;
  }

  /**
   * This port shall participate in all VLANs.
   */
  void set_vlan_trunk_all()
  {
    _vlan_all = true;
    _vlan_id = VLAN_ID_TRUNK;
    _vlan_bloom_filter = -1;
  }

  /**
   * Set this port as monitor port.
   *
   * Ensures that outgoing traffic will have a VLAN tag if the packet belongs
   * to a VLAN. Packets coming from native ports will remain untagged.
   */
  void set_monitor()
  {
    _vlan_id = VLAN_ID_TRUNK;
    _vlan_all = false; // monitor delivery is not based on VLAN matching
    _vlan_bloom_filter = 0;
  }

  /**
   * Match VLAN id.
   *
   * \param id The VLAN id of the packet or VLAN_ID_NATIVE.
   *
   * Check whether VLAN \a id is switched on this port. Packets of native ports
   * have the special VLAN_ID_NATIVE id.
   */
  bool match_vlan(uint16_t id)
  {
    // Regular case native/access port
    if (id == _vlan_id)
      return true;

    // This port participates in all VLANs
    if (_vlan_all)
      return true;

    // Quick check: does port probably accept this VLAN?
    if ((_vlan_bloom_filter & vlan_bloom_hash(id)) == 0)
      return false;

    return _vlan_ids.find(id) != _vlan_ids.end();
  }

  /**
   * Get MAC address.
   *
   * Might be Mac_addr::Addr_unknown if this port has no explicit MAC address
   * set.
   */
  inline Mac_addr mac() const
  { return _mac; }

  /**
   * Compute the VLAN tag rewriting needed for a packet travelling from
   * \a src_port to this port.
   */
  Virtio_vlan_mangle create_vlan_mangle(Port_iface *src_port) const
  {
    Virtio_vlan_mangle mangle;

    if (is_trunk())
      {
        /*
         * Add a VLAN tag only if the packet does not already have one (by
         * coming from another trunk port) or if the packet does not belong to
         * any VLAN (by coming from a native port). The latter case is only
         * relevant if this is a monitor port. Otherwise traffic from native
         * ports is never forwarded to trunk ports.
         */
        if (!src_port->is_trunk() && !src_port->is_native())
          mangle = Virtio_vlan_mangle::add(src_port->get_vlan());
      }
    else
      /*
       * Remove VLAN tag only if the packet actually has one (by coming from a
       * trunk port).
       */
      if (src_port->is_trunk())
        mangle = Virtio_vlan_mangle::remove();

    return mangle;
  }

  /// Suppress client notifications until rx_notify_emit_and_enable().
  virtual void rx_notify_disable_and_remember() = 0;
  /// Deliver any suppressed notification and enable notifications again.
  virtual void rx_notify_emit_and_enable() = 0;
  /// Has the peer of this port vanished?
  virtual bool is_gone() const = 0;

  /** Get one request from the transmission queue */
  // std::optional<Net_request> get_tx_request() = 0;

  enum class Result
  {
    Delivered, Exception, Dropped,
  };

  /**
   * Handle a request, i.e. send the request to this port.
   *
   * \param src_port Port the request is coming from
   * \param src      Structure describing the current transfer from src_port
   *                 to this port
   * \param bytes_transferred Amount of data transferred by a successful transfer
   *
   * \retval Result::Dropped    Request was dropped
   * \retval Result::Exception  Request triggered an error condition
   *                            while handling the target port queue
   * \retval Result::Delivered  Request was successfully handled
   *
   * \throws L4virtio::Svr::Bad_descriptor  Exception raised in SRC port queue.
   */
  virtual Result handle_request(Port_iface *src_port,
                                Net_transfer &src,
                                l4_uint64_t *bytes_transferred) = 0;

  /// Trigger the software IRQ that resumes TX processing for this port.
  void reschedule_pending_tx()
  { _pending_tx_reschedule->trigger(); }

protected:
  /*
   * VLAN related management information.
   *
   * A port may either be
   *   - a native port (_vlan_id == VLAN_ID_NATIVE), or
   *   - an access port (_vlan_id set accordingly), or
   *   - a trunk port (_vlan_id == VLAN_ID_TRUNK, _vlan_bloom_filter and
   *     _vlan_ids populated accordingly, or _vlan_all == true).
   */
  l4_uint16_t _vlan_id = VLAN_ID_NATIVE; // VID for native/access port
  l4_uint32_t _vlan_bloom_filter = 0;    // Bloom filter for trunk ports
  std::set<l4_uint16_t> _vlan_ids;       // Authoritative list of trunk VLANs
  // This port participates in all VLANs (ignoring _vlan_ids). Must be
  // initialized: match_vlan() reads it on every packet.
  bool _vlan_all = false;

  inline l4_uint32_t vlan_bloom_hash(l4_uint16_t vid)
  { return 1UL << (vid & 31U); }

  /**
   * Reschedule TX request handling for port that hit its TX burst limit.
   */
  L4::Cap<L4::Irq> _pending_tx_reschedule;

  Mac_addr _mac = Mac_addr(Mac_addr::Addr_unknown); /**< The MAC address of the port. */
  char _name[20]; /**< Debug name */

public:
#ifdef CONFIG_VNS_STATS
  inline void stat_inc_tx_num()
  { _stats->tx_num++; }
  inline void stat_inc_tx_dropped()
  { _stats->tx_dropped++; }
  inline void stat_inc_tx_bytes(l4_uint64_t bytes)
  { _stats->tx_bytes += bytes; }
  inline void stat_inc_rx_num()
  { _stats->rx_num++; }
  inline void stat_inc_rx_dropped()
  { _stats->rx_dropped++; }
  inline void stat_inc_rx_bytes(l4_uint64_t bytes)
  { _stats->rx_bytes += bytes; }
#else
  inline void stat_inc_tx_num()
  {}
  inline void stat_inc_tx_dropped()
  {}
  inline void stat_inc_tx_bytes(l4_uint64_t /*bytes*/)
  {}
  inline void stat_inc_rx_num()
  {}
  inline void stat_inc_rx_dropped()
  {}
  inline void stat_inc_rx_bytes(l4_uint64_t /*bytes*/)
  {}
#endif
};
/**\}*/

View File

@@ -0,0 +1,161 @@
/*
* Copyright (C) 2024 Kernkonzept GmbH.
* Author(s): Georg Kotheimer <georg.kotheimer@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "port.h"
#include "request_ixl.h"
#include <l4/ixl/device.h>
#include <l4/ixl/memory.h>
#include <optional>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
 * Switch port backed by a physical NIC driven via the Ixl library.
 *
 * TX direction (switch -> wire) copies the packet into a DMA-able `pkt_buf`
 * from the port's mempool and queues it at the device. RX direction is
 * exposed through get_tx_request()/tx_work_pending(), which batch-fetch
 * received packets from the device.
 */
class Ixl_port : public Port_iface
{
public:
  /// Maximum number of packets fetched from the device in one rx_batch call.
  static constexpr unsigned Tx_batch_size = 32;
  /// Number of packet buffers in the DMA mempool.
  static constexpr unsigned Num_bufs = 1024;
  /// Size of one mempool buffer (pkt_buf header plus payload).
  static constexpr unsigned Buf_size = 2048;
  /// Upper bound on the mempool's backing memory.
  static constexpr l4_uint64_t Max_mem_size = 1ULL << 28;

  /**
   * \param dev  Initialized Ixl device; port name and MAC address are taken
   *             from the device.
   */
  Ixl_port(Ixl::Ixl_device *dev)
  : Port_iface(dev->get_driver_name().c_str()),
    _dev(dev),
    _mempool(*_dev, Num_bufs, Buf_size, Max_mem_size)
  {
    Ixl::mac_address mac_addr = _dev->get_mac_addr();
    _mac = Mac_addr(reinterpret_cast<char const *>(mac_addr.addr));
#if CONFIG_VNS_STATS
    _mac.to_array(_stats->mac);
#endif
  }

  // OPTIMIZE: Could use this information for rx batching, i.e. collect while
  //           rx_notify is disabled, then flush the collected buffers when
  //           rx_notify is enabled again.
  void rx_notify_disable_and_remember() override {}
  void rx_notify_emit_and_enable() override {}

  // Hardware ports have no client that could disappear.
  bool is_gone() const override { return false; }

  /** Check whether there is any work pending on the transmission queue */
  bool tx_work_pending()
  {
    fetch_tx_requests();
    return _tx_batch_idx < _tx_batch_len;
  }

  /** Get one request from the transmission queue */
  std::optional<Ixl_net_request> get_tx_request()
  {
    fetch_tx_requests();
    if (_tx_batch_idx < _tx_batch_len)
      return std::make_optional<Ixl_net_request>(_tx_batch[_tx_batch_idx++]);
    else
      return std::nullopt;
  }

  /**
   * Copy the packet described by \a src into a fresh pkt_buf and queue it
   * for transmission at the device.
   *
   * See Port_iface::handle_request() for the contract. The virtio-net
   * header of the request is intentionally dropped (see NOTE below).
   */
  Result handle_request(Port_iface *src_port, Net_transfer &src,
                        l4_uint64_t *bytes_transferred) override
  {
    Virtio_vlan_mangle mangle = create_vlan_mangle(src_port);

    Dbg trace(Dbg::Request, Dbg::Trace, "REQ-IXL");
    trace.printf("%s: Transfer request %p.\n", _name, src.req_id());

    struct Ixl::pkt_buf *buf = _mempool.pkt_buf_alloc();
    if (!buf)
      {
        trace.printf("\tTransfer failed, out-of-memory, dropping.\n");
        return Result::Dropped;
      }

    // NOTE: Currently, the switch does not offer checksum or segmentation
    //       offloading to its l4virtio clients, so it is fine to simply ignore
    //       the Virtio_net::Hdr of the request here.

    // Copy the request to the pkt_buf.
    Buffer dst_buf(reinterpret_cast<char *>(buf->data),
                   Buf_size - offsetof(Ixl::pkt_buf, data));
    unsigned max_size = Buf_size - offsetof(Ixl::pkt_buf, data);
    for (;;)
      {
        try
          {
            if (src.done())
              // Request completely copied to destination.
              break;
          }
        catch (L4virtio::Svr::Bad_descriptor &e)
          {
            trace.printf("\tTransfer failed, bad descriptor exception, dropping.\n");
            // Handle partial transfers to destination port.
            Ixl::pkt_buf_free(buf);
            throw;
          }

        if (dst_buf.done())
          {
            // Packet larger than one pkt_buf; cannot be sent in one piece.
            trace.printf(
              "\tTransfer failed, exceeds max packet-size, dropping.\n");
            Ixl::pkt_buf_free(buf);
            return Result::Dropped;
          }

        auto &src_buf = src.cur_buf();
        trace.printf("\tCopying %p#%p:%u (%x) -> %p#%p:%u (%x)\n",
                     src_port, src_buf.pos, src_buf.left, src_buf.left,
                     static_cast<Port_iface *>(this),
                     dst_buf.pos, dst_buf.left, dst_buf.left);
        mangle.copy_pkt(dst_buf, src_buf);
      }
    // Actual packet length is what was consumed of the destination buffer.
    buf->size = max_size - dst_buf.left;
    *bytes_transferred = buf->size;

    // Enqueue the pkt_buf at the device.
    if (_dev->tx_batch(0, &buf, 1) == 1)
      {
        trace.printf("\tTransfer queued at device.\n");
        return Result::Delivered;
      }
    else
      {
        trace.printf("\tTransfer failed, dropping.\n");
        Ixl::pkt_buf_free(buf);
        return Result::Dropped;
      }
  }

  /// The underlying Ixl device.
  Ixl::Ixl_device *dev() { return _dev; }

private:
  // Refill the cached receive batch if the previous one was consumed.
  void fetch_tx_requests()
  {
    if (_tx_batch_idx < _tx_batch_len)
      // Previous batch not yet fully processed.
      return;

    // Batch receive, then cache in member array, to avoid frequent interactions
    // with the hardware.
    _tx_batch_len = _dev->rx_batch(0, _tx_batch, Tx_batch_size);
    _tx_batch_idx = 0;
  }

  Ixl::Ixl_device *_dev;                 ///< Underlying hardware device.
  Ixl::Mempool _mempool;                 ///< DMA buffer pool for TX copies.
  Ixl::pkt_buf *_tx_batch[Tx_batch_size]; ///< Cached received packets.
  unsigned _tx_batch_idx = 0;            ///< Next unconsumed entry in batch.
  unsigned _tx_batch_len = 0;            ///< Number of valid entries in batch.
};
/**\}*/

View File

@@ -0,0 +1,321 @@
/*
* Copyright (C) 2016-2017, 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
* Georg Kotheimer <georg.kotheimer@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "port.h"
#include "request_l4virtio.h"
#include "virtio_net.h"
#include <l4/cxx/pair>
#include <vector>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* A Port on the Virtio Net Switch
*
* A Port object gets created by `Virtio_factory::op_create()`. This function
* actually only instantiates objects of the types `Switch_port` and
* `Monitor_port`. The created Port registers itself at the switch's server.
* Usually, the IPC call for port creation comes from ned. To finalize the
* setup, the client has to initialize the port during the virtio
* initialization phase. To do this, the client registers a dataspace for
* queues and buffers and provides an IRQ to notify the client on incoming
* network requests.
*/
/**
 * Switch port connected to an l4virtio network client.
 *
 * Combines the generic switch-port interface (Port_iface) with the
 * l4virtio device model (Virtio_net). handle_request() implements the RX
 * path into the client's receive virtqueue, including mergeable receive
 * buffer support.
 */
class L4virtio_port : public Port_iface, public Virtio_net
{
public:
  /**
   * Create a Virtio net port object
   *
   * \param vq_max  Maximum number of buffers per virtqueue.
   * \param num_ds  Number of dataspaces the client may register.
   * \param name    Debug name of the port.
   * \param mac     Optional MAC address announced to the client; may be
   *                nullptr, in which case the MAC feature is not offered.
   */
  explicit L4virtio_port(unsigned vq_max, unsigned num_ds, char const *name,
                         l4_uint8_t const *mac)
  : Port_iface(name), Virtio_net(vq_max)
  {
    init_mem_info(num_ds);

    Features hf = _dev_config.host_features(0);
    if (mac)
      {
        _mac = Mac_addr((char const *)mac);
        memcpy((void *)_dev_config.priv_config()->mac, mac,
               sizeof(_dev_config.priv_config()->mac));
        hf.mac() = true;

        Dbg d(Dbg::Port, Dbg::Info);
        d.cprintf("%s: Adding Mac '", _name);
        _mac.print(d);
        d.cprintf("' to host features to %x\n", hf.raw);
      }
    _dev_config.host_features(0) = hf.raw;
    _dev_config.reset_hdr();
    Dbg(Dbg::Port, Dbg::Info)
      .printf("%s: Set host features to %x\n", _name,
              _dev_config.host_features(0));
#if CONFIG_VNS_STATS
    _mac.to_array(_stats->mac);
#endif
  }

  void rx_notify_disable_and_remember() override
  {
    kick_disable_and_remember();
  }

  void rx_notify_emit_and_enable() override
  {
    kick_emit_and_enable();
  }

  // The client is gone when its capability is no longer mapped.
  bool is_gone() const override
  {
    return obj_cap() && !obj_cap().validate().label();
  }

  /** Check whether there is any work pending on the transmission queue */
  bool tx_work_pending() const
  {
    return L4_LIKELY(tx_q()->ready()) && tx_q()->desc_avail();
  }

  /** Get one request from the transmission queue */
  std::optional<Virtio_net_request> get_tx_request()
  {
    return Virtio_net_request::get_request(this, tx_q());
  }

  /**
   * Drop all requests pending in the transmission queue.
   *
   * This is used for monitor ports, which are not allowed to send packets.
   */
  void drop_requests()
  { Virtio_net_request::drop_requests(this, tx_q()); }

  /**
   * Copy the packet described by \a src into this client's receive queue.
   *
   * See Port_iface::handle_request() for the contract. Supports mergeable
   * receive buffers (num_buffers > 1) when the client negotiated
   * VIRTIO_NET_F_MRG_RXBUF.
   */
  Result handle_request(Port_iface *src_port, Net_transfer &src,
                        l4_uint64_t *bytes_transferred) override
  {
    Virtio_vlan_mangle mangle = create_vlan_mangle(src_port);

    Dbg trace(Dbg::Request, Dbg::Trace, "REQ-VIO");
    trace.printf("%s: Transfer request %p.\n", _name, src.req_id());

    Buffer dst;
    int total = 0;
    l4_uint16_t num_merged = 0;
    l4_uint64_t total_merged = 0;
    typedef cxx::Pair<L4virtio::Svr::Virtqueue::Head_desc, l4_uint32_t> Consumed_entry;
    std::vector<Consumed_entry> consumed;

    Virtio_net *dst_dev = this;
    Virtqueue *dst_queue = rx_q();
    L4virtio::Svr::Virtqueue::Head_desc dst_head;
    L4virtio::Svr::Request_processor dst_req_proc;
    Virtio_net::Hdr *dst_header = nullptr;

    for (;;)
      {
        try
          {
            if (src.done())
              // Request completely copied to destination.
              break;
          }
        catch (L4virtio::Svr::Bad_descriptor &e)
          {
            trace.printf("\tTransfer failed, bad descriptor exception, dropping.\n");
            // Handle partial transfers to destination port.
            if (!consumed.empty())
              // Partial transfer, rewind to before first descriptor of transfer.
              dst_queue->rewind_avail(consumed.at(0).first);
            else if (dst_head)
              // Partial transfer, still at first _dst_head.
              dst_queue->rewind_avail(dst_head);
            throw;
          }

        /* The source data structures are already initialized, the header
           is consumed and src stands at the very first real buffer.
           Initialize the target data structures if necessary and fill the
           header. */
        if (!dst_head)
          {
            if (!dst_queue->ready())
              return Result::Dropped;

            auto r = dst_queue->next_avail();
            if (L4_UNLIKELY(!r))
              {
                trace.printf("\tTransfer failed, destination queue depleted, dropping.\n");
                // Abort incomplete transfer.
                if (!consumed.empty())
                  dst_queue->rewind_avail(consumed.front().first);
                return Result::Dropped;
              }

            try
              {
                dst_head = dst_req_proc.start(dst_dev->mem_info(), r, &dst);
              }
            catch (L4virtio::Svr::Bad_descriptor &e)
              {
                Dbg(Dbg::Request, Dbg::Warn, "REQ")
                  .printf("%s: bad descriptor exception: %s - %i"
                          " -- signal device error in destination device %p.\n",
                          __PRETTY_FUNCTION__, e.message(), e.error, dst_dev);
                dst_dev->device_error();
                return Result::Exception; // Must not touch the dst queues anymore.
              }

            if (!dst_header)
              {
                if (dst.left < sizeof(Virtio_net::Hdr))
                  throw L4::Runtime_error(-L4_EINVAL,
                                          "Target buffer too small for header");
                dst_header = reinterpret_cast<Virtio_net::Hdr *>(dst.pos);
                trace.printf("\tCopying header to %p (size: %u)\n",
                             dst.pos, dst.left);
                /*
                 * Header and csum offloading/general segmentation offloading
                 *
                 * We just copy the original header from source to
                 * destination and have to consider three different
                 * cases:
                 * - no flags are set
                 *   - we got a packet that is completely checksummed
                 *     and correctly fragmented, there is nothing to
                 *     do other then copying.
                 * - virtio_net_hdr_f_needs_csum set
                 *   - the packet is partially checksummed; if we would
                 *     send the packet out on the wire we would have
                 *     to calculate checksums now. But here we rely on
                 *     the ability of our guest to handle partially
                 *     checksummed packets and simply delegate the
                 *     checksum calculation to them.
                 * - gso_type != gso_none
                 *   - the packet needs to be segmented; if we would
                 *     send it out on the wire we would have to
                 *     segment it now. But again we rely on the
                 *     ability of our guest to handle gso
                 *
                 * We currently assume that our guests negotiated
                 * virtio_net_f_guest_*, this needs to be checked in
                 * the future.
                 *
                 * We also discussed the usage of
                 * virtio_net_hdr_f_data_valid to remove the need to
                 * checksum packets at all. But since our clients send
                 * partially checksummed packets anyway the only
                 * interesting case would be a packet without
                 * net_hdr_f_needs_checksum set. In that case we would
                 * signal that we checked the checksum and the
                 * checksum is actually correct. Since we do not know
                 * the origin of the packet (it could have been send
                 * by an external node and could have been routed to
                 * u) we can not signal this without actually
                 * verifying the checksum. Otherwise a packet with an
                 * invalid checksum could be successfully delivered.
                 */
                total = sizeof(Virtio_net::Hdr);
                src.copy_header(dst_header);
                mangle.rewrite_hdr(dst_header);
                dst.skip(total);
              }
            ++num_merged;
          }

        bool has_dst_buffer = !dst.done();
        if (!has_dst_buffer)
          try
            {
              // The current dst buffer is full, try to get next chained buffer.
              has_dst_buffer = dst_req_proc.next(dst_dev->mem_info(), &dst);
            }
          catch (L4virtio::Svr::Bad_descriptor &e)
            {
              Dbg(Dbg::Request, Dbg::Warn, "REQ")
                .printf("%s: bad descriptor exception: %s - %i"
                        " -- signal device error in destination device %p.\n",
                        __PRETTY_FUNCTION__, e.message(), e.error, dst_dev);
              dst_dev->device_error();
              return Result::Exception; // Must not touch the dst queues anymore.
            }

        if (has_dst_buffer)
          {
            auto &src_buf = src.cur_buf();
            trace.printf("\tCopying %p#%p:%u (%x) -> %p#%p:%u (%x)\n",
                         src_port, src_buf.pos, src_buf.left, src_buf.left,
                         static_cast<Port_iface *>(this),
                         dst.pos, dst.left, dst.left);
            total += mangle.copy_pkt(dst, src_buf);
          }
        else if (negotiated_features().mrg_rxbuf())
          {
            // save descriptor information for later
            trace.printf("\tSaving descriptor for later\n");
            consumed.push_back(Consumed_entry(dst_head, total));
            total_merged += total;
            total = 0;
            dst_head = L4virtio::Svr::Virtqueue::Head_desc();
          }
        else
          {
            trace.printf("\tTransfer failed, destination buffer too small, dropping.\n");
            // Abort incomplete transfer.
            dst_queue->rewind_avail(dst_head);
            return Result::Dropped;
          }
      }

    /*
     * Finalize the Request delivery. Call `finish()` on the destination
     * port's receive queue, which will result in triggering the destination
     * client IRQ.
     */
    if (!dst_header)
      {
        if (!total)
          trace.printf("\tTransfer - not started yet, dropping\n");
        return Result::Dropped;
      }

    if (consumed.empty())
      {
        // Single-descriptor delivery, no merging happened.
        assert(dst_head);
        assert(num_merged == 1);
        trace.printf("\tTransfer - Invoke dst_queue->finish()\n");
        dst_header->num_buffers = 1;
        dst_queue->finish(dst_head, dst_dev, total);
        *bytes_transferred = total;
      }
    else
      {
        // Mergeable receive buffers: finish all consumed descriptors at once.
        assert(dst_head);
        dst_header->num_buffers = num_merged;
        consumed.push_back(Consumed_entry(dst_head, total));
        trace.printf("\tTransfer - Invoke dst_queue->finish(iter)\n");
        *bytes_transferred = total + total_merged;
        dst_queue->finish(consumed.begin(), consumed.end(), dst_dev);
      }

    return Result::Delivered;
  }
};
/**\}*/

View File

@@ -0,0 +1,151 @@
/*
* Copyright (C) 2016-2017, 2020, 2022, 2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "mac_addr.h"
#include "virtio_net.h"
#include "virtio_net_buffer.h"
#include "vlan.h"
#include <l4/l4virtio/server/virtio>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* A network request to only a single destination.
*
* A `Net_request` can have multiple destinations (being a broadcast
* request, for example). That is why it is processed by multiple
* `Net_transfer`s, each representing the delivery to a single
* destination port.
*
* `Port_iface::handle_request` uses the `Net_transfer` to move one packet to
* the destination of the request.
*/
/**
 * Abstract base of a single delivery of a network request to one port.
 *
 * Concrete subclasses (l4virtio and Ixl variants) initialize `_cur_buf` and
 * `_req_id` in their constructors.
 */
class Net_transfer
{
public:
  virtual ~Net_transfer() = default;

  /**
   * Identifier for the underlying `Net_request`, used for logging purposes.
   */
  void const *req_id() const { return _req_id; }

  /**
   * Populate the virtio-net header for the destination.
   */
  virtual void copy_header(Virtio_net::Hdr *dst_header) const = 0;

  /**
   * Buffer containing (a part of) the packet data.
   *
   * Once emptied, a call to `done()` might replenish the buffer, in case the
   * net request consisted of multiple chained buffers.
   */
  Buffer &cur_buf() { return _cur_buf; }

  /**
   * Check whether the transfer has been completed, i.e. the entire packet data
   * has been copied.
   *
   * \retval false  There is remaining packet data that needs to be copied.
   * \retval true   The entire packet data has been copied.
   *
   * \throws L4virtio::Svr::Bad_descriptor  Exception raised in SRC port queue.
   */
  virtual bool done() = 0;

protected:
  Buffer _cur_buf;     // Current chunk of packet data; set by derived classes.
  void const *_req_id; // Log identifier; set by derived classes' constructors.
};
/**
 * Base class of a network request; wraps the raw packet data in `_pkt`.
 */
class Net_request
{
public:
  /** Check whether the Ethernet frame carries an 802.1Q VLAN tag. */
  bool has_vlan() const
  {
    // Need at least the 14-byte Ethernet header to inspect the TPID field.
    if (!_pkt.pos || _pkt.left < 14)
      return false;

    auto const *frame = reinterpret_cast<uint8_t const *>(_pkt.pos);
    return frame[12] == 0x81U && frame[13] == 0x00U;
  }

  /** Extract the VLAN ID of the frame, or VLAN_ID_NATIVE when untagged. */
  uint16_t vlan_id() const
  {
    // The 16-bit TCI follows the TPID and is only present on tagged frames.
    if (!has_vlan() || _pkt.left < 16)
      return VLAN_ID_NATIVE;

    auto const *frame = reinterpret_cast<uint8_t const *>(_pkt.pos);
    uint16_t const tci = uint16_t{frame[14]} << 8 | frame[15];
    return tci & 0xfffU; // VID is the lower 12 bits of the TCI
  }

  /**
   * Get the location and size of the current buffer.
   *
   * \param[out] size  Size of the current buffer.
   *
   * \return Address of the current buffer.
   *
   * This function returns the address and size of the currently
   * active buffer for this request. The buffer might only be a part
   * of the request, which may consist of more than one buffer.
   */
  uint8_t const *buffer(size_t *size) const
  {
    *size = _pkt.left;
    return reinterpret_cast<uint8_t const *>(_pkt.pos);
  }

  /** Dump packet information to the packet debug/trace channels. */
  void dump_pkt() const
  {
    Dbg pkt_debug(Dbg::Packet, Dbg::Debug, "PKT");
    if (!pkt_debug.is_active())
      return;

    //pkt_debug.cprintf("\t");
    //src_mac().print(pkt_debug);
    //pkt_debug.cprintf(" -> ");
    //dst_mac().print(pkt_debug);
    //pkt_debug.cprintf("\n");
    Dbg pkt_trace(Dbg::Packet, Dbg::Trace, "PKT");
    if (!pkt_trace.is_active() || _pkt.left < 14)
      return;

    auto const *packet = reinterpret_cast<uint8_t const *>(_pkt.pos);
    pkt_trace.cprintf("\n\tEthertype: ");
    uint16_t const ether_type = uint16_t{packet[12]} << 8 | packet[13];

    // Translate well-known Ethertypes into names for the trace output.
    char const *protocol = nullptr;
    switch (ether_type)
      {
      case 0x0800: protocol = "IPv4"; break;
      case 0x0806: protocol = "ARP"; break;
      case 0x8100: protocol = "Vlan"; break;
      case 0x86dd: protocol = "IPv6"; break;
      case 0x8863: protocol = "PPPoE Discovery"; break;
      case 0x8864: protocol = "PPPoE Session"; break;
      default: break;
      }

    if (protocol)
      pkt_trace.cprintf("%s\n", protocol);
    else
      pkt_trace.cprintf("%04x\n", ether_type);
  }

protected:
  Buffer _pkt; // Raw packet data of the request.
};
/**\}*/

View File

@@ -0,0 +1,136 @@
/*
* Copyright (C) 2024 Kernkonzept GmbH.
* Author(s): Georg Kotheimer <georg.kotheimer@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "port.h"
#include "request.h"
#include <l4/ixl/memory.h>
#include <utility>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
 * Network request originating from an Ixl hardware port.
 *
 * Owns an `Ixl::pkt_buf` received from the device and releases it back to
 * the mempool on destruction. Move-only.
 */
class Ixl_net_request final : public Net_request
{
public:
  /**
   * Transfer state for delivering an `Ixl_net_request` to one destination.
   */
  class Ixl_net_transfer final : public Net_transfer
  {
  public:
    explicit Ixl_net_transfer(Ixl_net_request const &request)
    : _request(request)
    {
      _cur_buf = Buffer(reinterpret_cast<char *>(request.buf()->data),
                        request.buf()->size);
      _req_id = _request.buf();
    }

    // delete copy constructor and copy assignment operator
    Ixl_net_transfer(Ixl_net_transfer const &) = delete;
    Ixl_net_transfer &operator = (Ixl_net_transfer const &) = delete;

    /**
     * Populate the virtio-net header for the destination.
     *
     * Frames received from the hardware are complete, so no checksum or
     * segmentation offloading information is forwarded.
     */
    void copy_header(Virtio_net::Hdr *dst_header) const override
    {
      dst_header->flags.data_valid() = 0;
      dst_header->flags.need_csum() = 0;
      dst_header->gso_type = 0; // GSO_NONE
      dst_header->hdr_len = sizeof(Virtio_net::Hdr);
      dst_header->gso_size = 0;
      dst_header->csum_start = 0;
      dst_header->csum_offset = 0;
      dst_header->num_buffers = 1;
    }

    bool done() override { return _cur_buf.done(); }

  private:
    Ixl_net_request const &_request;
  };

  /// Log the packet held by this request on behalf of \a port.
  void dump_request(Port_iface *port) const
  {
    Dbg debug(Dbg::Request, Dbg::Debug, "REQ-IXL");
    if (debug.is_active())
      {
        debug.printf("%s: Next packet: %p - %x bytes\n",
                     port->get_name(), _pkt.pos, _pkt.left);
      }
    dump_pkt();
  }

  /// Take ownership of \a buf; it is freed when this request dies.
  explicit Ixl_net_request(Ixl::pkt_buf *buf) : _buf(buf)
  {
    _pkt = Buffer(reinterpret_cast<char *>(buf->data), buf->size);
  }

  // delete copy constructor and copy assignment operator
  Ixl_net_request(Ixl_net_request const &) = delete;
  Ixl_net_request &operator=(Ixl_net_request const &) = delete;

  // Move operations transfer ownership of the packet buffer. They are
  // noexcept so containers prefer moving over copying.
  Ixl_net_request(Ixl_net_request &&other) noexcept
  : _buf(other._buf)
  {
    _pkt = std::move(other._pkt);
    // Invalidate other.
    other._buf = nullptr;
  }

  Ixl_net_request &operator=(Ixl_net_request &&other) noexcept
  {
    // Guard against self-move: without it we would free _buf and then keep
    // using the dangling pointer.
    if (this != &other)
      {
        // Release the currently owned buffer before taking over the new one.
        if (_buf != nullptr)
          Ixl::pkt_buf_free(_buf);

        _buf = other._buf;
        _pkt = std::move(other._pkt);
        // Invalidate other.
        other._buf = nullptr;
      }
    return *this;
  }

  ~Ixl_net_request()
  {
    if (_buf != nullptr)
      {
        Ixl::pkt_buf_free(_buf);
        _buf = nullptr;
      }
  }

  /** Get the Mac address of the destination port. */
  Mac_addr dst_mac() const
  {
    return (_pkt.pos && _pkt.left >= Mac_addr::Addr_length)
           ? Mac_addr::from_uncached(_pkt.pos)
           : Mac_addr(Mac_addr::Addr_unknown);
  }

  /** Get the Mac address of the source port. */
  Mac_addr src_mac() const
  {
    return (_pkt.pos && _pkt.left >= Mac_addr::Addr_length * 2)
           ? Mac_addr::from_uncached(_pkt.pos + Mac_addr::Addr_length)
           : Mac_addr(Mac_addr::Addr_unknown);
  }

  /// The owned packet buffer (nullptr after being moved from).
  Ixl::pkt_buf *buf() const { return _buf; }

  /// Create a transfer object reading from this request.
  Ixl_net_transfer transfer_src() const
  { return Ixl_net_transfer(*this); }

private:
  Ixl::pkt_buf *_buf;
};
/**\}*/

View File

@@ -0,0 +1,291 @@
/*
* Copyright (C) 2016-2017, 2020, 2022, 2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Georg Kotheimer <georg.kotheimer@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "debug.h"
#include "port.h"
#include "request.h"
#include "virtio_net.h"
#include <l4/l4virtio/server/virtio>
#include <l4/util/assert.h>
#include <optional>
#include <utility>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* Abstraction for a network request
*
* A `Virtio_net_request` is constructed by the source port, using the static
* function `get_request()` as part of `Port_iface::get_tx_request()`.
*
* On destruction, `finish()` will be called, which, will trigger the client
* IRQ of the source client.
*/
class Virtio_net_request final : public Net_request
{
public:
  /**
   * Transfer adapter exposing this request's packet to a destination port.
   */
  class Virtio_net_transfer final : public Net_transfer
  {
  public:
    explicit Virtio_net_transfer(Virtio_net_request const &request)
    : _request(request),
      // We already looked at the very first buffer to find the target of the
      // packet. The request processor of the "parent request" contains the
      // current state of the transaction up to this point. Since there might
      // be more than one target for the request we have to keep track of our
      // own state and need our own request processor instance, which will be
      // initialized using the current state of the "parent request".
      _req_proc(_request.get_request_processor())
    {
      // The buffer descriptors used for this transaction and the amount of
      // bytes copied to the current target descriptor.
      _cur_buf = request.first_buffer();
      _req_id = _request.header();
    }

    // delete copy constructor and copy assignment operator
    Virtio_net_transfer(Virtio_net_transfer const &) = delete;
    Virtio_net_transfer &operator = (Virtio_net_transfer const &) = delete;

    /// Copy the virtio-net header of the source request verbatim.
    void copy_header(Virtio_net::Hdr *dst_header) const override
    {
      memcpy(dst_header, _request.header(), sizeof(Virtio_net::Hdr));
    }

    /// Done when the current buffer is consumed and no further buffer follows.
    bool done() override
    {
      return _cur_buf.done()
             && !_req_proc.next(_request.dev()->mem_info(), &_cur_buf);
    }

  private:
    Virtio_net_request const &_request;
    L4virtio::Svr::Request_processor _req_proc;
  };

  /// Dump the pending request including its virtio-net header for debugging.
  void dump_request(Port_iface *port) const
  {
    Dbg debug(Dbg::Request, Dbg::Debug, "REQ-VIO");
    if (debug.is_active())
      {
        debug.printf("%s: Next packet: %p:%p - %x bytes\n",
                     port->get_name(), _header, _pkt.pos, _pkt.left);
        if (_header->flags.raw || _header->gso_type)
          {
            debug.cprintf("flags:\t%x\n\t"
                          "gso_type:\t%x\n\t"
                          "header len:\t%x\n\t"
                          "gso size:\t%x\n\t"
                          "csum start:\t%x\n\t"
                          "csum offset:\t%x\n"
                          "\tnum buffer:\t%x\n",
                          _header->flags.raw,
                          _header->gso_type, _header->hdr_len,
                          _header->gso_size,
                          _header->csum_start, _header->csum_offset,
                          _header->num_buffers);
          }
      }
    dump_pkt();
  }

  // delete copy constructor and copy assignment operator
  Virtio_net_request(Virtio_net_request const &) = delete;
  Virtio_net_request &operator = (Virtio_net_request const &) = delete;

  // define move constructor and move assignment operator
  Virtio_net_request(Virtio_net_request &&other)
  : _dev(other._dev),
    _queue(other._queue),
    _head(std::move(other._head)),
    _req_proc(std::move(other._req_proc)),
    _header(other._header)
  {
    _pkt = std::move(other._pkt);
    // Invalidate other.
    other._queue = nullptr;
  }

  Virtio_net_request &operator = (Virtio_net_request &&other)
  {
    // Guard against self-move: finish() below would otherwise complete the
    // request prematurely before "moving" it onto itself.
    if (this != &other)
      {
        // Invalidate self.
        finish();

        _dev = other._dev;
        _queue = other._queue;
        _head = std::move(other._head);
        _req_proc = std::move(other._req_proc);
        _header = other._header;
        _pkt = std::move(other._pkt);
        // Invalidate other.
        other._queue = nullptr;
      }
    return *this;
  }

  /**
   * Start processing a request taken from a port's transmission queue.
   *
   * \param dev    Source port device.
   * \param queue  Transmission queue the request was taken from.
   * \param req    Descriptor head of the request.
   *
   * If the request is too short to hold a virtio-net header, `_header` stays
   * nullptr and `valid()` returns false.
   */
  Virtio_net_request(Virtio_net *dev, L4virtio::Svr::Virtqueue *queue,
                     L4virtio::Svr::Virtqueue::Request const &req)
  : _dev(dev), _queue(queue)
  {
    _head = _req_proc.start(_dev->mem_info(), req, &_pkt);
    _header = reinterpret_cast<Virtio_net::Hdr *>(_pkt.pos);

    l4_uint32_t skipped = _pkt.skip(sizeof(Virtio_net::Hdr));
    if (L4_UNLIKELY(   (skipped != sizeof(Virtio_net::Hdr))
                    || (_pkt.done() && !_next_buffer(&_pkt))))
      {
        _header = nullptr;
        Dbg(Dbg::Queue, Dbg::Warn).printf("Invalid request\n");
        return;
      }
  }

  ~Virtio_net_request()
  { finish(); }

  /// True if the request contained a complete virtio-net header.
  bool valid() const
  { return _header != nullptr; }

  /**
   * Drop all requests of a specific queue.
   *
   * This function is used for example to drop all requests in the transmission
   * queue of a monitor port, since monitor ports are not allowed to transmit
   * data.
   *
   * \param dev   Port of the provided virtqueue.
   * \param queue Virtqueue to drop all requests of.
   */
  static void drop_requests(Virtio_net *dev,
                            L4virtio::Svr::Virtqueue *queue)
  {
    if (L4_UNLIKELY(!queue->ready()))
      return;

    if (queue->desc_avail())
      Dbg(Dbg::Request, Dbg::Debug)
        .printf("Dropping incoming packets on monitor port\n");

    L4virtio::Svr::Request_processor req_proc;
    Buffer pkt;

    while (auto req = queue->next_avail())
      {
        auto head = req_proc.start(dev->mem_info(), req, &pkt);
        queue->finish(head, dev, 0);
      }
  }

  /**
   * Construct a request from the next entry of a provided queue.
   *
   * \param dev   Port of the provided virtqueue.
   * \param queue Virtqueue to extract next entry from.
   */
  static std::optional<Virtio_net_request>
  get_request(Virtio_net *dev, L4virtio::Svr::Virtqueue *queue)
  {
    if (L4_UNLIKELY(!queue->ready()))
      return std::nullopt;

    if (auto r = queue->next_avail())
      {
        // Virtio_net_request keeps "a lot of internal state",
        // therefore we create the object before creating the
        // state.
        // We might check later on whether it is possible to
        // save the state when we actually have to because a
        // transfer is blocking on a port.
        auto request = Virtio_net_request(dev, queue, r);
        if (request.valid())
          return request;
      }
    return std::nullopt;
  }

  /// First data buffer of the packet (behind the virtio-net header).
  Buffer const &first_buffer() const
  { return _pkt; }

  /// Virtio-net header of the request; nullptr if the request is invalid.
  Virtio_net::Hdr const *header() const
  { return _header; }

  /// Current decoding state; used to seed per-target transfer processors.
  L4virtio::Svr::Request_processor const &get_request_processor() const
  { return _req_proc; }

  /// Source port device the request belongs to.
  Virtio_net const *dev() const
  { return _dev; }

  /// Create a transfer adapter for copying this request to a destination.
  Virtio_net_transfer transfer_src() const
  { return Virtio_net_transfer(*this); }

  /** Get the Mac address of the destination port. */
  Mac_addr dst_mac() const
  {
    return (_pkt.pos && _pkt.left >= Mac_addr::Addr_length)
           ? Mac_addr(_pkt.pos)
           : Mac_addr(Mac_addr::Addr_unknown);
  }

  /** Get the Mac address of the source port. */
  Mac_addr src_mac() const
  {
    return (_pkt.pos && _pkt.left >= Mac_addr::Addr_length * 2)
           ? Mac_addr(_pkt.pos + Mac_addr::Addr_length)
           : Mac_addr(Mac_addr::Addr_unknown);
  }

private:
  /* needed for Virtqueue::finish() */
  /** Source Port */
  Virtio_net *_dev;
  /** transmission queue of the source port */
  L4virtio::Svr::Virtqueue *_queue;
  L4virtio::Svr::Virtqueue::Head_desc _head;
  /* the actual request processor, encapsulates the decoding of the request */
  L4virtio::Svr::Request_processor _req_proc;

  /* A request to the virtio net layer consists of one or more buffers
     containing the Virtio_net::Hdr and the actual packet. To make a
     switching decision we need to be able to look at the packet while
     still being able access the Virtio_net::Hdr for the actual copy
     operation. Therefore we keep track of two locations, the header
     location and the start of the packet (which might be in a
     different buffer) */
  Virtio_net::Hdr *_header;

  /// Advance to the next buffer of the request, if any.
  bool _next_buffer(Buffer *buf)
  { return _req_proc.next(_dev->mem_info(), buf); }

  /**
   * Finalize request
   *
   * This function calls `finish()` on the source port's transmission queue,
   * which will result in triggering the source client IRQ.
   */
  void finish()
  {
    if (_queue == nullptr || !_queue->ready())
      return;

    Dbg(Dbg::Virtio, Dbg::Trace).printf("%s(%p)\n", __PRETTY_FUNCTION__, this);
    _queue->finish(_head, _dev, 0);
    _queue = nullptr;
  }
};
/**\}*/

View File

@@ -0,0 +1,79 @@
#include <l4/re/env>
#include <l4/re/dataspace>
#include <l4/re/error_helper>
#include <l4/re/util/cap_alloc>
#include <l4/virtio-net-switch/stats.h>
class Switch_statistics
{
private:
L4Re::Util::Ref_cap<L4Re::Dataspace>::Cap _ds;
Virtio_net_switch::Statistics *_stats;
bool _initialized = false;
Switch_statistics() {}
~Switch_statistics()
{
if (_initialized)
L4Re::Env::env()->rm()->detach(reinterpret_cast<l4_addr_t>(_stats), 0);
}
l4_size_t _size;
public:
Virtio_net_switch::Statistics *stats()
{
if (_initialized)
return _stats;
else
throw L4::Runtime_error(-L4_EAGAIN, "Statistics not set up.");
}
static Switch_statistics& get_instance()
{
static Switch_statistics instance;
return instance;
}
void initialize(l4_uint64_t num_max_ports)
{
_size = l4_round_page(sizeof(Virtio_net_switch::Statistics)
+ sizeof(Virtio_net_switch::Port_statistics) * num_max_ports);
void *addr = malloc(_size);
if (!addr)
throw L4::Runtime_error(-L4_ENOMEM,
"Could not allocate statistics memory.");
memset(addr, 0, _size);
_stats = reinterpret_cast<Virtio_net_switch::Statistics *>(addr);
_initialized = true;
_stats->max_ports = num_max_ports;
}
Virtio_net_switch::Port_statistics *
allocate_port_statistics(char const* name)
{
for (unsigned i = 0; i < _stats->max_ports; ++i)
{
if (!_stats->port_stats[i].in_use)
{
memset(reinterpret_cast<void*>(&_stats->port_stats[i]), 0,
sizeof(Virtio_net_switch::Port_statistics));
_stats->port_stats[i].in_use = 1;
size_t len = std::min(strlen(name), sizeof(_stats->port_stats[i].name) - 1);
memcpy(_stats->port_stats[i].name, name, len);
_stats->port_stats[i].name[len] = '\0';
_stats->age++;
return &_stats->port_stats[i];
}
}
return nullptr;
}
inline l4_size_t size()
{ return _size; }
Switch_statistics(Switch_statistics const&) = delete;
void operator=(Switch_statistics const &) = delete;
};

View File

@@ -0,0 +1,281 @@
/*
* Copyright (C) 2016-2018, 2020, 2023-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#include "debug.h"
#include "switch.h"
#include "filter.h"
// Construct a switch with a value-initialized port array: all slots start
// out as nullptr (the trailing "()" zero-initializes the new[] array).
Virtio_switch::Virtio_switch(unsigned max_ports)
: _ports{new Port_iface *[max_ports]()},
  _max_ports{max_ports}
{
}
/// Find the lowest index in `_ports` that holds no port; -1 if full.
int
Virtio_switch::lookup_free_slot()
{
  unsigned slot = 0;
  while (slot < _max_ports)
    {
      if (_ports[slot] == nullptr)
        return static_cast<int>(slot);
      ++slot;
    }

  return -1;
}
/// Add a port to the first free slot; reject duplicate MAC addresses.
bool
Virtio_switch::add_port(Port_iface *port)
{
  if (!port->mac().is_unknown())
    {
      // A known MAC address must be unique among all attached ports.
      for (unsigned i = 0; i < _max_ports; ++i)
        {
          Port_iface *existing = _ports[i];
          if (existing && existing->mac() == port->mac())
            {
              Dbg(Dbg::Port, Dbg::Warn)
                .printf("Rejecting port '%s'. MAC address already in use.\n",
                        port->get_name());
              return false;
            }
        }
    }

  int slot = lookup_free_slot();
  if (slot < 0)
    return false;

  unsigned uslot = static_cast<unsigned>(slot);
  _ports[uslot] = port;
  // Track the high-water mark of occupied slots.
  if (uslot == _max_used)
    ++_max_used;

  return true;
}
/// Install \a port as the single monitor port; reject if one already exists.
bool
Virtio_switch::add_monitor_port(Port_iface *port)
{
  if (_monitor)
    {
      // Only one monitor port is supported at a time.
      Dbg(Dbg::Port, Dbg::Warn).printf("'%s' already defined as monitor port,"
                                       " rejecting monitor port '%s'\n",
                                       _monitor->get_name(), port->get_name());
      return false;
    }

  _monitor = port;
  return true;
}
void
Virtio_switch::check_ports()
{
for (unsigned idx = 0; idx < _max_used; ++idx)
{
Port_iface *port = _ports[idx];
if (port && port->is_gone())
{
Dbg(Dbg::Port, Dbg::Info)
.printf("Client on port %p has gone. Deleting...\n", port);
_ports[idx] = nullptr;
if (idx == _max_used-1)
--_max_used;
_mac_table.flush(port);
delete(port);
}
}
if (_monitor && _monitor->is_gone())
{
delete(_monitor);
_monitor = nullptr;
}
}
/**
 * Deliver one request originating from \a port.
 *
 * Learns the source MAC, delivers to the looked-up destination port when
 * known, otherwise floods to all ports in the same VLAN. The monitor port,
 * if present, receives a copy of every non-filtered packet.
 */
template<typename REQ>
void
Virtio_switch::handle_tx_request(Port_iface *port, REQ const &request)
{
  // Trunk ports are required to have a VLAN tag and only accept packets that
  // belong to a configured VLAN.
  if (port->is_trunk() && !port->match_vlan(request.vlan_id()))
    {
      // Drop packet.
      port->stat_inc_tx_dropped();
      return;
    }

  // Access ports must not be VLAN tagged to prevent double tagging attacks.
  if (port->is_access() && request.has_vlan())
    {
      // Drop packet.
      port->stat_inc_tx_dropped();
      return;
    }

  // Deliver the request to one destination port and update statistics.
  auto handle_request = [](Port_iface *dst_port, Port_iface *src_port,
                           REQ const &req)
    {
      auto transfer_src = req.transfer_src();
      l4_uint64_t bytes;
      auto res = dst_port->handle_request(src_port, transfer_src, &bytes);
      switch (res)
        {
        case Port_iface::Result::Delivered:
          dst_port->stat_inc_tx_num();
          dst_port->stat_inc_tx_bytes(bytes);
          src_port->stat_inc_rx_num();
          src_port->stat_inc_rx_bytes(bytes);
          break;
        case Port_iface::Result::Dropped:
          [[fallthrough]];
        case Port_iface::Result::Exception:
          [[fallthrough]];
        default:
          dst_port->stat_inc_tx_dropped();
          break;
        }
    };

  Mac_addr src = request.src_mac();
  auto dst = request.dst_mac();
  bool is_broadcast = dst.is_broadcast();
  uint16_t vlan = request.has_vlan() ? request.vlan_id() : port->get_vlan();
  _mac_table.learn(src, port, vlan);
  if (L4_LIKELY(!is_broadcast))
    {
      auto *target = _mac_table.lookup(dst, vlan);
      if (target)
        {
          // Do not send packets to the port they came in; they might
          // be sent to us by another switch which does not know how
          // to reach the target.
          if (target != port)
            {
              handle_request(target, port, request);

              if (_monitor && !filter_request(request))
                handle_request(_monitor, port, request);
            }
          return;
        }
    }

  // It is either a broadcast or an unknown destination - send to all
  // known ports except the source port. check_ports() may leave nullptr
  // holes in _ports, so skip empty slots instead of stopping at the first
  // one; otherwise ports behind a hole would never receive broadcasts.
  for (unsigned idx = 0; idx < _max_used; ++idx)
    {
      auto *target = _ports[idx];
      if (!target)
        continue;
      if (target != port && target->match_vlan(vlan))
        handle_request(target, port, request);
    }

  // Send a copy to the monitor port
  if (_monitor && !filter_request(request))
    handle_request(_monitor, port, request);
}
/// Process pending TX requests of \a port until its queue runs empty or the
/// burst limit is reached; `num_reqs_handled` accumulates across calls.
template<typename PORT>
void
Virtio_switch::handle_tx_requests(PORT *port, unsigned &num_reqs_handled)
{
  for (;;)
    {
      auto req = port->get_tx_request();
      if (!req)
        break;

      req->dump_request(port);
      handle_tx_request(port, *req);

      ++num_reqs_handled;
      if (num_reqs_handled >= Tx_burst)
        break; // Port has hit its TX burst limit.
    }
}
// Drain the TX queue of an L4virtio port.
//
// Returns true when the queue was fully processed; returns false when the
// port either hit its TX burst limit (pending work is rescheduled) or raised
// a Bad_descriptor error (device error is signaled).
bool
Virtio_switch::handle_l4virtio_port_tx(L4virtio_port *port)
{
  /* handle IRQ on one port for the time being */
  if (!port->tx_work_pending())
    Dbg(Dbg::Port, Dbg::Debug)
      .printf("%s: Irq without pending work\n", port->get_name());

  unsigned num_reqs_handled = 0;
  do
    {
      // Suppress guest notifications while we batch-process the queues.
      port->tx_q()->disable_notify();
      port->rx_q()->disable_notify();

      if (num_reqs_handled >= Tx_burst)
        {
          Dbg(Dbg::Port, Dbg::Debug)
            .printf(
              "%s: Tx burst limit hit, reschedule remaining Tx work.\n",
              port->get_name());
          // Port has hit its TX burst limit, so for fairness reasons, stop
          // processing TX work from this port, and instead reschedule the
          // pending work for later.
          port->reschedule_pending_tx();
          // NOTE: Notifications for this port remain disabled, until eventually
          // the reschedule handler calls `handle_l4virtio_port_tx` again.
          return false;
        }

      // Within the loop, to trigger before enabling notifications again.
      all_rx_notify_disable_and_remember();

      try
        {
          // throws Bad_descriptor exceptions raised on SRC port
          handle_tx_requests(port, num_reqs_handled);
        }
      catch (L4virtio::Svr::Bad_descriptor &e)
        {
          Dbg(Dbg::Port, Dbg::Warn, "REQ")
            .printf("%s: caught bad descriptor exception: %s - %i"
                    " -- Signal device error on device %p.\n",
                    __PRETTY_FUNCTION__, e.message(), e.error, port);
          // A malformed descriptor is a fatal client error: flag the device
          // and flush the RX kicks batched above before bailing out.
          port->device_error();
          all_rx_notify_emit_and_enable();
          return false;
        }

      // Emit the RX notifications suppressed during batch processing.
      all_rx_notify_emit_and_enable();

      port->tx_q()->enable_notify();
      port->rx_q()->enable_notify();

      // NOTE(review): barrier pair between re-enabling notifications and
      // re-checking tx_work_pending() -- presumably closes the window for a
      // lost-notification race with the guest; confirm against the l4virtio
      // memory-ordering rules.
      L4virtio::wmb();
      L4virtio::rmb();
    }
  while (port->tx_work_pending());

  return true;
}
#if CONFIG_VNS_IXL
/// Drain the TX queue of an Ixl hardware port (up to the burst limit).
bool
Virtio_switch::handle_ixl_port_tx(Ixl_port *port)
{
  unsigned handled = 0;

  // Batch RX notifications of all ports while processing this queue.
  all_rx_notify_disable_and_remember();
  handle_tx_requests(port, handled);
  all_rx_notify_emit_and_enable();

  bool burst_hit = handled >= Tx_burst && port->tx_work_pending();
  if (!burst_hit)
    return true;

  Dbg(Dbg::Port, Dbg::Info)
    .printf("%s: Tx burst limit hit, reschedule remaining Tx work.\n",
            port->get_name());
  // Port has hit its TX burst limit, so for fairness reasons, stop
  // processing TX work from this port, and instead reschedule the
  // pending work for later.
  port->reschedule_pending_tx();
  return false;
}
#endif

View File

@@ -0,0 +1,156 @@
/*
* Copyright (C) 2016-2017, 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include "port.h"
#include "port_l4virtio.h"
#include "mac_table.h"
#if CONFIG_VNS_IXL
#include "port_ixl.h"
#endif
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* The Virtio switch contains all ports and processes network requests.
*
* A Port on its own is not capable to process an incoming network request
* because it has no knowledge about other ports. The processing of an incoming
* request therefore gets delegated to the switch.
*
* The `Virtio_switch` is constructed at the start of the Virtio Net Switch
* application. The factory saves a reference to it to pass it to the
* `Kick_irq` on port creation.
*/
class Virtio_switch
{
private:
  /** Array of ports; capacity `_max_ports`, may contain nullptr holes. */
  Port_iface **_ports;
  /** The monitor port if there is one. */
  Port_iface *_monitor = nullptr;
  /** Capacity of `_ports`. */
  unsigned _max_ports;
  /** One past the highest slot index currently occupied in `_ports`. */
  unsigned _max_used = 0;
  /** MAC address -> port mapping used for switching decisions. */
  Mac_table<> _mac_table;

  // Limits the number of consecutive TX requests a port can process before
  // being interrupted to ensure fairness to other ports.
  static constexpr unsigned Tx_burst = 128;

  /** Find the lowest free slot in `_ports`; -1 if the switch is full. */
  int lookup_free_slot();

  /**
   * Deliver a request from a specific port.
   *
   * In case the MAC address of the destination port of a request is not yet
   * present in the `_mac_table` or if the request is a broadcast request, the
   * request is passed to all ports in the same VLAN.
   *
   * \param port    Port the request originates from.
   * \param request Request to deliver.
   */
  template<typename REQ>
  void handle_tx_request(Port_iface *port, REQ const &request);

  /**
   * Process pending TX requests of \a port.
   *
   * \param[in,out] num_reqs_handled Running count of handled requests;
   *                                 processing stops once it reaches
   *                                 `Tx_burst`.
   */
  template<typename PORT>
  void handle_tx_requests(PORT *port, unsigned &num_reqs_handled);

  /** Re-enable RX notifications on all ports, emitting suppressed kicks. */
  void all_rx_notify_emit_and_enable()
  {
    for (unsigned idx = 0; idx < _max_ports; ++idx)
      if (_ports[idx])
        _ports[idx]->rx_notify_emit_and_enable();
  }

  /** Suppress RX notifications on all ports, remembering pending kicks. */
  void all_rx_notify_disable_and_remember()
  {
    for (unsigned idx = 0; idx < _max_ports; ++idx)
      if (_ports[idx])
        _ports[idx]->rx_notify_disable_and_remember();
  }

public:
  /**
   * Create a switch with n ports.
   *
   * \param max_ports maximal number of provided ports
   */
  explicit Virtio_switch(unsigned max_ports);

  /**
   * Add a port to the switch.
   *
   * \param port A pointer to an already constructed Port_iface object.
   *
   * \retval true  Port was added successfully.
   * \retval false Switch was not able to add the port.
   */
  bool add_port(Port_iface *port);

  /**
   * Add a monitor port to the switch.
   *
   * \param port A pointer to an already constructed Port_iface object.
   *
   * \retval true  Port was added successfully.
   * \retval false Switch was not able to add the port.
   */
  bool add_monitor_port(Port_iface *port);

  /**
   * Check validity of ports.
   *
   * Check whether all ports are still used and remove any unused
   * (unreferenced) ports. Shall be invoked after an incoming cap
   * deletion irq to remove ports without clients.
   */
  void check_ports();

  /**
   * Handle TX queue of the given port.
   *
   * \param port L4virtio_port to handle pending TX work for.
   *
   * \retval false Port hit its TX burst limit, and thus a TX pending
   *               reschedule notification was queued.
   * \retval true  Port's entire TX queue was processed.
   */
  bool handle_l4virtio_port_tx(L4virtio_port *port);

#if CONFIG_VNS_IXL
  /**
   * Handle TX queue of the given port.
   *
   * \param port Ixl_port to handle pending TX work for.
   *
   * \retval false Port hit its TX burst limit, and thus a TX pending
   *               reschedule notification was queued.
   * \retval true  Port's entire TX queue was processed.
   */
  bool handle_ixl_port_tx(Ixl_port *port);
#endif

  /**
   * Is there still a free port on this switch available?
   *
   * \param monitor True if we look for a monitor slot.
   *
   * \retval >=0 The next available port index.
   * \retval -1  No port available.
   */
  int port_available(bool monitor)
  {
    if (monitor)
      return !_monitor ? 0 : -1;

    return lookup_free_slot();
  }
};
/**\}*/

View File

@@ -0,0 +1,325 @@
/*
* Copyright (C) 2016-2017, 2019, 2022-2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <l4/re/dataspace>
#include <l4/re/util/unique_cap>
#include <l4/sys/cxx/ipc_epiface>
#include <l4/l4virtio/server/virtio>
#include <l4/l4virtio/server/l4virtio>
#include <l4/l4virtio/l4virtio>
#include "debug.h"
/**
* \ingroup virtio_net_switch
* \{
*/
/**
 * Virtqueue with support for batching guest notifications.
 *
 * While kicks are disabled via kick_disable_and_remember(), notifications
 * that would have been sent are only recorded; kick_enable_get_pending()
 * re-enables immediate kicks and reports whether one was suppressed.
 */
class Virtqueue : public L4virtio::Svr::Virtqueue
{
public:
  /**
   * Check whether the guest has to be notified now.
   *
   * \retval true  Caller must trigger the guest notification.
   * \retval false No notification needed: either the guest suppressed
   *               notifications, or kicks are currently batched (then the
   *               kick is remembered in `_kick_pending`).
   */
  bool kick_queue()
  {
    if (no_notify_guest())
      return false;

    if (_do_kick)
      return true;

    _kick_pending = true;
    return false;
  }

  /**
   * Re-enable immediate kicks.
   *
   * \return True if a kick was suppressed while batching was active.
   */
  bool kick_enable_get_pending()
  {
    _do_kick = true;
    return _kick_pending;
  }

  /** Start batching: suppress kicks and reset the pending flag. */
  void kick_disable_and_remember()
  {
    _do_kick = false;
    _kick_pending = false;
  }

private:
  bool _do_kick = true;       // deliver kicks immediately?
  bool _kick_pending = false; // a kick was suppressed while batching
};
/**
* The Base class of a Port.
*
* This class provides the Virtio network protocol specific implementation
* aspects of a port.
*
* `Virtio_net` comprises the virtqueues for both, the incoming and the
* outgoing network requests:
*
* - The transmission queue, containing requests to be transmitted to other
* ports. The transmission queue is filled by the client, this port relates
* to.
* - The receive queue, containing requests that have been transmitted from
* other ports. The receive queue is filled by the switch.
*/
class Virtio_net :
  public L4virtio::Svr::Device,
  public L4::Epiface_t<Virtio_net, L4virtio::Device>
{
public:
  /// Flags field of the virtio-net packet header.
  struct Hdr_flags
  {
    l4_uint8_t raw;
    CXX_BITFIELD_MEMBER( 0, 0, need_csum, raw);
    CXX_BITFIELD_MEMBER( 1, 1, data_valid, raw);
  };

  /// Virtio-net header preceding every packet.
  struct Hdr
  {
    Hdr_flags flags;
    l4_uint8_t gso_type;
    l4_uint16_t hdr_len;
    l4_uint16_t gso_size;
    l4_uint16_t csum_start;
    l4_uint16_t csum_offset;
    l4_uint16_t num_buffers;
  };

  /// Virtio-net feature bits on top of the generic device features.
  struct Features : L4virtio::Svr::Dev_config::Features
  {
    Features() = default;
    Features(l4_uint32_t raw) : L4virtio::Svr::Dev_config::Features(raw) {}

    CXX_BITFIELD_MEMBER( 0, 0, csum, raw);       // host handles partial csum
    CXX_BITFIELD_MEMBER( 1, 1, guest_csum, raw); // guest handles partial csum
    CXX_BITFIELD_MEMBER( 5, 5, mac, raw);        // host has given mac
    CXX_BITFIELD_MEMBER( 6, 6, gso, raw);        // host handles packets /w any GSO
    CXX_BITFIELD_MEMBER( 7, 7, guest_tso4, raw); // guest handles TSOv4 in
    CXX_BITFIELD_MEMBER( 8, 8, guest_tso6, raw); // guest handles TSOv6 in
    CXX_BITFIELD_MEMBER( 9, 9, guest_ecn, raw);  // guest handles TSO[6] with ECN in
    CXX_BITFIELD_MEMBER(10, 10, guest_ufo, raw); // guest handles UFO in
    CXX_BITFIELD_MEMBER(11, 11, host_tso4, raw); // host handles TSOv4 in
    CXX_BITFIELD_MEMBER(12, 12, host_tso6, raw); // host handles TSOv6 in
    CXX_BITFIELD_MEMBER(13, 13, host_ecn, raw);  // host handles TSO[6] with ECN in
    CXX_BITFIELD_MEMBER(14, 14, host_ufo, raw);  // host handles UFO
    CXX_BITFIELD_MEMBER(15, 15, mrg_rxbuf, raw); // host can merge receive buffers
    CXX_BITFIELD_MEMBER(16, 16, status, raw);    // virtio_net_config.status available
    CXX_BITFIELD_MEMBER(17, 17, ctrl_vq, raw);   // Control channel available
    CXX_BITFIELD_MEMBER(18, 18, ctrl_rx, raw);   // Control channel RX mode support
    CXX_BITFIELD_MEMBER(19, 19, ctrl_vlan, raw); // Control channel VLAN filtering
    CXX_BITFIELD_MEMBER(20, 20, ctrl_rx_extra, raw); // Extra RX mode control support
    CXX_BITFIELD_MEMBER(21, 21, guest_announce, raw); // Guest can announce device on the network
    CXX_BITFIELD_MEMBER(22, 22, mq, raw); // Device supports Receive Flow Steering
    CXX_BITFIELD_MEMBER(23, 23, ctrl_mac_addr, raw); // Set MAC address
  };

  /// Indices of the two virtqueues in `_q`.
  enum
  {
    Rx = 0,
    Tx = 1,
  };

  struct Net_config_space
  {
    // The config defining mac address (if VIRTIO_NET_F_MAC aka Features::mac)
    l4_uint8_t mac[6];
    // currently not used ...
    l4_uint16_t status;
    l4_uint16_t max_virtqueue_pairs;
  };

  L4virtio::Svr::Dev_config_t<Net_config_space> _dev_config;

  /**
   * Set up a virtio-net device with an Rx and a Tx queue.
   *
   * \param vq_max Maximum number of entries per virtqueue.
   */
  explicit Virtio_net(unsigned vq_max)
  : L4virtio::Svr::Device(&_dev_config),
    _dev_config(L4VIRTIO_VENDOR_KK, L4VIRTIO_ID_NET, 2),
    _vq_max(vq_max)
  {
    Features hf(0);

    hf.ring_indirect_desc() = true;
    hf.mrg_rxbuf() = true;
#if 0
    // disable currently unsupported options, but leave them in for
    // documentation purposes
    hf.csum() = true;
    hf.host_tso4() = true;
    hf.host_tso6() = true;
    hf.host_ufo() = true;
    hf.host_ecn() = true;

    hf.guest_csum() = true;
    hf.guest_tso4() = true;
    hf.guest_tso6() = true;
    hf.guest_ufo() = true;
    hf.guest_ecn() = true;
#endif
    _dev_config.host_features(0) = hf.raw;
    _dev_config.set_host_feature(L4VIRTIO_FEATURE_VERSION_1);
    _dev_config.reset_hdr();
    reset_queue_config(Rx, vq_max);
    reset_queue_config(Tx, vq_max);
  }

  /// Disable both queues and restore the initial device configuration.
  void reset() override
  {
    for (L4virtio::Svr::Virtqueue &q: _q)
      q.disable();

    reset_queue_config(Rx, _vq_max);
    reset_queue_config(Tx, _vq_max);
    _dev_config.reset_hdr();
  }

  template<typename T, unsigned N >
  static unsigned array_length(T (&)[N]) { return N; }

  /// (Re-)initialize queue \a index from the shared config page.
  int reconfig_queue(unsigned index) override
  {
    Dbg(Dbg::Virtio, Dbg::Info, "Virtio")
      .printf("(%p): Reconfigure queue %d (%p): Status: %02x\n",
              this, index, _q + index, _dev_config.status().raw);
    if (index >= array_length(_q))
      return -L4_ERANGE;

    if (setup_queue(_q + index, index, _vq_max))
      return 0;

    return -L4_EINVAL;
  }

  /// Dump the eight 32-bit words of a 256-bit feature bitmap.
  void dump_features(Dbg const &dbg, const volatile l4_uint32_t *p)
  {
    // Fixed: the eighth conversion previously read p[17] instead of p[7],
    // which is inconsistent with the eight-slot format string.
    dbg.cprintf("%08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
                p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
  }

  /// Log host and guest feature maps at Info level.
  void dump_features()
  {
    Dbg info(Dbg::Virtio, Dbg::Info, "Virtio");
    if (!info.is_active())
      return;

    auto *hdr = _dev_config.hdr();
    info.printf("Device %p running (%02x)\n\thost features: ",
                this, _dev_config.status().raw);
    dump_features(info, hdr->dev_features_map);
    info.printf("\tguest features: ");
    dump_features(info, hdr->driver_features_map);
  }

  /// Latch the features negotiated by the driver.
  bool check_features() override
  {
    _negotiated_features = _dev_config.negotiated_features(0);
    return true;
  }

  bool device_needs_reset() const
  { return _dev_config.status().device_needs_reset(); }

  /** Check whether both virtqueues are ready. */
  bool check_queues() override
  {
    for (L4virtio::Svr::Virtqueue &q: _q)
      if (!q.ready())
        {
          reset();
          Err().printf("failed to start queues\n");
          return false;
        }

    dump_features();
    return true;
  }

  Server_iface *server_iface() const override
  { return L4::Epiface::server_iface(); }

  /**
   * Save the `_kick_guest_irq` that the client sent via
   * `device_notification_irq()`.
   */
  void register_single_driver_irq() override
  {
    _kick_guest_irq = L4Re::Util::Unique_cap<L4::Irq>(
      L4Re::chkcap(server_iface()->template rcv_cap<L4::Irq>(0)));
    L4Re::chksys(server_iface()->realloc_rcv_cap(0));
  }

  /// Notify the driver about a device configuration change.
  void trigger_driver_config_irq() override
  {
    _dev_config.add_irq_status(L4VIRTIO_IRQ_STATUS_CONFIG);
    _kick_guest_irq->trigger();
  }

  /**
   * Trigger the `_kick_guest_irq` IRQ.
   *
   * This function gets called on the receiving port, when a request was
   * successfully transmitted by the switch.
   */
  void notify_queue(L4virtio::Svr::Virtqueue *queue)
  {
    // Downcast to Virtqueue to access kick_queue() - we know that our
    // queues have the type Virtqueue.
    Virtqueue *q = static_cast<Virtqueue*>(queue);
    if (q->kick_queue())
      {
        _dev_config.add_irq_status(L4VIRTIO_IRQ_STATUS_VRING);
        _kick_guest_irq->trigger();
      }
  }

  /// Re-enable kicks on both queues and emit one IRQ if any was suppressed.
  void kick_emit_and_enable()
  {
    bool kick_pending = false;

    for (auto &q : _q)
      kick_pending |= q.kick_enable_get_pending();

    if (kick_pending)
      {
        _dev_config.add_irq_status(L4VIRTIO_IRQ_STATUS_VRING);
        _kick_guest_irq->trigger();
      }
  }

  /// Suppress kicks on both queues (batched until kick_emit_and_enable()).
  void kick_disable_and_remember()
  {
    for (auto &q : _q)
      q.kick_disable_and_remember();
  }

  Features negotiated_features() const
  { return _negotiated_features; }

  /** Getter for the transmission queue. */
  Virtqueue *tx_q() { return &_q[Tx]; }
  /** Getter for the receive queue. */
  Virtqueue *rx_q() { return &_q[Rx]; }
  /** Getter for the transmission queue. */
  Virtqueue const *tx_q() const { return &_q[Tx]; }
  /** Getter for the receive queue. */
  Virtqueue const *rx_q() const { return &_q[Rx]; }

private:
  Features _negotiated_features;
  /** Maximum number of entries in a virtqueue that is used by the port */
  unsigned _vq_max;
  /** the two used virtqueues */
  Virtqueue _q[2];
  /**
   * The IRQ used to notify the associated client that a new network request
   * has been received and is present in the receive queue.
   */
  L4Re::Util::Unique_cap<L4::Irq> _kick_guest_irq;
};
/**\}*/

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2016-2017, 2022, 2024 Kernkonzept GmbH.
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
* Alexander Warg <warg@os.inf.tu-dresden.de>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <l4/l4virtio/server/l4virtio>
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* Data buffer used to transfer packets.
*/
/**
 * Data buffer used to transfer packets.
 */
struct Buffer : L4virtio::Svr::Data_buffer
{
  Buffer() = default;

  /// View onto the guest memory described by virtqueue descriptor \a d.
  Buffer(L4virtio::Svr::Driver_mem_region const *r,
         L4virtio::Svr::Virtqueue::Desc const &d,
         L4virtio::Svr::Request_processor const *)
  {
    left = d.len;
    pos = static_cast<char *>(r->local(d.addr));
  }

  /// Wrap a raw memory area of \a size bytes starting at \a data.
  Buffer(char *data, l4_uint32_t size)
  {
    left = size;
    pos = data;
  }

  template<typename T>
  explicit Buffer(T *p) : Data_buffer(p) {}
};
/**\}*/

View File

@@ -0,0 +1,154 @@
/*
* Copyright (C) 2020, 2022-2024 Kernkonzept GmbH.
* Author(s): Jan Klötzke <jan.kloetzke@kernkonzept.com>
*
* License: see LICENSE.spdx (in this directory or the directories above)
*/
#pragma once
#include <l4/cxx/minmax>
#include <l4/l4virtio/server/virtio>
#include <l4/sys/types.h>
#include <string.h>
#include "virtio_net.h"
#include "virtio_net_buffer.h"
namespace {
const l4_uint16_t VLAN_ID_NATIVE = 0xffffU; ///< Pseudo ID for native ports
const l4_uint16_t VLAN_ID_TRUNK = 0xfffeU;  ///< Pseudo ID for trunk ports

/// True iff \a id is a valid 802.1Q VLAN ID (1..4094; 0 and 0xfff are
/// reserved, and the pseudo IDs above fall outside this range).
inline bool vlan_valid_id(l4_uint16_t id)
{
  return id > 0U && id < 0xfffU;
}
}
/**
* \ingroup virtio_net_switch
* \{
*/
/**
* Class for VLAN packet rewriting.
*/
class Virtio_vlan_mangle
{
  // TCI to insert; 0 means "pass through unchanged", 0xffff means "strip tag"
  // (see remove()).
  l4_uint16_t _tci;
  // Bytes of the two leading MAC addresses (2 x 6) still to be copied before
  // the tag position is reached.
  l4_uint8_t _mac_remaining;
  // > 0: tag bytes still to insert; < 0: tag bytes still to strip; 0: done.
  l4_int8_t _tag_remaining;

  constexpr Virtio_vlan_mangle(l4_uint16_t tci, l4_int8_t tag_remaining)
  : _tci{tci}, _mac_remaining{12}, _tag_remaining{tag_remaining}
  {}

public:
  /**
   * Default constructor.
   *
   * The packet is not touched in any way.
   */
  Virtio_vlan_mangle()
  : _tci{0}, _mac_remaining{0}, _tag_remaining{0}
  {}

  /**
   * Construct an object that adds a VLAN tag.
   *
   * \param tci The TCI field of the VLAN tag to add.
   *
   * It is the callers responsibility to ensure that the packet is not already
   * tagged.
   */
  static constexpr Virtio_vlan_mangle add(l4_uint16_t tci)
  {
    return Virtio_vlan_mangle(tci, 4);
  }

  /**
   * Construct an object that removes the VLAN tag.
   *
   * This object assumes that the Ethernet packet has a VLAN tag and will
   * slavishly remove the necessary bytes from the packet.
   */
  static constexpr Virtio_vlan_mangle remove()
  {
    return Virtio_vlan_mangle(0xffffU, -4);
  }

  /**
   * Copy packet from \a src to \a dst.
   *
   * \param src Source packet buffer
   * \param dst Destination packet buffer
   * \return The number of bytes copied
   *
   * Copy the data from \a src to \a dst, possibly rewriting parts of the
   * packet. The method is expected to be called repeatedly until the source
   * packet is finished. Partial copies are allowed (including reading nothing
   * from the source buffer) as long as progress is made, i.e. repeatedly
   * calling this function eventually consumes the source buffer.
   */
  l4_uint32_t copy_pkt(Buffer &dst, Buffer &src)
  {
    l4_uint32_t ret;

    if (L4_LIKELY(_tci == 0))
      {
        // pass through (no tag or keep tag)
        ret = src.copy_to(&dst);
      }
    else if (_mac_remaining)
      {
        // copy initial MAC addresses (up to 12 bytes, possibly split over
        // several calls)
        ret = src.copy_to(&dst, _mac_remaining);
        _mac_remaining -= ret;
      }
    else if (_tag_remaining > 0)
      {
        // add VLAN tag: TPID 0x8100 followed by the big-endian TCI
        l4_uint8_t tag[4] = {
          0x81, 0x00,
          static_cast<l4_uint8_t>(_tci >> 8),
          static_cast<l4_uint8_t>(_tci & 0xffU)
        };
        // Write only into dst; src is deliberately not consumed here.
        ret = cxx::min(static_cast<l4_uint32_t>(_tag_remaining), dst.left);
        memcpy(dst.pos, &tag[4 - _tag_remaining], ret);
        dst.skip(ret);
        _tag_remaining -= (int)ret;
      }
    else if (_tag_remaining < 0)
      {
        // remove VLAN tag: consume up to 4 bytes from src without copying
        _tag_remaining += static_cast<int>(src.skip(-_tag_remaining));
        ret = 0;
      }
    else
      // mangling finished; copy the remainder of the packet unchanged
      ret = src.copy_to(&dst);

    return ret;
  }

  /**
   * Rewrite the virtio network header.
   *
   * \param hdr The virtio header of the packet
   *
   * This method is called exactly once for every virtio network packet. Any
   * necessary changes to the header are done in-place.
   */
  void rewrite_hdr(Virtio_net::Hdr *hdr)
  {
    // The checksum start offset moves by the four tag bytes that were
    // inserted (add) or stripped (remove, _tci == 0xffff).
    if (L4_UNLIKELY(_tci != 0 && hdr->flags.need_csum()))
      {
        if (_tci == 0xffffU)
          hdr->csum_start -= 4U;
        else
          hdr->csum_start += 4U;
      }
  }
};
/**\}*/