L4Re - L4 Runtime Environment
virtio-block
// vi:ft=cpp
/*
 * Copyright (C) 2017 Kernkonzept GmbH.
 * Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
 *
 * This file is distributed under the terms of the GNU General Public
 * License, version 2. Please see the COPYING-GPL-2 file for details.
 */
#pragma once

#include <l4/cxx/unique_ptr>
#include <l4/re/util/unique_cap>

#include <climits>

#include <l4/l4virtio/virtio.h>
#include <l4/l4virtio/virtio_block.h>
#include <l4/l4virtio/server/l4virtio>
#include <l4/sys/cxx/ipc_epiface>

namespace L4virtio { namespace Svr {

template <typename Ds_data> class Block_dev;

/**
 * A request to read or write data.
 */
template<typename Ds_data>
class Block_request
{
  friend Block_dev<Ds_data>;
  enum { Header_size = sizeof(l4virtio_block_header_t) };

public:
  struct Data_block
  {
    /// Pointer to the virtio memory descriptor.
    Driver_mem_region_t<Ds_data> *mem;
    /// Virtual address of the data block (in device space).
    void *addr;
    /// Length of the data block in bytes (max. 4 MB).
    l4_uint32_t len;

    Data_block() = default;

    Data_block(Driver_mem_region_t<Ds_data> *m, Virtqueue::Desc const &desc,
               Request_processor const *)
    : mem(m), addr(m->local(desc.addr)), len(desc.len)
    {}
  };

  /**
   * Compute the total size of the data in the request.
   *
   * \return Size of the data in bytes.
   *
   * \throws L4::Runtime_error(-L4_EIO)  Request has a bad format.
   *
   * Note that this operation is relatively expensive as it has to
   * iterate over the complete list of blocks.
   */
  unsigned data_size() const
  {
    Request_processor rp;
    Data_block data;

    rp.start(_mem_list, _request, &data);

    unsigned total = data.len;

    try
      {
        while (rp.has_more())
          {
            rp.next(_mem_list, &data);
            total += data.len;
          }
      }
    catch (Bad_descriptor const &e)
      {
        // Need to convert the exception because e contains a raw pointer to rp.
        throw L4::Runtime_error(-L4_EIO, "bad virtio descriptor");
      }

    if (total < Header_size + 1)
      throw L4::Runtime_error(-L4_EIO, "virtio request too short");

    // Neither the request header nor the trailing status byte are payload.
    return total - Header_size - 1;
  }

  /**
   * Check if the request contains more data blocks.
   */
  bool has_more()
  {
    // peek into the remaining data
    while (_data.len == 0 && _rp.has_more())
      _rp.next(_mem_list, &_data);

    // There must always be one byte left for the status.
    return (_data.len > 1 || _rp.has_more());
  }

  /**
   * Return the next block in the scatter-gather list.
   *
   * \return Information about the next data block.
   *
   * \throws L4::Runtime_error  No more data blocks are available.
   * \throws Bad_descriptor     The virtio request is corrupted.
   */
  Data_block next_block()
  {
    Data_block out;

    if (_data.len == 0)
      {
        if (!_rp.has_more())
          throw L4::Runtime_error(-L4_EEXIST,
                                  "No more data blocks in virtio request");

        if (_todo_blocks == 0)
          throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);
        --_todo_blocks;

        _rp.next(_mem_list, &_data);
      }

    if (_data.len > _max_block_size)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);

    out = _data;

    if (!_rp.has_more())
      {
        // Last block: hold back the final byte, it is the status byte.
        --(out.len);
        _data.len = 1;
        _data.addr = static_cast<char *>(_data.addr) + out.len;
      }
    else
      _data.len = 0; // is consumed

    return out;
  }
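
  /*
   * Example (sketch): draining the scatter-gather list of a request.
   * `copy_to_disk()` is a hypothetical backend helper, not part of this
   * interface:
   *
   *   while (req->has_more())
   *     {
   *       Data_block b = req->next_block();
   *       copy_to_disk(req->header().sector, b.addr, b.len);
   *     }
   */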

  /// Return the block request header.
  l4virtio_block_header_t const &header() const
  { return _header; }

private:
  Block_request(Virtqueue::Request req, Driver_mem_list_t<Ds_data> *mem_list,
                unsigned max_blocks, l4_uint32_t max_block_size)
  : _mem_list(mem_list),
    _request(req),
    _todo_blocks(max_blocks),
    _max_block_size(max_block_size)
  {
    // Read the header, which should be in the first block.
    _rp.start(mem_list, _request, &_data);
    --_todo_blocks;

    if (_data.len < Header_size)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);

    _header = *(static_cast<l4virtio_block_header_t *>(_data.addr));

    _data.addr = static_cast<char *>(_data.addr) + Header_size;
    _data.len -= Header_size;

    // If there is no space for the status byte, we cannot really recover.
    if (!_rp.has_more() && _data.len == 0)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);
  }

  int release_request(Virtqueue *queue, l4_uint8_t status, unsigned sz)
  {
    // Write back the status byte. If there was an error on the way or the
    // status byte is in its own block, fast-forward to the last block.
    if (_rp.has_more())
      {
        while (_rp.next(_mem_list, &_data) && _todo_blocks > 0)
          --_todo_blocks;

        if (_todo_blocks > 0 && _data.len > 0)
          *(static_cast<l4_uint8_t *>(_data.addr) + _data.len - 1) = status;
        else
          return -L4_EIO; // too many data blocks
      }
    else if (_data.len > 0)
      *(static_cast<l4_uint8_t *>(_data.addr)) = status;
    else
      return -L4_EIO; // no space for the final status byte

    // Now release the head of the request back to the queue.
    queue->consumed(_request, sz);

    return L4_EOK;
  }

  /**
   * The list of memory areas for the device.
   *
   * Points to the memory list of the parent device, which must always
   * outlive the request.
   */
  Driver_mem_list_t<Ds_data> *_mem_list;
  /// Type and destination information.
  l4virtio_block_header_t _header;
  /// Request processor containing the current state.
  Request_processor _rp;
  /// Current data chunk in flight.
  Data_block _data;

  /// Original virtio request.
  Virtqueue::Request _request;
  /// Number of blocks that may still be processed.
  unsigned _todo_blocks;
  /// Maximum length of a single block.
  l4_uint32_t _max_block_size;
};

struct Block_features : public Dev_config::Features
{
  Block_features() = default;
  Block_features(l4_uint32_t raw) : Dev_config::Features(raw) {}

  /** Maximum size of any single segment is in size_max. */
  CXX_BITFIELD_MEMBER( 1,  1, size_max, raw);
  /** Maximum number of segments in a request is in seg_max. */
  CXX_BITFIELD_MEMBER( 2,  2, seg_max, raw);
  /** Disk-style geometry specified in geometry. */
  CXX_BITFIELD_MEMBER( 4,  4, geometry, raw);
  /** Device is read-only. */
  CXX_BITFIELD_MEMBER( 5,  5, ro, raw);
  /** Block size of disk is in blk_size. */
  CXX_BITFIELD_MEMBER( 6,  6, blk_size, raw);
  /** Device exports information about optimal IO alignment. */
  CXX_BITFIELD_MEMBER(10, 10, topology, raw);
};
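
/*
 * Example (sketch): feature bits are read and written through the
 * bitfield accessors generated above; ro(), for instance, maps to bit 5
 * of the raw feature word:
 *
 *   Block_features f(0);
 *   f.ro() = true;          // sets bit 5 in f.raw
 *   bool readonly = f.ro(); // true
 */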


/**
 * Base class for virtio block devices.
 *
 * Use this class as a base to implement your own specific block device.
 * A minimal subclass is sketched at the end of this file.
 */
template <typename Ds_data>
class Block_dev
: public L4virtio::Svr::Device_t<Ds_data>,
  public L4::Epiface_t<Block_dev<Ds_data>, L4virtio::Device>
{
private:
  class Irq_object : public L4::Irqep_t<Irq_object>
  {
  public:
    Irq_object(Block_dev<Ds_data> *parent) : _parent(parent) {}

    void handle_irq()
    {
      _parent->kick();
    }

  private:
    Block_dev<Ds_data> *_parent;
  };
  Irq_object _irq_handler;

  L4Re::Util::Unique_cap<L4::Irq> _kick_guest_irq;
  Virtqueue _queue;
  unsigned _vq_max;
  l4_uint32_t _max_block_size = UINT_MAX;
  Dev_config_t<l4virtio_block_config_t> _dev_config;

public:
  typedef Block_request<Ds_data> Request;

protected:
  Block_features device_features() const
  { return _dev_config.host_features(0); }

  void set_device_features(Block_features df)
  { _dev_config.host_features(0) = df.raw; }

  /**
   * Sets the maximum size of any single segment reported to the client.
   *
   * The limit is also applied to any incoming requests. Requests with
   * larger segments result in an IO error being reported to the client.
   * That means that process_request() can safely assume that no segment
   * in a received request exceeds this size.
   */
  void set_size_max(l4_uint32_t sz)
  {
    _dev_config.priv_config()->size_max = sz;
    Block_features df = device_features();
    df.size_max() = true;
    set_device_features(df);

    _max_block_size = sz;
  }

  /**
   * Sets the maximum number of segments in a request,
   * as reported to the client.
   */
  void set_seg_max(l4_uint32_t sz)
  {
    _dev_config.priv_config()->seg_max = sz;
    Block_features df = device_features();
    df.seg_max() = true;
    set_device_features(df);
  }

  /**
   * Set disk geometry that is reported to the client.
   */
  void set_geometry(l4_uint16_t cylinders, l4_uint8_t heads, l4_uint8_t sectors)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->geometry.cylinders = cylinders;
    pc->geometry.heads = heads;
    pc->geometry.sectors = sectors;
    Block_features df = device_features();
    df.geometry() = true;
    set_device_features(df);
  }

  /**
   * Sets the block size of the disk reported to the client.
   *
   * Setting this does not change the logical sector size used
   * for addressing the device.
   */
  void set_blk_size(l4_uint32_t sz)
  {
    _dev_config.priv_config()->blk_size = sz;
    Block_features df = device_features();
    df.blk_size() = true;
    set_device_features(df);
  }

  /**
   * Sets the I/O alignment information reported back to the client.
   *
   * \param physical_block_exp  Number of logical blocks per physical block (log2).
   * \param alignment_offset    Offset of the first aligned logical block.
   * \param min_io_size         Suggested minimum I/O size in blocks.
   * \param opt_io_size         Optimal I/O size in blocks.
   */
  void set_topology(l4_uint8_t physical_block_exp,
                    l4_uint8_t alignment_offset,
                    l4_uint32_t min_io_size,
                    l4_uint32_t opt_io_size)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->topology.physical_block_exp = physical_block_exp;
    pc->topology.alignment_offset = alignment_offset;
    pc->topology.min_io_size = min_io_size;
    pc->topology.opt_io_size = opt_io_size;
    Block_features df = device_features();
    df.topology() = true;
    set_device_features(df);
  }

public:
  /**
   * Create a new virtio block device.
   *
   * \param vendor      Vendor ID.
   * \param queue_size  Number of entries to provide in the avail and used queue.
   * \param capacity    Size of the device in 512-byte sectors.
   * \param read_only   True if the device should not be writable.
   */
  Block_dev(l4_uint32_t vendor, unsigned queue_size,
            l4_uint64_t capacity, bool read_only)
  : L4virtio::Svr::Device_t<Ds_data>(&_dev_config),
    _irq_handler(this), _vq_max(queue_size),
    _dev_config(vendor, L4VIRTIO_ID_BLOCK, 1)
  {
    this->reset_queue_config(0, queue_size);

    Block_features df(0);
    df.ring_indirect_desc() = true;
    df.ro() = read_only;
    set_device_features(df);

    _dev_config.priv_config()->capacity = capacity;
    _dev_config.reset_hdr(); // to publish hardware features
  }

  /**
   * Implements the actual processing of data in the device.
   *
   * \param req  The request to be processed.
   *
   * \return If false, no further requests will be scheduled.
   *
   * Synchronous and asynchronous processing of the data is supported.
   * For asynchronous mode, the function should set up the worker
   * and then return false. In synchronous mode, the function should
   * return true once processing is complete. If there is an error
   * and processing is aborted, the status flag of \p req needs to be set
   * accordingly and the request immediately finished with finalize_request()
   * if the client is to be answered.
   */
  virtual bool process_request(cxx::unique_ptr<Request> &&req) = 0;
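
  /*
   * Example (sketch): a synchronous implementation in a subclass. The
   * `disk_write()` helper is hypothetical and stands in for the real
   * backend I/O; production code would also dispatch on req->header().type:
   *
   *   bool process_request(cxx::unique_ptr<Request> &&req) override
   *   {
   *     unsigned sz = req->data_size();
   *     l4_uint64_t sector = req->header().sector;
   *     while (req->has_more())
   *       {
   *         auto b = req->next_block();
   *         disk_write(sector, b.addr, b.len); // hypothetical backend call
   *         sector += b.len / 512;
   *       }
   *     finalize_request(cxx::move(req), sz);
   *     return true;
   *   }
   */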

  /**
   * Reset the actual hardware device.
   */
  virtual void reset_device() = 0;

  /**
   * The client requests reinitialisation of the connection.
   *
   * \return False if reinitialisation is not supported.
   */
  virtual bool reset_client() { return false; }

  /**
   * Return true if the queues should not be processed further.
   */
  virtual bool queue_stopped() = 0;

  /**
   * Releases resources related to a request and notifies the client.
   *
   * \param req     Pointer to the request that has finished.
   * \param sz      Number of bytes consumed.
   * \param status  Status of the request (see L4virtio_block_status).
   *
   * This function must be called when an asynchronous request finishes,
   * either successfully or with an error. The status byte in the request
   * must have been set prior to calling it.
   */
  void finalize_request(cxx::unique_ptr<Request> req, unsigned sz,
                        l4_uint8_t status = L4VIRTIO_BLOCK_S_OK)
  {
    if (_dev_config.status().failed())
      return;

    if (req->release_request(&_queue, status, sz) < 0)
      this->device_error();

    // XXX not implemented
    // _dev_config->irq_status |= 1;
    _kick_guest_irq->trigger();

    // Request can be dropped here.
  }
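
  /*
   * Example (sketch): completing a request from an asynchronous I/O
   * callback. `Aio_job` is a hypothetical completion context that owns
   * the request while the backend works on it; resuming via kick() is
   * one possible way to pick up requests queued in the meantime:
   *
   *   void on_io_done(Aio_job *job)
   *   {
   *     finalize_request(cxx::move(job->req), job->size,
   *                      job->ok ? L4VIRTIO_BLOCK_S_OK
   *                              : L4VIRTIO_BLOCK_S_IOERR);
   *     kick();
   *   }
   */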

  int reconfig_queue(unsigned idx)
  {
    if (idx == 0 && this->setup_queue(&_queue, 0, _vq_max))
      return 0;

    return -L4_EINVAL;
  }

  /**
   * Attach the device to an object registry.
   *
   * \param registry  Object registry that will be responsible for
   *                  dispatching requests.
   * \param service   Name of an existing capability the device should use.
   *
   * This function registers the general virtio interface as well as the
   * interrupt handler which is used for receiving client notifications.
   */
  L4::Cap<void> register_obj(L4::Registry_iface *registry,
                             char const *service = 0)
  {
    L4Re::chkcap(registry->register_irq_obj(&_irq_handler));
    L4::Cap<void> ret;
    if (service)
      ret = registry->register_obj(this, service);
    else
      ret = registry->register_obj(this);
    L4Re::chkcap(ret);

    return ret;
  }
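
  /*
   * Example (sketch): hooking a device into a server loop, assuming a
   * subclass `My_block_dev` (hypothetical) and a capability named
   * 'blockdev' provided via the environment:
   *
   *   L4Re::Util::Registry_server<> server;
   *   My_block_dev dev(0x44, 0x100, disk_size_in_sectors, false);
   *   dev.register_obj(server.registry(), "blockdev");
   *   server.loop();
   */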

protected:
  L4::Ipc_svr::Server_iface *server_iface() const
  {
    return L4::Epiface::server_iface();
  }

  /**
   * Dispatch pending requests from the avail queue.
   *
   * Stops when the queue is empty, when process_request() asks for a
   * pause by returning false, or when the device enters a failed state.
   */
  void kick()
  {
    if (queue_stopped())
      return;

    while (!_dev_config.status().failed())
      {
        auto r = _queue.next_avail();
        if (!r)
          return;

        try
          {
            cxx::unique_ptr<Request>
              cur{new Request(r, &(this->_mem_info), _vq_max, _max_block_size)};

            if (!process_request(cxx::move(cur)))
              return;
          }
        catch (Bad_descriptor const &e)
          {
            this->device_error();
            return;
          }
      }
  }

private:
  L4::Cap<L4::Irq> device_notify_irq() const
  {
    return L4::cap_cast<L4::Irq>(_irq_handler.obj_cap());
  }

  void register_single_driver_irq()
  {
    if (_kick_guest_irq)
      {
        // client has changed, purge all old state
        reset();
        _dev_config.reset_hdr();
        if (!reset_client())
          L4Re::chksys(-L4_EINVAL, "Client already connected.");
      }

    _kick_guest_irq = L4Re::Util::Unique_cap<L4::Irq>(
      L4Re::chkcap(server_iface()->template rcv_cap<L4::Irq>(0)));

    L4Re::chksys(server_iface()->realloc_rcv_cap(0));
  }

  void reset()
  {
    _queue.disable();
    _dev_config.reset_queue(0, _vq_max);
    reset_device();
  }

  bool check_queues()
  {
    if (!_queue.ready())
      {
        reset();
        return false;
      }

    return true;
  }
};
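
/*
 * Example (sketch): skeleton of a concrete device, as referenced in the
 * class documentation above. Everything below is illustrative;
 * `my_disk_reset()` stands in for a real storage backend:
 *
 *   template <typename Ds_data>
 *   class My_block_dev : public Block_dev<Ds_data>
 *   {
 *   public:
 *     explicit My_block_dev(l4_uint64_t capacity)
 *     : Block_dev<Ds_data>(0x44, 0x100, capacity, false)
 *     {
 *       this->set_size_max(0x400000); // 4 MB segment limit
 *       this->set_seg_max(64);
 *     }
 *
 *     bool process_request(
 *       cxx::unique_ptr<typename Block_dev<Ds_data>::Request> &&req) override
 *     { ... } // see the sketch next to process_request() above
 *
 *     void reset_device() override { my_disk_reset(); }
 *     bool queue_stopped() override { return false; }
 *   };
 */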

} }