L4Re - L4 Runtime Environment
virtio-block
// vi:ft=cpp
/*
 * Copyright (C) 2017 Kernkonzept GmbH.
 * Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
 *
 * This file is distributed under the terms of the GNU General Public
 * License, version 2. Please see the COPYING-GPL-2 file for details.
 */
#pragma once

#include <l4/cxx/unique_ptr>
#include <l4/re/util/unique_cap>

#include <climits>

#include <l4/l4virtio/virtio.h>
#include <l4/l4virtio/virtio_block.h>
#include <l4/l4virtio/server/l4virtio>
#include <l4/sys/cxx/ipc_epiface>

namespace L4virtio { namespace Svr {

template <typename Ds_data> class Block_dev;

/**
 * A request to read or write data.
 */
template<typename Ds_data>
class Block_request
{
  friend Block_dev<Ds_data>;
  enum { Header_size = sizeof(l4virtio_block_header_t) };

public:
  struct Data_block
  {
    /// Pointer to virtio memory descriptor.
    Driver_mem_region_t<Ds_data> *mem;
    /// Virtual address of the data block (in device space).
    void *addr;
    /// Length of the data block in bytes (max 4 MB).
    l4_uint32_t len;

    Data_block() = default;

    Data_block(Driver_mem_region_t<Ds_data> *m, Virtqueue::Desc const &desc,
               Request_processor const *)
    : mem(m), addr(m->local(desc.addr)), len(desc.len)
    {}
  };

  /**
   * Compute the total size of the data in the request.
   *
   * \return Size of the data in bytes.
   *
   * \throws L4::Runtime_error(-L4_EIO)  Request has a bad format.
   *
   * Note that this operation is relatively expensive as
   * it has to iterate over the complete list of blocks.
   */
  unsigned data_size() const
  {
    Request_processor rp;
    Data_block data;

    rp.start(_mem_list, _request, &data);

    unsigned total = data.len;

    try
      {
        while (rp.has_more())
          {
            rp.next(_mem_list, &data);
            total += data.len;
          }
      }
    catch (Bad_descriptor const &e)
      {
        // Need to convert the exception because e contains a raw pointer
        // to rp.
        throw L4::Runtime_error(-L4_EIO, "bad virtio descriptor");
      }

    if (total < Header_size + 1)
      throw L4::Runtime_error(-L4_EIO, "virtio request too short");

    return total - Header_size - 1;
  }

  /**
   * Check if the request contains more data blocks.
   */
  bool has_more()
  {
    // Peek into the remaining data.
    while (_data.len == 0 && _rp.has_more())
      _rp.next(_mem_list, &_data);

    // There must always be one byte left for the status.
    return (_data.len > 1 || _rp.has_more());
  }

  /**
   * Return the next block in the scatter-gather list.
   *
   * \return Information about the next data block.
   *
   * \throws L4::Runtime_error  No more data block is available.
   * \throws Bad_descriptor     Virtio request is corrupted.
   */
  Data_block next_block()
  {
    Data_block out;

    if (_data.len == 0)
      {
        if (!_rp.has_more())
          throw L4::Runtime_error(-L4_EEXIST,
                                  "No more data blocks in virtio request");

        if (_todo_blocks == 0)
          throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);
        --_todo_blocks;

        _rp.next(_mem_list, &_data);
      }

    if (_data.len > _max_block_size)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);

    out = _data;

    if (!_rp.has_more())
      {
        --(out.len);
        _data.len = 1;
        _data.addr = static_cast<char *>(_data.addr) + out.len;
      }
    else
      _data.len = 0; // is consumed

    return out;
  }
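
  // A usage sketch: a device implementation typically drains the
  // scatter-gather list with has_more()/next_block(). `copy_to_disk` is a
  // placeholder for device-specific I/O, not part of this interface.
  //
  //   while (req->has_more())
  //     {
  //       Data_block b = req->next_block();
  //       // b.addr and b.len describe one contiguous chunk of guest memory.
  //       copy_to_disk(req->header().sector, b.addr, b.len);
  //     }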

  /// Return the block request header.
  l4virtio_block_header_t const &header() const
  { return _header; }

private:
  Block_request(Virtqueue::Request req, Driver_mem_list_t<Ds_data> *mem_list,
                unsigned max_blocks, l4_uint32_t max_block_size)
  : _mem_list(mem_list),
    _request(req),
    _todo_blocks(max_blocks),
    _max_block_size(max_block_size)
  {
    // Read the header, which should be in the first block.
    _rp.start(mem_list, _request, &_data);
    --_todo_blocks;

    if (_data.len < Header_size)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);

    _header = *(static_cast<l4virtio_block_header_t *>(_data.addr));

    _data.addr = static_cast<char *>(_data.addr) + Header_size;
    _data.len -= Header_size;

    // If there is no space for the status byte we cannot really recover.
    if (!_rp.has_more() && _data.len == 0)
      throw Bad_descriptor(&_rp, Bad_descriptor::Bad_size);
  }

  int release_request(Virtqueue *queue, l4_uint8_t status, unsigned sz)
  {
    // Write back the status.
    // If there was an error on the way or the status byte is in its
    // own block, fast-forward to the last block.
    if (_rp.has_more())
      {
        while (_rp.next(_mem_list, &_data) && _todo_blocks > 0)
          --_todo_blocks;

        if (_todo_blocks > 0 && _data.len > 0)
          *(static_cast<l4_uint8_t *>(_data.addr) + _data.len - 1) = status;
        else
          return -L4_EIO; // too many data blocks
      }
    else if (_data.len > 0)
      *(static_cast<l4_uint8_t *>(_data.addr)) = status;
    else
      return -L4_EIO; // no space for the final status byte

    // Now release the head.
    queue->consumed(_request, sz);

    return L4_EOK;
  }

  /**
   * The list of memory areas for the device.
   *
   * Points to the memory list of the parent device, which must always
   * have a longer lifespan than the request.
   */
  Driver_mem_list_t<Ds_data> *_mem_list;
  /// Type and destination information.
  l4virtio_block_header_t _header;
  /// Request processor containing the current state.
  Request_processor _rp;
  /// Current data chunk in flight.
  Data_block _data;

  /// Original virtio request.
  Virtqueue::Request _request;
  /// Number of blocks that may still be processed.
  unsigned _todo_blocks;
  /// Maximum length of a single block.
  l4_uint32_t _max_block_size;
};

struct Block_features : public Dev_config::Features
{
  Block_features() = default;
  Block_features(l4_uint32_t raw) : Dev_config::Features(raw) {}

  /** Maximum size of any single segment is in size_max. */
  CXX_BITFIELD_MEMBER( 1,  1, size_max, raw);
  /** Maximum number of segments in a request is in seg_max. */
  CXX_BITFIELD_MEMBER( 2,  2, seg_max, raw);
  /** Disk-style geometry specified in geometry. */
  CXX_BITFIELD_MEMBER( 4,  4, geometry, raw);
  /** Device is read-only. */
  CXX_BITFIELD_MEMBER( 5,  5, ro, raw);
  /** Block size of disk is in blk_size. */
  CXX_BITFIELD_MEMBER( 6,  6, blk_size, raw);
  /** Cache flush command support. */
  CXX_BITFIELD_MEMBER( 9,  9, flush, raw);
  /** Device exports information about optimal I/O alignment. */
  CXX_BITFIELD_MEMBER(10, 10, topology, raw);
  /** Device can toggle its cache between writeback and writethrough modes. */
  CXX_BITFIELD_MEMBER(11, 11, config_wce, raw);
  /** Device supports the discard command. */
  CXX_BITFIELD_MEMBER(13, 13, discard, raw);
  /** Device supports the write zeroes command. */
  CXX_BITFIELD_MEMBER(14, 14, write_zeroes, raw);
};


/**
 * Base class for virtio block devices.
 *
 * Use this class as a base to implement your own specific block device.
 */
template <typename Ds_data>
class Block_dev
: public L4virtio::Svr::Device_t<Ds_data>,
  public L4::Epiface_t<Block_dev<Ds_data>, L4virtio::Device>
{
private:
  class Irq_object : public L4::Irqep_t<Irq_object>
  {
  public:
    Irq_object(Block_dev<Ds_data> *parent) : _parent(parent) {}

    void handle_irq()
    {
      _parent->kick();
    }

  private:
    Block_dev<Ds_data> *_parent;
  };
  Irq_object _irq_handler;

  L4Re::Util::Unique_cap<L4::Irq> _kick_guest_irq;
  Virtqueue _queue;
  unsigned _vq_max;
  l4_uint32_t _max_block_size = UINT_MAX;
  Dev_config_t<l4virtio_block_config_t> _dev_config;

public:
  typedef Block_request<Ds_data> Request;

protected:
  Block_features negotiated_features() const
  { return _dev_config.negotiated_features(0); }

  Block_features device_features() const
  { return _dev_config.host_features(0); }

  void set_device_features(Block_features df)
  { _dev_config.host_features(0) = df.raw; }

  /**
   * Sets the maximum size of any single segment reported to the client.
   *
   * The limit is also applied to all incoming requests.
   * Requests with larger segments result in an I/O error being
   * reported to the client. That means that process_request() can
   * safely assume that all segments in a received request are smaller.
   */
  void set_size_max(l4_uint32_t sz)
  {
    _dev_config.priv_config()->size_max = sz;
    Block_features df = device_features();
    df.size_max() = true;
    set_device_features(df);

    _max_block_size = sz;
  }

  /**
   * Sets the maximum number of segments in a request
   * that is reported to the client.
   */
  void set_seg_max(l4_uint32_t sz)
  {
    _dev_config.priv_config()->seg_max = sz;
    Block_features df = device_features();
    df.seg_max() = true;
    set_device_features(df);
  }

  /**
   * Set the disk geometry that is reported to the client.
   */
  void set_geometry(l4_uint16_t cylinders, l4_uint8_t heads, l4_uint8_t sectors)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->geometry.cylinders = cylinders;
    pc->geometry.heads = heads;
    pc->geometry.sectors = sectors;
    Block_features df = device_features();
    df.geometry() = true;
    set_device_features(df);
  }

  /**
   * Sets the block size of the disk to be reported to the client.
   *
   * Setting this does not change the logical sector size used
   * for addressing the device.
   */
  void set_blk_size(l4_uint32_t sz)
  {
    _dev_config.priv_config()->blk_size = sz;
    Block_features df = device_features();
    df.blk_size() = true;
    set_device_features(df);
  }

  /**
   * Sets the I/O alignment information reported back to the client.
   *
   * \param physical_block_exp  Number of logical blocks per physical block
   *                            (log2).
   * \param alignment_offset    Offset of the first aligned logical block.
   * \param min_io_size         Suggested minimum I/O size in blocks.
   * \param opt_io_size         Optimal I/O size in blocks.
   */
  void set_topology(l4_uint8_t physical_block_exp,
                    l4_uint8_t alignment_offset,
                    l4_uint32_t min_io_size,
                    l4_uint32_t opt_io_size)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->topology.physical_block_exp = physical_block_exp;
    pc->topology.alignment_offset = alignment_offset;
    pc->topology.min_io_size = min_io_size;
    pc->topology.opt_io_size = opt_io_size;
    Block_features df = device_features();
    df.topology() = true;
    set_device_features(df);
  }

  /** Enables the flush command. */
  void set_flush()
  {
    Block_features df = device_features();
    df.flush() = true;
    set_device_features(df);
  }

  /**
   * Sets the cache mode and enables the writeback toggle.
   *
   * \param writeback  Mode of the cache (0 for writethrough, 1 for writeback).
   */
  void set_config_wce(l4_uint8_t writeback)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->writeback = writeback;
    Block_features df = device_features();
    df.config_wce() = true;
    set_device_features(df);
  }

  /**
   * Gets the writeback field from the configuration space.
   *
   * \return Value of the writeback field.
   */
  l4_uint8_t get_writeback()
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    return pc->writeback;
  }

  /**
   * Sets constraints for and enables the discard command.
   *
   * \param max_discard_sectors       Maximum number of discard sectors.
   * \param max_discard_seg           Maximum number of discard segments.
   * \param discard_sector_alignment  Can be used by the driver when splitting
   *                                  a request based on alignment.
   */
  void set_discard(l4_uint32_t max_discard_sectors, l4_uint32_t max_discard_seg,
                   l4_uint32_t discard_sector_alignment)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->max_discard_sectors = max_discard_sectors;
    pc->max_discard_seg = max_discard_seg;
    pc->discard_sector_alignment = discard_sector_alignment;
    Block_features df = device_features();
    df.discard() = true;
    set_device_features(df);
  }

  /**
   * Sets constraints for and enables the write zeroes command.
   *
   * \param max_write_zeroes_sectors  Maximum number of write zeroes sectors.
   * \param max_write_zeroes_seg      Maximum number of write zeroes segments.
   * \param write_zeroes_may_unmap    Set if a write zeroes request can result
   *                                  in deallocating one or more sectors.
   */
  void set_write_zeroes(l4_uint32_t max_write_zeroes_sectors,
                        l4_uint32_t max_write_zeroes_seg,
                        l4_uint8_t write_zeroes_may_unmap)
  {
    l4virtio_block_config_t volatile *pc = _dev_config.priv_config();
    pc->max_write_zeroes_sectors = max_write_zeroes_sectors;
    pc->max_write_zeroes_seg = max_write_zeroes_seg;
    pc->write_zeroes_may_unmap = write_zeroes_may_unmap;
    Block_features df = device_features();
    df.write_zeroes() = true;
    set_device_features(df);
  }

public:
  /**
   * Create a new virtio block device.
   *
   * \param vendor      Vendor ID.
   * \param queue_size  Number of entries to provide in avail and used queue.
   * \param capacity    Size of the device in 512-byte sectors.
   * \param read_only   True, if the device should not be writable.
   */
  Block_dev(l4_uint32_t vendor, unsigned queue_size,
            l4_uint64_t capacity, bool read_only)
  : L4virtio::Svr::Device_t<Ds_data>(&_dev_config),
    _irq_handler(this), _vq_max(queue_size),
    _dev_config(vendor, L4VIRTIO_ID_BLOCK, 1)
  {
    this->reset_queue_config(0, queue_size);

    Block_features df(0);
    df.ring_indirect_desc() = true;
    df.ro() = read_only;
    set_device_features(df);

    _dev_config.priv_config()->capacity = capacity;
  }

  /**
   * Implements the actual processing of data in the device.
   *
   * \param req  The request to be processed.
   *
   * \return If false, no further requests will be scheduled.
   *
   * Synchronous and asynchronous processing of the data is supported.
   * For asynchronous mode, the function should set up the worker
   * and then return false. In synchronous mode, the function should
   * return true once processing is complete. If there is an error
   * and processing is aborted, the status flag of `req` needs to be set
   * accordingly and the request immediately finished with finalize_request()
   * if the client is to be answered.
   */
  virtual bool process_request(cxx::unique_ptr<Request> &&req) = 0;
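
  // A sketch of a minimal synchronous implementation, assuming a
  // hypothetical `disk_read(sector, buf, len)` backend helper:
  //
  //   bool process_request(cxx::unique_ptr<Request> &&req) override
  //   {
  //     if (req->header().type != L4VIRTIO_BLOCK_T_IN)
  //       {
  //         finalize_request(cxx::move(req), 0, L4VIRTIO_BLOCK_S_UNSUPP);
  //         return true;
  //       }
  //
  //     l4_uint64_t sector = req->header().sector;
  //     unsigned sz = 0;
  //     while (req->has_more())
  //       {
  //         auto b = req->next_block();
  //         disk_read(sector, b.addr, b.len); // fill the guest buffer
  //         sector += b.len / 512;
  //         sz += b.len;
  //       }
  //     finalize_request(cxx::move(req), sz, L4VIRTIO_BLOCK_S_OK);
  //     return true; // synchronous mode: ready for the next request
  //   }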

  /**
   * Reset the actual hardware device.
   */
  virtual void reset_device() = 0;

  /**
   * The client requests reinitialisation of the connection.
   *
   * \retval false  Reinitialisation is not supported.
   * \retval true   Reinitialisation is supported and the client has been
   *                reinitialised successfully.
   */
  virtual bool reset_client() { return false; }

  /**
   * Return true if the queues should not be processed further.
   */
  virtual bool queue_stopped() = 0;

  /**
   * Releases resources related to a request and notifies the client.
   *
   * \param req     Pointer to the request that has finished.
   * \param sz      Number of bytes consumed.
   * \param status  Status of the request (see L4virtio_block_status).
   *
   * This function must be called when an asynchronous request finishes,
   * either successfully or with an error. The final status of the request
   * must be supplied in `status`.
   */
  void finalize_request(cxx::unique_ptr<Request> req, unsigned sz,
                        l4_uint8_t status = L4VIRTIO_BLOCK_S_OK)
  {
    if (_dev_config.status().failed())
      return;

    if (req->release_request(&_queue, status, sz) < 0)
      this->device_error();

    // XXX not implemented
    // _dev_config->irq_status |= 1;
    _kick_guest_irq->trigger();

    // Request can be dropped here.
  }
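
  // In asynchronous mode, process_request() keeps the request (returning
  // false) and a completion handler finishes it later, for example:
  //
  //   finalize_request(cxx::move(_pending), bytes_transferred,
  //                    L4VIRTIO_BLOCK_S_OK);
  //
  // where `_pending` and `bytes_transferred` are assumed bookkeeping of the
  // derived device.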

  int reconfig_queue(unsigned idx)
  {
    if (idx == 0 && this->setup_queue(&_queue, 0, _vq_max))
      return 0;

    return -L4_EINVAL;
  }

  /**
   * Attach the device to an object registry.
   *
   * \param registry  Object registry that will be responsible for dispatching
   *                  requests.
   * \param service   Name of an existing capability the device should use.
   *
   * This function registers the general virtio interface as well as the
   * interrupt handler which is used for receiving client notifications.
   */
  L4::Cap<void> register_obj(L4::Registry_iface *registry,
                             char const *service = 0)
  {
    L4Re::chkcap(registry->register_irq_obj(&_irq_handler));
    L4::Cap<void> ret;
    if (service)
      ret = registry->register_obj(this, service);
    else
      ret = registry->register_obj(this);
    L4Re::chkcap(ret);

    return ret;
  }

  L4::Cap<void> register_obj(L4::Registry_iface *registry,
                             L4::Cap<L4::Rcv_endpoint> ep)
  {
    L4Re::chkcap(registry->register_irq_obj(&_irq_handler));

    return L4Re::chkcap(registry->register_obj(this, ep));
  }
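
  // A registration sketch, assuming a derived `My_block_dev` and the
  // standard L4Re registry server loop; "blockdev" must name an existing
  // capability in the environment.
  //
  //   L4Re::Util::Registry_server<> server;
  //   My_block_dev dev(0x44, 0x100, /* sectors */ 0x100000, false);
  //   dev.register_obj(server.registry(), "blockdev");
  //   server.loop();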

protected:
  L4::Ipc_svr::Server_iface *server_iface() const
  {
    return L4::Epiface::server_iface();
  }

  void kick()
  {
    if (!_queue.ready() || queue_stopped())
      return;

    while (!_dev_config.status().failed())
      {
        auto r = _queue.next_avail();
        if (!r)
          return;

        try
          {
            cxx::unique_ptr<Request>
              cur{new Request(r, &(this->_mem_info), _vq_max, _max_block_size)};

            if (!process_request(cxx::move(cur)))
              return;
          }
        catch (Bad_descriptor const &e)
          {
            this->device_error();
            return;
          }
      }
  }

private:
  L4::Cap<L4::Irq> device_notify_irq() const
  {
    return L4::cap_cast<L4::Irq>(_irq_handler.obj_cap());
  }

  void register_single_driver_irq()
  {
    if (_kick_guest_irq)
      {
        // The client has changed, purge all old state.
        reset();
        if (!reset_client())
          L4Re::chksys(-L4_EINVAL, "Client already connected.");
      }

    _kick_guest_irq = L4Re::Util::Unique_cap<L4::Irq>(
      L4Re::chkcap(server_iface()->template rcv_cap<L4::Irq>(0)));

    L4Re::chksys(server_iface()->realloc_rcv_cap(0));
  }

  void reset()
  {
    _queue.disable();
    _dev_config.reset_queue(0, _vq_max);
    _dev_config.reset_hdr();
    reset_device();
  }

  bool check_queues()
  {
    if (!_queue.ready())
      {
        reset();
        return false;
      }

    return true;
  }
};

} }
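
// A skeleton of a concrete device built on Block_dev; the vendor ID, queue
// size, limits, and the I/O backend are placeholders chosen for
// illustration.
//
//   template <typename Ds_data>
//   struct My_block_dev : L4virtio::Svr::Block_dev<Ds_data>
//   {
//     My_block_dev(l4_uint64_t sectors)
//     : L4virtio::Svr::Block_dev<Ds_data>(0x44, 0x100, sectors, false)
//     {
//       this->set_seg_max(64);        // at most 64 segments per request
//       this->set_size_max(0x400000); // 4 MB per segment
//     }
//
//     bool process_request(
//       cxx::unique_ptr<L4virtio::Svr::Block_request<Ds_data>> &&req) override
//     { /* see the sketch at process_request() above */ return true; }
//
//     void reset_device() override {}
//     bool queue_stopped() override { return false; }
//   };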