/* $OpenBSD: virtio.c,v 1.54 2017/09/17 23:07:56 pd Exp $ */

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE */
#include <sys/socket.h>

#include <machine/vmmvar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pv/virtioreg.h>
#include <dev/pv/vioblkreg.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <errno.h>
#include <event.h>
#include <poll.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pci.h"
#include "vmd.h"
#include "vmm.h"
#include "virtio.h"
#include "loadfile.h"
#include "atomicio.h"

extern char *__progname;

struct viornd_dev viornd;
struct vioblk_dev *vioblk;
struct vionet_dev *vionet;
struct vmmci_dev vmmci;

int nr_vionet;
int nr_vioblk;

#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */

#define VIRTIO_NET_F_MAC	(1<<5)

#define VMMCI_F_TIMESYNC	(1<<0)
#define VMMCI_F_ACK		(1<<1)
#define VMMCI_F_SYNCRTC		(1<<2)

struct ioinfo {
	uint8_t *buf;
	ssize_t len;
	off_t offset;
	int fd;
	int error;
};

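/* Return a printable name for a virtio block request type. */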
const char *
vioblk_cmd_name(uint32_t type)
{
	switch (type) {
	case VIRTIO_BLK_T_IN: return "read";
	case VIRTIO_BLK_T_OUT: return "write";
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
	case VIRTIO_BLK_T_FLUSH: return "flush";
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
	case VIRTIO_BLK_T_GET_ID: return "get id";
	default: return "unknown";
	}
}

static void
dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
{
	log_debug("descriptor chain @ %d", dxx);
	do {
		log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x "
		    "/ 0x%x / 0x%x",
		    dxx,
		    desc[dxx].addr,
		    desc[dxx].len,
		    desc[dxx].flags,
		    desc[dxx].next);
		dxx = desc[dxx].next;
	} while (desc[dxx].flags & VRING_DESC_F_NEXT);

	log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x / 0x%x "
	    "/ 0x%x",
	    dxx,
	    desc[dxx].addr,
	    desc[dxx].len,
	    desc[dxx].flags,
	    desc[dxx].next);
}

static const char *
virtio_reg_name(uint8_t reg)
{
	switch (reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
	case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
	default: return "unknown";
	}
}

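/*
 * Compute the total size (in bytes) of a virtqueue with vq_size entries:
 * the descriptor table plus avail ring, aligned, followed by the used ring.
 */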
uint32_t
vring_size(uint32_t vq_size)
{
	uint32_t allocsize1, allocsize2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (2 + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
	    + sizeof(struct vring_used_elem) * vq_size);

	return allocsize1 + allocsize2;
}

/* Update queue select */
void
viornd_update_qs(void)
{
	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0) {
		viornd.cfg.queue_size = 0;
		return;
	}

	/* Update queue address/size based on queue select */
	viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa;
	viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs;
}

/* Update queue address */
void
viornd_update_qa(void)
{
	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address;
}

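/*
 * Handle a queue notification for the entropy device: fill the guest
 * buffer described by the avail ring with random data and mark it used.
 * Returns 1 if the guest should be interrupted.
 */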
int
viornd_notifyq(void)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	size_t sz;
	int ret;
	uint16_t aidx, uidx;
	char *buf, *rnd_data;
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;

	ret = 0;

	/* Invalid queue? */
	if (viornd.cfg.queue_notify > 0)
		return (0);

	vr_sz = vring_size(VIORND_QUEUE_SIZE);
	q_gpa = viornd.vq[viornd.cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	buf = calloc(1, vr_sz);
	if (buf == NULL) {
		log_warn("calloc error getting viornd ring");
		return (0);
	}

	if (read_mem(q_gpa, buf, vr_sz)) {
		free(buf);
		return (0);
	}

	desc = (struct vring_desc *)(buf);
	avail = (struct vring_avail *)(buf +
	    viornd.vq[viornd.cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(buf +
	    viornd.vq[viornd.cfg.queue_notify].vq_usedoffset);

	aidx = avail->idx & VIORND_QUEUE_MASK;
	uidx = used->idx & VIORND_QUEUE_MASK;

	sz = desc[avail->ring[aidx]].len;
	if (sz > MAXPHYS)
		fatal("viornd descriptor size too large (%zu)", sz);

	rnd_data = malloc(sz);

	if (rnd_data != NULL) {
		arc4random_buf(rnd_data, desc[avail->ring[aidx]].len);
		if (write_mem(desc[avail->ring[aidx]].addr,
		    rnd_data, desc[avail->ring[aidx]].len)) {
			log_warnx("viornd: can't write random data @ "
			    "0x%llx",
			    desc[avail->ring[aidx]].addr);
		} else {
			/* ret == 1 -> interrupt needed */
			/* XXX check VIRTIO_F_NO_INTR */
			ret = 1;
			viornd.cfg.isr_status = 1;
			used->ring[uidx].id = avail->ring[aidx] &
			    VIORND_QUEUE_MASK;
			used->ring[uidx].len = desc[avail->ring[aidx]].len;
			used->idx++;

			if (write_mem(q_gpa, buf, vr_sz)) {
				log_warnx("viornd: error writing vio ring");
			}
		}
		free(rnd_data);
	} else
		fatal("memory allocation error for viornd data");

	free(buf);

	return (ret);
}

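/*
 * I/O handler for the entropy device's config space registers.
 * dir == 0 indicates a guest write, otherwise a guest read.
 */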
int
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			viornd.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			viornd.cfg.queue_address = *data;
			viornd_update_qa();
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			viornd.cfg.queue_select = *data;
			viornd_update_qs();
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			viornd.cfg.queue_notify = *data;
			if (viornd_notifyq())
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			viornd.cfg.device_status = *data;
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = viornd.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = viornd.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = viornd.cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = viornd.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = viornd.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = viornd.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = viornd.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = viornd.cfg.isr_status;
			viornd.cfg.isr_status = 0;
			break;
		}
	}
	return (0);
}

void
vioblk_update_qa(struct vioblk_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
}

void
vioblk_update_qs(struct vioblk_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 0) {
		dev->cfg.queue_size = 0;
		return;
	}

	/* Update queue address/size based on queue select */
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
}

static void
vioblk_free_info(struct ioinfo *info)
{
	if (!info)
		return;
	free(info->buf);
	free(info);
}

static struct ioinfo *
vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
{
	struct ioinfo *info;

	info = calloc(1, sizeof(*info));
	if (!info)
		goto nomem;
	info->buf = malloc(sz);
	if (info->buf == NULL)
		goto nomem;
	info->len = sz;
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
	info->fd = dev->fd;

	return info;

nomem:
	free(info);
	log_warn("malloc error vioblk read");
	return (NULL);
}

static const uint8_t *
vioblk_finish_read(struct ioinfo *info)
{
	if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
		info->error = errno;
		log_warn("vioblk read error");
		return NULL;
	}

	return info->buf;
}

static struct ioinfo *
vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, size_t len)
{
	struct ioinfo *info;

	info = calloc(1, sizeof(*info));
	if (!info)
		goto nomem;
	info->buf = malloc(len);
	if (info->buf == NULL)
		goto nomem;
	info->len = len;
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
	info->fd = dev->fd;

	if (read_mem(addr, info->buf, len)) {
		vioblk_free_info(info);
		return NULL;
	}

	return info;

nomem:
	free(info);
	log_warn("malloc error vioblk write");
	return (NULL);
}

static int
vioblk_finish_write(struct ioinfo *info)
{
	if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
		log_warn("vioblk write error");
		return EIO;
	}
	return 0;
}

/*
 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
 * XXX can't trust ring data from VM, be extra cautious.
 */
int
vioblk_notifyq(struct vioblk_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
	uint8_t ds;
	int ret;
	off_t secbias;
	char *vr;
	struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_blk_req_hdr cmd;

	ret = 0;

	/* Invalid queue? */
	if (dev->cfg.queue_notify > 0)
		return (0);

	vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("calloc error getting vioblk ring");
		return (0);
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		goto out;
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);

	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;

	if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
		log_warnx("vioblk queue notify - nothing to do?");
		goto out;
	}

	while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {

		cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
		cmd_desc = &desc[cmd_desc_idx];

		if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("unchained vioblk cmd descriptor received "
			    "(idx %d)", cmd_desc_idx);
			goto out;
		}

		/* Read command from descriptor ring */
		if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
			log_warnx("vioblk: command read_mem error @ 0x%llx",
			    cmd_desc->addr);
			goto out;
		}

		switch (cmd.type) {
		case VIRTIO_BLK_T_IN:
			/* first descriptor */
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];

			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
				log_warnx("unchained vioblk data descriptor "
				    "received (idx %d)", cmd_desc_idx);
				goto out;
			}

			secbias = 0;
			do {
				struct ioinfo *info;
				const uint8_t *secdata;

				info = vioblk_start_read(dev, cmd.sector + secbias,
				    (ssize_t)secdata_desc->len);

				/* read the data (use current data descriptor) */
				secdata = vioblk_finish_read(info);
				if (secdata == NULL) {
					vioblk_free_info(info);
					log_warnx("vioblk: block read error, "
					    "sector %lld", cmd.sector);
					goto out;
				}

				if (write_mem(secdata_desc->addr, secdata,
				    secdata_desc->len)) {
					log_warnx("can't write sector "
					    "data to gpa @ 0x%llx",
					    secdata_desc->addr);
					dump_descriptor_chain(desc, cmd_desc_idx);
					vioblk_free_info(info);
					goto out;
				}

				vioblk_free_info(info);

				secbias += (secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE);
				secdata_desc_idx = secdata_desc->next &
				    VIOBLK_QUEUE_MASK;
				secdata_desc = &desc[secdata_desc_idx];
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);

			ds_desc_idx = secdata_desc_idx;
			ds_desc = secdata_desc;

			ds = VIRTIO_BLK_S_OK;
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
				log_warnx("can't write device status data @ "
				    "0x%llx", ds_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				goto out;
			}

			ret = 1;
			dev->cfg.isr_status = 1;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
			used->idx++;

			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
			    VIOBLK_QUEUE_MASK;

			if (write_mem(q_gpa, vr, vr_sz)) {
				log_warnx("vioblk: error writing vio ring");
			}
			break;
		case VIRTIO_BLK_T_OUT:
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			secdata_desc = &desc[secdata_desc_idx];

			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
				log_warnx("wr vioblk: unchained vioblk data "
				    "descriptor received (idx %d)",
				    cmd_desc_idx);
				goto out;
			}

			if (secdata_desc->len > dev->max_xfer) {
				log_warnx("%s: invalid read size %d requested",
				    __func__, secdata_desc->len);
				goto out;
			}

			secbias = 0;
			do {
				struct ioinfo *info;

				info = vioblk_start_write(dev, cmd.sector + secbias,
				    secdata_desc->addr, secdata_desc->len);

				if (info == NULL) {
					log_warnx("wr vioblk: can't read "
					    "sector data @ 0x%llx",
					    secdata_desc->addr);
					dump_descriptor_chain(desc,
					    cmd_desc_idx);
					goto out;
				}

				if (vioblk_finish_write(info)) {
					log_warnx("wr vioblk: disk write "
					    "error");
					vioblk_free_info(info);
					goto out;
				}

				vioblk_free_info(info);

				secbias += secdata_desc->len /
				    VIRTIO_BLK_SECTOR_SIZE;

				secdata_desc_idx = secdata_desc->next &
				    VIOBLK_QUEUE_MASK;
				secdata_desc = &desc[secdata_desc_idx];
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);

			ds_desc_idx = secdata_desc_idx;
			ds_desc = secdata_desc;

			ds = VIRTIO_BLK_S_OK;
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
				log_warnx("wr vioblk: can't write device "
				    "status data @ 0x%llx", ds_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				goto out;
			}

			ret = 1;
			dev->cfg.isr_status = 1;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
			    cmd_desc_idx;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
			    cmd_desc->len;
			used->idx++;

			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
			    VIOBLK_QUEUE_MASK;
			if (write_mem(q_gpa, vr, vr_sz))
				log_warnx("wr vioblk: error writing vio ring");
			break;
		case VIRTIO_BLK_T_FLUSH:
		case VIRTIO_BLK_T_FLUSH_OUT:
			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			ds_desc = &desc[ds_desc_idx];

			ds = VIRTIO_BLK_S_OK;
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
				log_warnx("fl vioblk: can't write device status "
				    "data @ 0x%llx", ds_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				goto out;
			}

			ret = 1;
			dev->cfg.isr_status = 1;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
			    cmd_desc_idx;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
			    cmd_desc->len;
			used->idx++;

			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
			    VIOBLK_QUEUE_MASK;
			if (write_mem(q_gpa, vr, vr_sz)) {
				log_warnx("fl vioblk: error writing vio ring");
			}
			break;
		default:
			log_warnx("%s: unsupported command 0x%x", __func__,
			    cmd.type);

			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
			ds_desc = &desc[ds_desc_idx];

			ds = VIRTIO_BLK_S_UNSUPP;
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
				log_warnx("%s: get id : can't write device "
				    "status data @ 0x%llx", __func__,
				    ds_desc->addr);
				dump_descriptor_chain(desc, cmd_desc_idx);
				goto out;
			}

			ret = 1;
			dev->cfg.isr_status = 1;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
			    cmd_desc_idx;
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
			    cmd_desc->len;
			used->idx++;

			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
			    VIOBLK_QUEUE_MASK;
			if (write_mem(q_gpa, vr, vr_sz)) {
				log_warnx("%s: get id : error writing vio ring",
				    __func__);
			}
			break;
		}

		idx = (idx + 1) & VIOBLK_QUEUE_MASK;
	}
out:
	free(vr);
	return (ret);
}

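/*
 * I/O handler for the virtio block device's config space registers,
 * including the device-specific capacity and max transfer size fields.
 */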
int
virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct vioblk_dev *dev = (struct vioblk_dev *)cookie;

	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			dev->cfg.queue_address = *data;
			vioblk_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vioblk_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vioblk_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			if (dev->cfg.device_status == 0) {
				log_debug("%s: device reset", __func__);
				dev->cfg.guest_feature = 0;
				dev->cfg.queue_address = 0;
				vioblk_update_qa(dev);
				dev->cfg.queue_size = 0;
				vioblk_update_qs(dev);
				dev->cfg.queue_select = 0;
				dev->cfg.queue_notify = 0;
				dev->cfg.isr_status = 0;
				dev->vq[0].last_avail = 0;
			}
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->sz);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 8) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 16) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 16) & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 24) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->sz >> 32);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 32) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 32) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 40) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 48) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->sz >> 48) & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->sz >> 56) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			switch (sz) {
			case 4:
				*data = (uint32_t)(dev->max_xfer);
				break;
			case 2:
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->max_xfer) & 0xFFFF;
				break;
			case 1:
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer) & 0xFF;
				break;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF;
			} else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint32_t)(dev->max_xfer >> 16) & 0xFFFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11:
			if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF;
			}
			/* XXX handle invalid sz */
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = dev->cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			if (sz == 4)
				*data = dev->cfg.queue_size;
			else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint16_t)dev->cfg.queue_size;
			} else if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint8_t)dev->cfg.queue_size;
			}
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			if (sz == 4)
				*data = dev->cfg.device_status;
			else if (sz == 2) {
				*data &= 0xFFFF0000;
				*data |= (uint16_t)dev->cfg.device_status;
			} else if (sz == 1) {
				*data &= 0xFFFFFF00;
				*data |= (uint8_t)dev->cfg.device_status;
			}
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			dev->cfg.isr_status = 0;
			break;
		}
	}
	return (0);
}

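/*
 * I/O handler for the virtio network device's config space registers.
 * Takes dev->mutex since the device is shared with the rx event thread.
 */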
int
virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct vionet_dev *dev = (struct vionet_dev *)cookie;

	*intr = 0xFF;
	mutex_lock(&dev->mutex);

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			dev->cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			dev->cfg.queue_address = *data;
			vionet_update_qa(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			dev->cfg.queue_select = *data;
			vionet_update_qs(dev);
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			dev->cfg.queue_notify = *data;
			if (vionet_notifyq(dev))
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			dev->cfg.device_status = *data;
			if (dev->cfg.device_status == 0) {
				log_debug("%s: device reset", __func__);
				dev->cfg.guest_feature = 0;
				dev->cfg.queue_address = 0;
				vionet_update_qa(dev);
				dev->cfg.queue_size = 0;
				vionet_update_qs(dev);
				dev->cfg.queue_select = 0;
				dev->cfg.queue_notify = 0;
				dev->cfg.isr_status = 0;
				dev->vq[0].last_avail = 0;
				dev->vq[0].notified_avail = 0;
				dev->vq[1].last_avail = 0;
				dev->vq[1].notified_avail = 0;
			}
			break;
		default:
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
			*data = dev->mac[reg -
			    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = dev->cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = dev->cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = dev->cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = dev->cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = dev->cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = dev->cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = dev->cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = dev->cfg.isr_status;
			dev->cfg.isr_status = 0;
			break;
		}
	}

	mutex_unlock(&dev->mutex);
	return (0);
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_update_qa(struct vionet_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_update_qs(struct vionet_dev *dev)
{
	/* Invalid queue? */
	if (dev->cfg.queue_select > 1) {
		dev->cfg.queue_size = 0;
		return;
	}

	/* Update queue address/size based on queue select */
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
}

/*
 * Must be called with dev->mutex acquired.
 */
int
vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, pkt_desc_idx, hdr_desc_idx;
	ptrdiff_t off;
	int ret;
	char *vr;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct vring_used_elem *ue;

	ret = 0;

	if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK))
		return ret;

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[0].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("rx enq: calloc error getting vionet ring");
		return (0);
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("rx enq: error reading gpa 0x%llx", q_gpa);
		goto out;
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + dev->vq[0].vq_availoffset);
	used = (struct vring_used *)(vr + dev->vq[0].vq_usedoffset);

	idx = dev->vq[0].last_avail & VIONET_QUEUE_MASK;

	if ((dev->vq[0].notified_avail & VIONET_QUEUE_MASK) == idx) {
		log_debug("vionet queue notify - no space, dropping packet");
		goto out;
	}

	hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
	hdr_desc = &desc[hdr_desc_idx];

	pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
	pkt_desc = &desc[pkt_desc_idx];

	/* must be not readable */
	if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) {
		log_warnx("unexpected readable rx descriptor %d",
		    pkt_desc_idx);
		goto out;
	}

	/* Write packet to descriptor ring */
	if (write_mem(pkt_desc->addr, pkt, sz)) {
		log_warnx("vionet: rx enq packet write_mem error @ "
		    "0x%llx", pkt_desc->addr);
		goto out;
	}

	ret = 1;
	dev->cfg.isr_status = 1;
	ue = &used->ring[used->idx & VIONET_QUEUE_MASK];
	ue->id = hdr_desc_idx;
	ue->len = hdr_desc->len + sz;
	used->idx++;
	dev->vq[0].last_avail = (dev->vq[0].last_avail + 1);
	*spc = dev->vq[0].notified_avail - dev->vq[0].last_avail;

	off = (char *)ue - vr;
	if (write_mem(q_gpa + off, ue, sizeof *ue))
		log_warnx("vionet: error writing vio ring");
	else {
		off = (char *)&used->idx - vr;
		if (write_mem(q_gpa + off, &used->idx, sizeof used->idx))
			log_warnx("vionet: error writing vio ring");
	}
out:
	free(vr);
	return (ret);
}

/*
 * vionet_rx
 *
 * Enqueue data that was received on a tap file descriptor
 * to the vionet device queue.
 *
 * Must be called with dev->mutex acquired.
 */
static int
vionet_rx(struct vionet_dev *dev)
{
	char buf[PAGE_SIZE];
	int hasdata, num_enq = 0, spc = 0;
	struct ether_header *eh;
	ssize_t sz;

	do {
		sz = read(dev->fd, buf, sizeof buf);
		if (sz == -1) {
			/*
			 * If we get EAGAIN, no data is currently available.
			 * Do not treat this as an error.
			 */
			if (errno != EAGAIN)
				log_warn("unexpected read error on vionet "
				    "device");
		} else if (sz != 0) {
			eh = (struct ether_header *)buf;
			if (!dev->lockedmac || sz < ETHER_HDR_LEN ||
			    ETHER_IS_MULTICAST(eh->ether_dhost) ||
			    memcmp(eh->ether_dhost, dev->mac,
			    sizeof(eh->ether_dhost)) == 0)
				num_enq += vionet_enq_rx(dev, buf, sz, &spc);
		} else if (sz == 0) {
			log_debug("process_rx: no data");
			hasdata = 0;
			break;
		}

		hasdata = fd_hasdata(dev->fd);
	} while (spc && hasdata);

	dev->rx_pending = hasdata;
	return (num_enq);
}

/*
 * vionet_rx_event
 *
 * Called from the event handling thread when new data can be
 * received on the tap fd of a vionet device.
 */
static void
vionet_rx_event(int fd, short kind, void *arg)
{
	struct vionet_dev *dev = arg;

	mutex_lock(&dev->mutex);

	/*
	 * We already have other data pending to be received. The data that
	 * has become available now will be enqueued to the vionet_dev
	 * later.
	 */
	if (dev->rx_pending) {
		mutex_unlock(&dev->mutex);
		return;
	}

	if (vionet_rx(dev) > 0) {
		/* XXX: vcpu_id */
		vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
	}

	mutex_unlock(&dev->mutex);
}

/*
 * vionet_process_rx
 *
 * Processes any remaining pending receivable data for a vionet device.
 * Called on VCPU exit. Although we poll on the tap file descriptor of
 * a vionet_dev in a separate thread, this function still needs to be
 * called on VCPU exit: it can happen that not all data fits into the
 * receive queue of the vionet_dev immediately. So any outstanding data
 * is handled here.
 *
 * Parameters:
 *  vm_id: VM ID of the VM for which to process vionet events
 */
void
vionet_process_rx(uint32_t vm_id)
{
	int i;

	for (i = 0 ; i < nr_vionet; i++) {
		mutex_lock(&vionet[i].mutex);
		if (!vionet[i].rx_added) {
			mutex_unlock(&vionet[i].mutex);
			continue;
		}

		if (vionet[i].rx_pending) {
			if (vionet_rx(&vionet[i])) {
				vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq);
			}
		}
		mutex_unlock(&vionet[i].mutex);
	}
}

/*
 * Must be called with dev->mutex acquired.
 */
void
vionet_notify_rx(struct vionet_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	char *vr;
	struct vring_avail *avail;

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = malloc(vr_sz);
	if (vr == NULL) {
		log_warn("malloc error getting vionet ring");
		return;
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		free(vr);
		return;
	}

	/* Compute offset into avail ring */
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);

	dev->rx_added = 1;
	dev->vq[0].notified_avail = avail->idx;

	free(vr);
}

/*
 * Must be called with dev->mutex acquired.
 *
 * XXX can't trust ring data from VM, be extra cautious.
 * XXX advertise link status to guest
 */
int
vionet_notifyq(struct vionet_dev *dev)
{
	uint64_t q_gpa;
	uint32_t vr_sz;
	uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx;
	size_t pktsz;
	ssize_t dhcpsz;
	int ret, num_enq, ofs, spc;
	char *vr, *pkt, *dhcppkt;
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct ether_header *eh;

	vr = pkt = dhcppkt = NULL;
	ret = spc = 0;
	dhcpsz = 0;

	/* Invalid queue? */
	if (dev->cfg.queue_notify != 1) {
		vionet_notify_rx(dev);
		goto out;
	}

	vr_sz = vring_size(VIONET_QUEUE_SIZE);
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;

	vr = calloc(1, vr_sz);
	if (vr == NULL) {
		log_warn("calloc error getting vionet ring");
		goto out;
	}

	if (read_mem(q_gpa, vr, vr_sz)) {
		log_warnx("error reading gpa 0x%llx", q_gpa);
		goto out;
	}

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
	used = (struct vring_used *)(vr +
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);

	num_enq = 0;

	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIONET_QUEUE_MASK;

	if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
		log_warnx("vionet tx queue notify - nothing to do?");
		goto out;
	}

	while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
		hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
		hdr_desc = &desc[hdr_desc_idx];
		pktsz = 0;

		dxx = hdr_desc_idx;
		do {
			pktsz += desc[dxx].len;
			dxx = desc[dxx].next;
		} while (desc[dxx].flags & VRING_DESC_F_NEXT);

		pktsz += desc[dxx].len;

		/* Remove virtio header descriptor len */
		pktsz -= hdr_desc->len;

		/*
		 * XXX check sanity pktsz
		 * XXX too long and > PAGE_SIZE checks
		 *     (PAGE_SIZE can be relaxed to 16384 later)
		 */
		pkt = malloc(pktsz);
		if (pkt == NULL) {
			log_warn("malloc error alloc packet buf");
			goto out;
		}

		ofs = 0;
		pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
		pkt_desc = &desc[pkt_desc_idx];

		while (pkt_desc->flags & VRING_DESC_F_NEXT) {
			/* must be not writable */
			if (pkt_desc->flags & VRING_DESC_F_WRITE) {
				log_warnx("unexpected writable tx desc "
				    "%d", pkt_desc_idx);
				goto out;
			}

			/* Read packet from descriptor ring */
			if (read_mem(pkt_desc->addr, pkt + ofs,
			    pkt_desc->len)) {
				log_warnx("vionet: packet read_mem error "
				    "@ 0x%llx", pkt_desc->addr);
				goto out;
			}

			ofs += pkt_desc->len;
			pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK;
			pkt_desc = &desc[pkt_desc_idx];
		}

		/* Now handle tail descriptor - must be not writable */
		if (pkt_desc->flags & VRING_DESC_F_WRITE) {
			log_warnx("unexpected writable tx descriptor %d",
			    pkt_desc_idx);
			goto out;
		}

		/* Read packet from descriptor ring */
		if (read_mem(pkt_desc->addr, pkt + ofs,
		    pkt_desc->len)) {
			log_warnx("vionet: packet read_mem error @ "
			    "0x%llx", pkt_desc->addr);
			goto out;
		}

		/* reject other source addresses */
		if (dev->lockedmac && pktsz >= ETHER_HDR_LEN &&
		    (eh = (struct ether_header *)pkt) &&
		    memcmp(eh->ether_shost, dev->mac,
		    sizeof(eh->ether_shost)) != 0)
			log_debug("vionet: wrong source address %s for vm %d",
			    ether_ntoa((struct ether_addr *)
			    eh->ether_shost), dev->vm_id);
		else if (dev->local && dhcpsz == 0 &&
		    (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) {
			log_debug("vionet: dhcp request,"
			    " local response size %zd", dhcpsz);

		/* XXX signed vs unsigned here, funky cast */
		} else if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
			log_warnx("vionet: tx failed writing to tap: "
			    "%d", errno);
			goto out;
		}

		ret = 1;
		dev->cfg.isr_status = 1;
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
		used->idx++;

		dev->vq[dev->cfg.queue_notify].last_avail =
		    (dev->vq[dev->cfg.queue_notify].last_avail + 1);
		num_enq++;

		idx = dev->vq[dev->cfg.queue_notify].last_avail &
		    VIONET_QUEUE_MASK;
	}

	if (write_mem(q_gpa, vr, vr_sz)) {
		log_warnx("vionet: tx error writing vio ring");
	}

	if (dhcpsz > 0) {
		if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc))
			ret = 1;
	}

out:
	free(vr);
	free(pkt);
	free(dhcppkt);

	return (ret);
}

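/*
 * Issue a control command (shutdown, reboot or RTC sync) to the guest
 * via the vmm control interface and arm the ACK timeout where needed.
 */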
int
vmmci_ctl(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	if ((vmmci.cfg.device_status &
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
		return (-1);

	if (cmd == vmmci.cmd)
		return (0);

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
	case VMMCI_REBOOT:
		/* Update command */
		vmmci.cmd = cmd;

		/*
		 * vmm VMs do not support powerdown, send a reboot request
		 * instead and turn it off after the triple fault.
		 */
		if (cmd == VMMCI_SHUTDOWN)
			cmd = VMMCI_REBOOT;

		/* Trigger interrupt */
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);

		/* Add ACK timeout */
		tv.tv_sec = VMMCI_TIMEOUT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SYNCRTC:
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
			/* RTC updated, request guest VM resync of its RTC */
			vmmci.cmd = cmd;

			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
		} else {
			log_debug("%s: RTC sync skipped (guest does not "
			    "support RTC sync)\n", __func__);
		}
		break;
	default:
		fatalx("invalid vmmci command: %d", cmd);
	}

	return (0);
}

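/* Handle the guest's acknowledgement of a vmm control interface command. */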
void
vmmci_ack(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
		/*
		 * The shutdown was requested by the VM if we don't have
		 * a pending shutdown request. In this case add a short
		 * timeout to give the VM a chance to reboot before the
		 * timer is expired.
		 */
		if (vmmci.cmd == 0) {
			log_debug("%s: vm %u requested shutdown", __func__,
			    vmmci.vm_id);
			tv.tv_sec = VMMCI_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
			return;
		}
		/* FALLTHROUGH */
	case VMMCI_REBOOT:
		/*
		 * If the VM acknowledged our shutdown request, give it
		 * enough time to shutdown or reboot gracefully. This
		 * might take a considerable amount of time (running
		 * rc.shutdown on the VM), so increase the timeout before
		 * killing it forcefully.
		 */
		if (cmd == vmmci.cmd &&
		    evtimer_pending(&vmmci.timeout, NULL)) {
			log_debug("%s: vm %u acknowledged shutdown request",
			    __func__, vmmci.vm_id);
			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
		}
		break;
	case VMMCI_SYNCRTC:
		log_debug("%s: vm %u acknowledged RTC sync request",
		    __func__, vmmci.vm_id);
		vmmci.cmd = VMMCI_NONE;
		break;
	default:
		log_warnx("%s: illegal request %u", __func__, cmd);
		break;
	}
}

void
vmmci_timeout(int fd, short type, void *arg)
{
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
}

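/* I/O handler for the vmm control interface's config space registers. */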
int
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			vmmci.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			vmmci.cfg.queue_address = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			vmmci.cfg.queue_select = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			vmmci.cfg.queue_notify = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			vmmci.cfg.device_status = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			vmmci_ack(*data);
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			*data = vmmci.cmd;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			/* Update time once when reading the first register */
			gettimeofday(&vmmci.time, NULL);
			*data = (uint64_t)vmmci.time.tv_sec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			*data = (uint64_t)vmmci.time.tv_sec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
			*data = (uint64_t)vmmci.time.tv_usec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
			*data = (uint64_t)vmmci.time.tv_usec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = vmmci.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = vmmci.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
			*data = vmmci.cfg.queue_address;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = vmmci.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = vmmci.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = vmmci.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = vmmci.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = vmmci.cfg.isr_status;
			vmmci.cfg.isr_status = 0;
			break;
		}
	}
	return (0);
}

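/*
 * virtio_init
 *
 * Create the virtio PCI devices for a new VM: the entropy (rng)
 * device, one block device per configured disk, one network device per
 * configured NIC and the vmm control interface (vmmci). For each
 * device this sets up the virtqueue layout, registers the I/O BAR
 * handler and initializes the per-device state.
 */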
void
virtio_init(struct vmd_vm *vm, int *child_disks, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t id;
	uint8_t i;
	int ret;
	off_t sz;

	/* Virtio entropy device */
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
	    PCI_SUBCLASS_SYSTEM_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
		log_warnx("%s: can't add PCI virtio rng device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't add bar for virtio rng device",
		    __progname);
		return;
	}

	memset(&viornd, 0, sizeof(viornd));
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
	    VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
	viornd.pci_id = id;

	if (vcp->vcp_ndisks > 0) {
		nr_vioblk = vcp->vcp_ndisks;
		vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
		if (vioblk == NULL) {
			log_warn("%s: calloc failure allocating vioblks",
			    __progname);
			return;
		}

		/* One virtio block device for each disk defined in vcp */
		for (i = 0; i < vcp->vcp_ndisks; i++) {
			if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
				continue;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
			    PCI_CLASS_MASS_STORAGE,
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio block "
				    "device", __progname);
				return;
			}
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
			    &vioblk[i])) {
				log_warnx("%s: can't add bar for virtio block "
				    "device", __progname);
				return;
			}
			vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
			vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
			vioblk[i].vq[0].last_avail = 0;
			vioblk[i].fd = child_disks[i];
			vioblk[i].sz = sz / 512;
			vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
			vioblk[i].max_xfer = 1048576;
			vioblk[i].pci_id = id;
		}
	}

	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return;
		}

		nr_vionet = vcp->vcp_nnics;
		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
			    PCI_SUBCLASS_SYSTEM_MISC,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio net device",
				    __progname);
				return;
			}

			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't add bar for virtio net "
				    "device", __progname);
				return;
			}

			ret = pthread_mutex_init(&vionet[i].mutex, NULL);
			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return;
			}

			vionet[i].vq[0].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[0].last_avail = 0;
			vionet[i].vq[1].qs = VIONET_QUEUE_SIZE;
			vionet[i].vq[1].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			vionet[i].vq[1].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			vionet[i].vq[1].last_avail = 0;
			vionet[i].vq[1].notified_avail = 0;
			vionet[i].fd = child_taps[i];
			vionet[i].rx_pending = 0;
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;
			vionet[i].irq = pci_get_dev_irq(id);

			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
			if (event_add(&vionet[i].event, NULL)) {
				log_warn("could not initialize vionet event "
				    "handler");
				return;
			}

			/* MAC address has been assigned by the parent */
			memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6);
			vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC;

			vionet[i].lockedmac =
			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
			vionet[i].local =
			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
			vionet[i].idx = i;
			vionet[i].pci_id = id;

			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s",
			    __func__, vcp->vcp_name, i,
			    ether_ntoa((void *)vionet[i].mac),
			    vionet[i].lockedmac ? ", locked" : "",
			    vionet[i].local ? ", local" : "");
		}
	}

	/* virtio control device */
	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_OPENBSD_CONTROL,
	    PCI_CLASS_COMMUNICATIONS,
	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
		log_warnx("%s: can't add PCI vmm control device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
		log_warnx("%s: can't add bar for vmm control device",
		    __progname);
		return;
	}

	memset(&vmmci, 0, sizeof(vmmci));
	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
	    VMMCI_F_SYNCRTC;
	vmmci.vm_id = vcp->vcp_id;
	vmmci.irq = pci_get_dev_irq(id);
	vmmci.pci_id = id;

	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
}

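/*
 * vmmci_restore
 *
 * Restore the vmm control interface from the given fd: read the saved
 * vmmci structure, re-register its BAR handler and re-initialize the
 * event timer, which cannot be carried over.
 */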
int
vmmci_restore(int fd, uint32_t vm_id)
{
	log_debug("%s: receiving vmmci", __func__);
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error reading vmmci from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}
	vmmci.vm_id = vm_id;
	memset(&vmmci.timeout, 0, sizeof(struct event));
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	return (0);
}

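/*
 * viornd_restore
 *
 * Restore the virtio entropy device from the given fd and re-register
 * its BAR handler.
 */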
int
viornd_restore(int fd)
{
	log_debug("%s: receiving viornd", __func__);
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error reading viornd from fd", __func__);
		return (-1);
	}
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't set bar fn for virtio rng device",
		    __progname);
		return (-1);
	}
	return (0);
}

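/*
 * vionet_restore
 *
 * Restore the virtio network devices: read the saved per-device state
 * from fd, then recreate everything that cannot be serialized (BAR
 * handler, mutex, tap file descriptor and libevent read handler).
 */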
int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	uint8_t i;
	int ret;

	nr_vionet = vcp->vcp_nnics;
	if (vcp->vcp_nnics > 0) {
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
		if (vionet == NULL) {
			log_warn("%s: calloc failure allocating vionets",
			    __progname);
			return (-1);
		}
		log_debug("%s: receiving vionet", __func__);
		if (atomicio(read, fd, vionet,
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		for (i = 0; i < vcp->vcp_nnics; i++) {
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
			    &vionet[i])) {
				log_warnx("%s: can't set bar fn for virtio net "
				    "device", __progname);
				return (-1);
			}

			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);

			if (ret) {
				errno = ret;
				log_warn("%s: could not initialize mutex "
				    "for vionet device", __progname);
				return (-1);
			}
			vionet[i].fd = child_taps[i];
			vionet[i].rx_pending = 0;
			vionet[i].vm_id = vcp->vcp_id;
			vionet[i].vm_vmid = vm->vm_vmid;

			memset(&vionet[i].event, 0, sizeof(struct event));
			event_set(&vionet[i].event, vionet[i].fd,
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
			if (event_add(&vionet[i].event, NULL)) {
				log_warn("could not initialize vionet event "
				    "handler");
				return (-1);
			}
		}
	}
	return (0);
}

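/*
 * vioblk_restore
 *
 * Restore the virtio block devices: read the saved per-device state
 * from fd, re-register each BAR handler and re-attach the disk image
 * file descriptors.
 */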
int
vioblk_restore(int fd, struct vm_create_params *vcp, int *child_disks)
{
	uint8_t i;
	off_t sz;

	nr_vioblk = vcp->vcp_ndisks;
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
	if (vioblk == NULL) {
		log_warn("%s: calloc failure allocating vioblks", __progname);
		return (-1);
	}
	log_debug("%s: receiving vioblk", __func__);
	if (atomicio(read, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error reading vioblk from fd", __func__);
		return (-1);
	}
	for (i = 0; i < vcp->vcp_ndisks; i++) {
		if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
			continue;

		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
		    &vioblk[i])) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		vioblk[i].fd = child_disks[i];
	}
	return (0);
}

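/*
 * virtio_restore
 *
 * Restore all virtio devices in the same order in which virtio_dump()
 * writes them: rng, block, net, then the control device.
 */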
int
virtio_restore(int fd, struct vmd_vm *vm, int *child_disks, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	int ret;

	if ((ret = viornd_restore(fd)) == -1)
		return ret;

	if ((ret = vioblk_restore(fd, vcp, child_disks)) == -1)
		return ret;

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return ret;

	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
		return ret;

	return (0);
}

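/*
 * The *_dump() functions below write the in-memory device structures
 * to the given fd with atomicio(); they are the counterparts of the
 * *_restore() functions above.
 */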
int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	log_debug("%s: sending vionet", __func__);
	if (atomicio(vwrite, fd, vionet,
	    nr_vionet * sizeof(struct vionet_dev)) !=
	    nr_vionet * sizeof(struct vionet_dev)) {
		log_warnx("%s: error writing vionet to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vioblk_dump(int fd)
{
	log_debug("%s: sending vioblk", __func__);
	if (atomicio(vwrite, fd, vioblk,
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
	    nr_vioblk * sizeof(struct vioblk_dev)) {
		log_warnx("%s: error writing vioblk to fd", __func__);
		return (-1);
	}
	return (0);
}

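/*
 * virtio_dump
 *
 * Dump all virtio device state in the order expected by
 * virtio_restore().
 */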
int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}