GCC Code Coverage Report
Directory: ./
File: usr.sbin/vmd/virtio.c
Date: 2017-11-07
Lines: 0 / 1029 (0.0 %)
Branches: 0 / 491 (0.0 %)

Line Branch Exec Source
1
/*	$OpenBSD: virtio.c,v 1.54 2017/09/17 23:07:56 pd Exp $	*/
2
3
/*
4
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include <sys/param.h>	/* PAGE_SIZE */
20
#include <sys/socket.h>
21
22
#include <machine/vmmvar.h>
23
#include <dev/pci/pcireg.h>
24
#include <dev/pci/pcidevs.h>
25
#include <dev/pv/virtioreg.h>
26
#include <dev/pv/vioblkreg.h>
27
28
#include <net/if.h>
29
#include <netinet/in.h>
30
#include <netinet/if_ether.h>
31
32
#include <errno.h>
33
#include <event.h>
34
#include <poll.h>
35
#include <stddef.h>
36
#include <stdlib.h>
37
#include <string.h>
38
#include <unistd.h>
39
40
#include "pci.h"
41
#include "vmd.h"
42
#include "vmm.h"
43
#include "virtio.h"
44
#include "loadfile.h"
45
#include "atomicio.h"
46
47
extern char *__progname;
48
49
struct viornd_dev viornd;
50
struct vioblk_dev *vioblk;
51
struct vionet_dev *vionet;
52
struct vmmci_dev vmmci;
53
54
int nr_vionet;
55
int nr_vioblk;
56
57
#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */
58
59
#define VIRTIO_NET_F_MAC	(1<<5)
60
61
#define VMMCI_F_TIMESYNC	(1<<0)
62
#define VMMCI_F_ACK		(1<<1)
63
#define VMMCI_F_SYNCRTC		(1<<2)
64
65
struct ioinfo {
66
	uint8_t *buf;
67
	ssize_t len;
68
	off_t offset;
69
	int fd;
70
	int error;
71
};
72
73
const char *
74
vioblk_cmd_name(uint32_t type)
75
{
76
	switch (type) {
77
	case VIRTIO_BLK_T_IN: return "read";
78
	case VIRTIO_BLK_T_OUT: return "write";
79
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
80
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
81
	case VIRTIO_BLK_T_FLUSH: return "flush";
82
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
83
	case VIRTIO_BLK_T_GET_ID: return "get id";
84
	default: return "unknown";
85
	}
86
}
87
88
static void
89
dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
90
{
91
	log_debug("descriptor chain @ %d", dxx);
92
	do {
93
		log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x "
94
		    "/ 0x%x / 0x%x",
95
		    dxx,
96
		    desc[dxx].addr,
97
		    desc[dxx].len,
98
		    desc[dxx].flags,
99
		    desc[dxx].next);
100
		dxx = desc[dxx].next;
101
	} while (desc[dxx].flags & VRING_DESC_F_NEXT);
102
103
	log_debug("desc @%d addr/len/flags/next = 0x%llx / 0x%x / 0x%x "
104
	    "/ 0x%x",
105
	    dxx,
106
	    desc[dxx].addr,
107
	    desc[dxx].len,
108
	    desc[dxx].flags,
109
	    desc[dxx].next);
110
}
111
112
static const char *
113
virtio_reg_name(uint8_t reg)
114
{
115
	switch (reg) {
116
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
117
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
118
	case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
119
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
120
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
121
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
122
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
123
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
124
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "device config 0";
125
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "device config 1";
126
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
127
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
128
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
129
	default: return "unknown";
130
	}
131
}
132
133
uint32_t
134
vring_size(uint32_t vq_size)
135
{
136
	uint32_t allocsize1, allocsize2;
137
138
	/* allocsize1: descriptor table + avail ring + pad */
139
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
140
	    + sizeof(uint16_t) * (2 + vq_size));
141
	/* allocsize2: used ring + pad */
142
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
143
	    + sizeof(struct vring_used_elem) * vq_size);
144
145
	return allocsize1 + allocsize2;
146
}
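/*
 * Worked example (illustrative only): assuming a 64-entry queue,
 * 16-byte descriptors, 8-byte used elements and a 4096-byte
 * VIRTQUEUE_ALIGN,
 *   allocsize1 = ALIGN(16 * 64 + 2 * (2 + 64)) = ALIGN(1156) = 4096
 *   allocsize2 = ALIGN(2 * 2 + 8 * 64)         = ALIGN(516)  = 4096
 * for a total ring size of 8192 bytes.
 */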
147
148
/* Update queue select */
149
void
150
viornd_update_qs(void)
151
{
152
	/* Invalid queue? */
153
	if (viornd.cfg.queue_select > 0) {
154
		viornd.cfg.queue_size = 0;
155
		return;
156
	}
157
158
	/* Update queue address/size based on queue select */
159
	viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa;
160
	viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs;
161
}
162
163
/* Update queue address */
164
void
165
viornd_update_qa(void)
166
{
167
	/* Invalid queue? */
168
	if (viornd.cfg.queue_select > 0)
169
		return;
170
171
	viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address;
172
}
173
174
int
175
viornd_notifyq(void)
176
{
177
	uint64_t q_gpa;
178
	uint32_t vr_sz;
179
	size_t sz;
180
	int ret;
181
	uint16_t aidx, uidx;
182
	char *buf, *rnd_data;
183
	struct vring_desc *desc;
184
	struct vring_avail *avail;
185
	struct vring_used *used;
186
187
	ret = 0;
188
189
	/* Invalid queue? */
190
	if (viornd.cfg.queue_notify > 0)
191
		return (0);
192
193
	vr_sz = vring_size(VIORND_QUEUE_SIZE);
194
	q_gpa = viornd.vq[viornd.cfg.queue_notify].qa;
195
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
196
197
	buf = calloc(1, vr_sz);
198
	if (buf == NULL) {
199
		log_warn("calloc error getting viornd ring");
200
		return (0);
201
	}
202
203
	if (read_mem(q_gpa, buf, vr_sz)) {
204
		free(buf);
205
		return (0);
206
	}
207
208
	desc = (struct vring_desc *)(buf);
209
	avail = (struct vring_avail *)(buf +
210
	    viornd.vq[viornd.cfg.queue_notify].vq_availoffset);
211
	used = (struct vring_used *)(buf +
212
	    viornd.vq[viornd.cfg.queue_notify].vq_usedoffset);
213
214
	aidx = avail->idx & VIORND_QUEUE_MASK;
215
	uidx = used->idx & VIORND_QUEUE_MASK;
216
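	/*
	 * The entropy queue uses a single descriptor per request; its
	 * length is the number of random bytes the guest asked for.
	 */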
217
	sz = desc[avail->ring[aidx]].len;
218
	if (sz > MAXPHYS)
219
		fatal("viornd descriptor size too large (%zu)", sz);
220
221
	rnd_data = malloc(sz);
222
223
	if (rnd_data != NULL) {
224
		arc4random_buf(rnd_data, desc[avail->ring[aidx]].len);
225
		if (write_mem(desc[avail->ring[aidx]].addr,
226
		    rnd_data, desc[avail->ring[aidx]].len)) {
227
			log_warnx("viornd: can't write random data @ "
228
			    "0x%llx",
229
			    desc[avail->ring[aidx]].addr);
230
		} else {
231
			/* ret == 1 -> interrupt needed */
232
			/* XXX check VIRTIO_F_NO_INTR */
233
			ret = 1;
234
			viornd.cfg.isr_status = 1;
235
			used->ring[uidx].id = avail->ring[aidx] &
236
			    VIORND_QUEUE_MASK;
237
			used->ring[uidx].len = desc[avail->ring[aidx]].len;
238
			used->idx++;
239
240
			if (write_mem(q_gpa, buf, vr_sz)) {
241
				log_warnx("viornd: error writing vio ring");
242
			}
243
		}
244
		free(rnd_data);
245
	} else
246
		fatal("memory allocation error for viornd data");
247
248
	free(buf);
249
250
	return (ret);
251
}
252
253
int
254
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
255
    void *unused, uint8_t sz)
256
{
257
	*intr = 0xFF;
258
259
	if (dir == 0) {
260
		switch (reg) {
261
		case VIRTIO_CONFIG_DEVICE_FEATURES:
262
		case VIRTIO_CONFIG_QUEUE_SIZE:
263
		case VIRTIO_CONFIG_ISR_STATUS:
264
			log_warnx("%s: illegal write %x to %s",
265
			    __progname, *data, virtio_reg_name(reg));
266
			break;
267
		case VIRTIO_CONFIG_GUEST_FEATURES:
268
			viornd.cfg.guest_feature = *data;
269
			break;
270
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
271
			viornd.cfg.queue_address = *data;
272
			viornd_update_qa();
273
			break;
274
		case VIRTIO_CONFIG_QUEUE_SELECT:
275
			viornd.cfg.queue_select = *data;
276
			viornd_update_qs();
277
			break;
278
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
279
			viornd.cfg.queue_notify = *data;
280
			if (viornd_notifyq())
281
				*intr = 1;
282
			break;
283
		case VIRTIO_CONFIG_DEVICE_STATUS:
284
			viornd.cfg.device_status = *data;
285
			break;
286
		}
287
	} else {
288
		switch (reg) {
289
		case VIRTIO_CONFIG_DEVICE_FEATURES:
290
			*data = viornd.cfg.device_feature;
291
			break;
292
		case VIRTIO_CONFIG_GUEST_FEATURES:
293
			*data = viornd.cfg.guest_feature;
294
			break;
295
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
296
			*data = viornd.cfg.queue_address;
297
			break;
298
		case VIRTIO_CONFIG_QUEUE_SIZE:
299
			*data = viornd.cfg.queue_size;
300
			break;
301
		case VIRTIO_CONFIG_QUEUE_SELECT:
302
			*data = viornd.cfg.queue_select;
303
			break;
304
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
305
			*data = viornd.cfg.queue_notify;
306
			break;
307
		case VIRTIO_CONFIG_DEVICE_STATUS:
308
			*data = viornd.cfg.device_status;
309
			break;
310
		case VIRTIO_CONFIG_ISR_STATUS:
311
			*data = viornd.cfg.isr_status;
312
			viornd.cfg.isr_status = 0;
313
			break;
314
		}
315
	}
316
	return (0);
317
}
318
319
void
320
vioblk_update_qa(struct vioblk_dev *dev)
321
{
322
	/* Invalid queue? */
323
	if (dev->cfg.queue_select > 0)
324
		return;
325
326
	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
327
}
328
329
void
330
vioblk_update_qs(struct vioblk_dev *dev)
331
{
332
	/* Invalid queue? */
333
	if (dev->cfg.queue_select > 0) {
334
		dev->cfg.queue_size = 0;
335
		return;
336
	}
337
338
	/* Update queue address/size based on queue select */
339
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
340
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
341
}
342
343
static void
344
vioblk_free_info(struct ioinfo *info)
345
{
346
	if (!info)
347
		return;
348
	free(info->buf);
349
	free(info);
350
}
351
352
static struct ioinfo *
353
vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
354
{
355
	struct ioinfo *info;
356
357
	info = calloc(1, sizeof(*info));
358
	if (!info)
359
		goto nomem;
360
	info->buf = malloc(sz);
361
	if (info->buf == NULL)
362
		goto nomem;
363
	info->len = sz;
364
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
365
	info->fd = dev->fd;
366
367
	return info;
368
369
nomem:
370
	free(info);
371
	log_warn("malloc errror vioblk read");
372
	return (NULL);
373
}
374
375
376
static const uint8_t *
377
vioblk_finish_read(struct ioinfo *info)
378
{
379
	if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
380
		info->error = errno;
381
		log_warn("vioblk read error");
382
		return NULL;
383
	}
384
385
	return info->buf;
386
}
387
388
static struct ioinfo *
389
vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, size_t len)
390
{
391
	struct ioinfo *info;
392
393
	info = calloc(1, sizeof(*info));
394
	if (!info)
395
		goto nomem;
396
	info->buf = malloc(len);
397
	if (info->buf == NULL)
398
		goto nomem;
399
	info->len = len;
400
	info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
401
	info->fd = dev->fd;
402
403
	if (read_mem(addr, info->buf, len)) {
404
		vioblk_free_info(info);
405
		return NULL;
406
	}
407
408
	return info;
409
410
nomem:
411
	free(info);
412
	log_warn("malloc errror vioblk write");
413
	return (NULL);
414
}
415
416
static int
417
vioblk_finish_write(struct ioinfo *info)
418
{
419
	if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
420
		log_warn("vioblk write error");
421
		return EIO;
422
	}
423
	return 0;
424
}
425
426
/*
427
 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR where possible.
428
 * XXX can't trust ring data from the VM, be extra cautious.
429
 */
430
int
431
vioblk_notifyq(struct vioblk_dev *dev)
432
{
433
	uint64_t q_gpa;
434
	uint32_t vr_sz;
435
	uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
436
	uint8_t ds;
437
	int ret;
438
	off_t secbias;
439
	char *vr;
440
	struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
441
	struct vring_avail *avail;
442
	struct vring_used *used;
443
	struct virtio_blk_req_hdr cmd;
444
445
	ret = 0;
446
447
	/* Invalid queue? */
448
	if (dev->cfg.queue_notify > 0)
449
		return (0);
450
451
	vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
452
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
453
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
454
455
	vr = calloc(1, vr_sz);
456
	if (vr == NULL) {
457
		log_warn("calloc error getting vioblk ring");
458
		return (0);
459
	}
460
461
	if (read_mem(q_gpa, vr, vr_sz)) {
462
		log_warnx("error reading gpa 0x%llx", q_gpa);
463
		goto out;
464
	}
465
466
	/* Compute offsets in ring of descriptors, avail ring, and used ring */
467
	desc = (struct vring_desc *)(vr);
468
	avail = (struct vring_avail *)(vr +
469
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
470
	used = (struct vring_used *)(vr +
471
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);
472
473
	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
474
475
	if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
476
		log_warnx("vioblk queue notify - nothing to do?");
477
		goto out;
478
	}
479
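	/*
	 * Each request is a descriptor chain: a request header, data
	 * descriptors for reads and writes, and a final status byte that
	 * is written back to the guest.
	 */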
480
	while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
481
482
		cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
483
		cmd_desc = &desc[cmd_desc_idx];
484
485
		if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
486
			log_warnx("unchained vioblk cmd descriptor received "
487
			    "(idx %d)", cmd_desc_idx);
488
			goto out;
489
		}
490
491
		/* Read command from descriptor ring */
492
		if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
493
			log_warnx("vioblk: command read_mem error @ 0x%llx",
494
			    cmd_desc->addr);
495
			goto out;
496
		}
497
498
		switch (cmd.type) {
499
		case VIRTIO_BLK_T_IN:
500
			/* first descriptor */
501
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
502
			secdata_desc = &desc[secdata_desc_idx];
503
504
			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
505
				log_warnx("unchained vioblk data descriptor "
506
				    "received (idx %d)", cmd_desc_idx);
507
				goto out;
508
			}
509
510
			secbias = 0;
511
			do {
512
				struct ioinfo *info;
513
				const uint8_t *secdata;
514
515
				info = vioblk_start_read(dev, cmd.sector + secbias,
516
				    (ssize_t)secdata_desc->len);
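				/*
				 * XXX info is not checked for NULL here;
				 * vioblk_finish_read() would dereference it.
				 */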
517
518
				/* read the data (use current data descriptor) */
519
				secdata = vioblk_finish_read(info);
520
				if (secdata == NULL) {
521
					vioblk_free_info(info);
522
					log_warnx("vioblk: block read error, "
523
					    "sector %lld", cmd.sector);
524
					goto out;
525
				}
526
527
				if (write_mem(secdata_desc->addr, secdata,
528
				    secdata_desc->len)) {
529
					log_warnx("can't write sector "
530
					    "data to gpa @ 0x%llx",
531
					    secdata_desc->addr);
532
					dump_descriptor_chain(desc, cmd_desc_idx);
533
					vioblk_free_info(info);
534
					goto out;
535
				}
536
537
				vioblk_free_info(info);
538
539
				secbias += (secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE);
540
				secdata_desc_idx = secdata_desc->next &
541
				    VIOBLK_QUEUE_MASK;
542
				secdata_desc = &desc[secdata_desc_idx];
543
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);
544
545
			ds_desc_idx = secdata_desc_idx;
546
			ds_desc = secdata_desc;
547
548
			ds = VIRTIO_BLK_S_OK;
549
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
550
				log_warnx("can't write device status data @ "
551
				    "0x%llx", ds_desc->addr);
552
				dump_descriptor_chain(desc, cmd_desc_idx);
553
				goto out;
554
			}
555
556
			ret = 1;
557
			dev->cfg.isr_status = 1;
558
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
559
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
560
			used->idx++;
561
562
			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
563
			    VIOBLK_QUEUE_MASK;
564
565
			if (write_mem(q_gpa, vr, vr_sz)) {
566
				log_warnx("vioblk: error writing vio ring");
567
			}
568
			break;
569
		case VIRTIO_BLK_T_OUT:
570
			secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
571
			secdata_desc = &desc[secdata_desc_idx];
572
573
			if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
574
				log_warnx("wr vioblk: unchained vioblk data "
575
				    "descriptor received (idx %d)",
576
				    cmd_desc_idx);
577
				goto out;
578
			}
579
580
			if (secdata_desc->len > dev->max_xfer) {
581
				log_warnx("%s: invalid read size %d requested",
582
				    __func__, secdata_desc->len);
583
				goto out;
584
			}
585
586
			secbias = 0;
587
			do {
588
				struct ioinfo *info;
589
590
				info = vioblk_start_write(dev, cmd.sector + secbias,
591
				    secdata_desc->addr, secdata_desc->len);
592
593
				if (info == NULL) {
594
					log_warnx("wr vioblk: can't read "
595
					    "sector data @ 0x%llx",
596
					    secdata_desc->addr);
597
					dump_descriptor_chain(desc,
598
					    cmd_desc_idx);
599
					goto out;
600
				}
601
602
				if (vioblk_finish_write(info)) {
603
					log_warnx("wr vioblk: disk write "
604
					    "error");
605
					vioblk_free_info(info);
606
					goto out;
607
				}
608
609
				vioblk_free_info(info);
610
611
				secbias += secdata_desc->len /
612
				    VIRTIO_BLK_SECTOR_SIZE;
613
614
				secdata_desc_idx = secdata_desc->next &
615
				    VIOBLK_QUEUE_MASK;
616
				secdata_desc = &desc[secdata_desc_idx];
617
			} while (secdata_desc->flags & VRING_DESC_F_NEXT);
618
619
			ds_desc_idx = secdata_desc_idx;
620
			ds_desc = secdata_desc;
621
622
			ds = VIRTIO_BLK_S_OK;
623
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
624
				log_warnx("wr vioblk: can't write device "
625
				    "status data @ 0x%llx", ds_desc->addr);
626
				dump_descriptor_chain(desc, cmd_desc_idx);
627
				goto out;
628
			}
629
630
			ret = 1;
631
			dev->cfg.isr_status = 1;
632
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
633
			    cmd_desc_idx;
634
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
635
			    cmd_desc->len;
636
			used->idx++;
637
638
			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
639
			    VIOBLK_QUEUE_MASK;
640
			if (write_mem(q_gpa, vr, vr_sz))
641
				log_warnx("wr vioblk: error writing vio ring");
642
			break;
643
		case VIRTIO_BLK_T_FLUSH:
644
		case VIRTIO_BLK_T_FLUSH_OUT:
645
			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
646
			ds_desc = &desc[ds_desc_idx];
647
648
			ds = VIRTIO_BLK_S_OK;
649
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
650
				log_warnx("fl vioblk: can't write device status "
651
				    "data @ 0x%llx", ds_desc->addr);
652
				dump_descriptor_chain(desc, cmd_desc_idx);
653
				goto out;
654
			}
655
656
			ret = 1;
657
			dev->cfg.isr_status = 1;
658
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
659
			    cmd_desc_idx;
660
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
661
			    cmd_desc->len;
662
			used->idx++;
663
664
			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
665
			    VIOBLK_QUEUE_MASK;
666
			if (write_mem(q_gpa, vr, vr_sz)) {
667
				log_warnx("fl vioblk: error writing vio ring");
668
			}
669
			break;
670
		default:
671
			log_warnx("%s: unsupported command 0x%x", __func__,
672
			    cmd.type);
673
674
			ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
675
			ds_desc = &desc[ds_desc_idx];
676
677
			ds = VIRTIO_BLK_S_UNSUPP;
678
			if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
679
				log_warnx("%s: get id : can't write device "
680
				    "status data @ 0x%llx", __func__,
681
				    ds_desc->addr);
682
				dump_descriptor_chain(desc, cmd_desc_idx);
683
				goto out;
684
			}
685
686
			ret = 1;
687
			dev->cfg.isr_status = 1;
688
			used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
689
			    cmd_desc_idx;
690
			used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
691
			    cmd_desc->len;
692
			used->idx++;
693
694
			dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
695
			    VIOBLK_QUEUE_MASK;
696
			if (write_mem(q_gpa, vr, vr_sz)) {
697
				log_warnx("%s: get id : error writing vio ring",
698
				    __func__);
699
			}
700
			break;
701
		}
702
703
		idx = (idx + 1) & VIOBLK_QUEUE_MASK;
704
	}
705
out:
706
	free(vr);
707
	return (ret);
708
}
709
710
int
711
virtio_blk_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
712
    void *cookie, uint8_t sz)
713
{
714
	struct vioblk_dev *dev = (struct vioblk_dev *)cookie;
715
716
	*intr = 0xFF;
717
718
719
	if (dir == 0) {
720
		switch (reg) {
721
		case VIRTIO_CONFIG_DEVICE_FEATURES:
722
		case VIRTIO_CONFIG_QUEUE_SIZE:
723
		case VIRTIO_CONFIG_ISR_STATUS:
724
			log_warnx("%s: illegal write %x to %s",
725
			    __progname, *data, virtio_reg_name(reg));
726
			break;
727
		case VIRTIO_CONFIG_GUEST_FEATURES:
728
			dev->cfg.guest_feature = *data;
729
			break;
730
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
731
			dev->cfg.queue_address = *data;
732
			vioblk_update_qa(dev);
733
			break;
734
		case VIRTIO_CONFIG_QUEUE_SELECT:
735
			dev->cfg.queue_select = *data;
736
			vioblk_update_qs(dev);
737
			break;
738
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
739
			dev->cfg.queue_notify = *data;
740
			if (vioblk_notifyq(dev))
741
				*intr = 1;
742
			break;
743
		case VIRTIO_CONFIG_DEVICE_STATUS:
744
			dev->cfg.device_status = *data;
745
			if (dev->cfg.device_status == 0) {
746
				log_debug("%s: device reset", __func__);
747
				dev->cfg.guest_feature = 0;
748
				dev->cfg.queue_address = 0;
749
				vioblk_update_qa(dev);
750
				dev->cfg.queue_size = 0;
751
				vioblk_update_qs(dev);
752
				dev->cfg.queue_select = 0;
753
				dev->cfg.queue_notify = 0;
754
				dev->cfg.isr_status = 0;
755
				dev->vq[0].last_avail = 0;
756
			}
757
			break;
758
		default:
759
			break;
760
		}
761
	} else {
762
		switch (reg) {
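		/*
		 * Device config space: bytes 0-7 hold the capacity in
		 * 512-byte sectors (dev->sz), bytes 8-11 the maximum
		 * transfer size (dev->max_xfer); reads are assembled
		 * byte/word/dword at a time depending on the access size.
		 */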
763
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
764
			switch (sz) {
765
			case 4:
766
				*data = (uint32_t)(dev->sz);
767
				break;
768
			case 2:
769
				*data &= 0xFFFF0000;
770
				*data |= (uint32_t)(dev->sz) & 0xFFFF;
771
				break;
772
			case 1:
773
				*data &= 0xFFFFFF00;
774
				*data |= (uint32_t)(dev->sz) & 0xFF;
775
				break;
776
			}
777
			/* XXX handle invalid sz */
778
			break;
779
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
780
			if (sz == 1) {
781
				*data &= 0xFFFFFF00;
782
				*data |= (uint32_t)(dev->sz >> 8) & 0xFF;
783
			}
784
			/* XXX handle invalid sz */
785
			break;
786
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
787
			if (sz == 1) {
788
				*data &= 0xFFFFFF00;
789
				*data |= (uint32_t)(dev->sz >> 16) & 0xFF;
790
			} else if (sz == 2) {
791
				*data &= 0xFFFF0000;
792
				*data |= (uint32_t)(dev->sz >> 16) & 0xFFFF;
793
			}
794
			/* XXX handle invalid sz */
795
			break;
796
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
797
			if (sz == 1) {
798
				*data &= 0xFFFFFF00;
799
				*data |= (uint32_t)(dev->sz >> 24) & 0xFF;
800
			}
801
			/* XXX handle invalid sz */
802
			break;
803
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
804
			switch (sz) {
805
			case 4:
806
				*data = (uint32_t)(dev->sz >> 32);
807
				break;
808
			case 2:
809
				*data &= 0xFFFF0000;
810
				*data |= (uint32_t)(dev->sz >> 32) & 0xFFFF;
811
				break;
812
			case 1:
813
				*data &= 0xFFFFFF00;
814
				*data |= (uint32_t)(dev->sz >> 32) & 0xFF;
815
				break;
816
			}
817
			/* XXX handle invalid sz */
818
			break;
819
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
820
			if (sz == 1) {
821
				*data &= 0xFFFFFF00;
822
				*data |= (uint32_t)(dev->sz >> 40) & 0xFF;
823
			}
824
			/* XXX handle invalid sz */
825
			break;
826
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
827
			if (sz == 1) {
828
				*data &= 0xFFFFFF00;
829
				*data |= (uint32_t)(dev->sz >> 48) & 0xFF;
830
			} else if (sz == 2) {
831
				*data &= 0xFFFF0000;
832
				*data |= (uint32_t)(dev->sz >> 48) & 0xFFFF;
833
			}
834
			/* XXX handle invalid sz */
835
			break;
836
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
837
			if (sz == 1) {
838
				*data &= 0xFFFFFF00;
839
				*data |= (uint32_t)(dev->sz >> 56) & 0xFF;
840
			}
841
			/* XXX handle invalid sz */
842
			break;
843
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
844
			switch (sz) {
845
			case 4:
846
				*data = (uint32_t)(dev->max_xfer);
847
				break;
848
			case 2:
849
				*data &= 0xFFFF0000;
850
				*data |= (uint32_t)(dev->max_xfer) & 0xFFFF;
851
				break;
852
			case 1:
853
				*data &= 0xFFFFFF00;
854
				*data |= (uint32_t)(dev->max_xfer) & 0xFF;
855
				break;
856
			}
857
			/* XXX handle invalid sz */
858
			break;
859
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9:
860
			if (sz == 1) {
861
				*data &= 0xFFFFFF00;
862
				*data |= (uint32_t)(dev->max_xfer >> 8) & 0xFF;
863
			}
864
			/* XXX handle invalid sz */
865
			break;
866
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10:
867
			if (sz == 1) {
868
				*data &= 0xFFFFFF00;
869
				*data |= (uint32_t)(dev->max_xfer >> 16) & 0xFF;
870
			} else if (sz == 2) {
871
				*data &= 0xFFFF0000;
872
				*data |= (uint32_t)(dev->max_xfer >> 16) & 0xFFFF;
873
			}
874
			/* XXX handle invalid sz */
875
			break;
876
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11:
877
			if (sz == 1) {
878
				*data &= 0xFFFFFF00;
879
				*data |= (uint32_t)(dev->max_xfer >> 24) & 0xFF;
880
			}
881
			/* XXX handle invalid sz */
882
			break;
883
		case VIRTIO_CONFIG_DEVICE_FEATURES:
884
			*data = dev->cfg.device_feature;
885
			break;
886
		case VIRTIO_CONFIG_GUEST_FEATURES:
887
			*data = dev->cfg.guest_feature;
888
			break;
889
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
890
			*data = dev->cfg.queue_address;
891
			break;
892
		case VIRTIO_CONFIG_QUEUE_SIZE:
893
			if (sz == 4)
894
				*data = dev->cfg.queue_size;
895
			else if (sz == 2) {
896
				*data &= 0xFFFF0000;
897
				*data |= (uint16_t)dev->cfg.queue_size;
898
			} else if (sz == 1) {
899
				*data &= 0xFFFFFF00;
900
				*data |= (uint8_t)dev->cfg.queue_size;
901
			}
902
			break;
903
		case VIRTIO_CONFIG_QUEUE_SELECT:
904
			*data = dev->cfg.queue_select;
905
			break;
906
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
907
			*data = dev->cfg.queue_notify;
908
			break;
909
		case VIRTIO_CONFIG_DEVICE_STATUS:
910
			if (sz == 4)
911
				*data = dev->cfg.device_status;
912
			else if (sz == 2) {
913
				*data &= 0xFFFF0000;
914
				*data |= (uint16_t)dev->cfg.device_status;
915
			} else if (sz == 1) {
916
				*data &= 0xFFFFFF00;
917
				*data |= (uint8_t)dev->cfg.device_status;
918
			}
919
			break;
920
		case VIRTIO_CONFIG_ISR_STATUS:
921
			*data = dev->cfg.isr_status;
922
			dev->cfg.isr_status = 0;
923
			break;
924
		}
925
	}
926
	return (0);
927
}
928
929
int
930
virtio_net_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
931
    void *cookie, uint8_t sz)
932
{
933
	struct vionet_dev *dev = (struct vionet_dev *)cookie;
934
935
	*intr = 0xFF;
936
	mutex_lock(&dev->mutex);
937
938
	if (dir == 0) {
939
		switch (reg) {
940
		case VIRTIO_CONFIG_DEVICE_FEATURES:
941
		case VIRTIO_CONFIG_QUEUE_SIZE:
942
		case VIRTIO_CONFIG_ISR_STATUS:
943
			log_warnx("%s: illegal write %x to %s",
944
			    __progname, *data, virtio_reg_name(reg));
945
			break;
946
		case VIRTIO_CONFIG_GUEST_FEATURES:
947
			dev->cfg.guest_feature = *data;
948
			break;
949
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
950
			dev->cfg.queue_address = *data;
951
			vionet_update_qa(dev);
952
			break;
953
		case VIRTIO_CONFIG_QUEUE_SELECT:
954
			dev->cfg.queue_select = *data;
955
			vionet_update_qs(dev);
956
			break;
957
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
958
			dev->cfg.queue_notify = *data;
959
			if (vionet_notifyq(dev))
960
				*intr = 1;
961
			break;
962
		case VIRTIO_CONFIG_DEVICE_STATUS:
963
			dev->cfg.device_status = *data;
964
			if (dev->cfg.device_status == 0) {
965
				log_debug("%s: device reset", __func__);
966
				dev->cfg.guest_feature = 0;
967
				dev->cfg.queue_address = 0;
968
				vionet_update_qa(dev);
969
				dev->cfg.queue_size = 0;
970
				vionet_update_qs(dev);
971
				dev->cfg.queue_select = 0;
972
				dev->cfg.queue_notify = 0;
973
				dev->cfg.isr_status = 0;
974
				dev->vq[0].last_avail = 0;
975
				dev->vq[0].notified_avail = 0;
976
				dev->vq[1].last_avail = 0;
977
				dev->vq[1].notified_avail = 0;
978
			}
979
			break;
980
		default:
981
			break;
982
		}
983
	} else {
984
		switch (reg) {
985
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
986
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
987
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
988
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
989
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
990
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
991
			*data = dev->mac[reg -
992
			    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
993
			break;
994
		case VIRTIO_CONFIG_DEVICE_FEATURES:
995
			*data = dev->cfg.device_feature;
996
			break;
997
		case VIRTIO_CONFIG_GUEST_FEATURES:
998
			*data = dev->cfg.guest_feature;
999
			break;
1000
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
1001
			*data = dev->cfg.queue_address;
1002
			break;
1003
		case VIRTIO_CONFIG_QUEUE_SIZE:
1004
			*data = dev->cfg.queue_size;
1005
			break;
1006
		case VIRTIO_CONFIG_QUEUE_SELECT:
1007
			*data = dev->cfg.queue_select;
1008
			break;
1009
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
1010
			*data = dev->cfg.queue_notify;
1011
			break;
1012
		case VIRTIO_CONFIG_DEVICE_STATUS:
1013
			*data = dev->cfg.device_status;
1014
			break;
1015
		case VIRTIO_CONFIG_ISR_STATUS:
1016
			*data = dev->cfg.isr_status;
1017
			dev->cfg.isr_status = 0;
1018
			break;
1019
		}
1020
	}
1021
1022
	mutex_unlock(&dev->mutex);
1023
	return (0);
1024
}
1025
1026
/*
1027
 * Must be called with dev->mutex acquired.
1028
 */
1029
void
1030
vionet_update_qa(struct vionet_dev *dev)
1031
{
1032
	/* Invalid queue? */
1033
	if (dev->cfg.queue_select > 1)
1034
		return;
1035
1036
	dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
1037
}
1038
1039
/*
1040
 * Must be called with dev->mutex acquired.
1041
 */
1042
void
1043
vionet_update_qs(struct vionet_dev *dev)
1044
{
1045
	/* Invalid queue? */
1046
	if (dev->cfg.queue_select > 1) {
1047
		dev->cfg.queue_size = 0;
1048
		return;
1049
	}
1050
1051
	/* Update queue address/size based on queue select */
1052
	dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
1053
	dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
1054
}
1055
1056
/*
1057
 * Must be called with dev->mutex acquired.
1058
 */
1059
int
1060
vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc)
1061
{
1062
	uint64_t q_gpa;
1063
	uint32_t vr_sz;
1064
	uint16_t idx, pkt_desc_idx, hdr_desc_idx;
1065
	ptrdiff_t off;
1066
	int ret;
1067
	char *vr;
1068
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
1069
	struct vring_avail *avail;
1070
	struct vring_used *used;
1071
	struct vring_used_elem *ue;
1072
1073
	ret = 0;
1074
1075
	if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK))
1076
		return ret;
1077
1078
	vr_sz = vring_size(VIONET_QUEUE_SIZE);
1079
	q_gpa = dev->vq[0].qa;
1080
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
1081
1082
	vr = calloc(1, vr_sz);
1083
	if (vr == NULL) {
1084
		log_warn("rx enq: calloc error getting vionet ring");
1085
		return (0);
1086
	}
1087
1088
	if (read_mem(q_gpa, vr, vr_sz)) {
1089
		log_warnx("rx enq: error reading gpa 0x%llx", q_gpa);
1090
		goto out;
1091
	}
1092
1093
	/* Compute offsets in ring of descriptors, avail ring, and used ring */
1094
	desc = (struct vring_desc *)(vr);
1095
	avail = (struct vring_avail *)(vr + dev->vq[0].vq_availoffset);
1096
	used = (struct vring_used *)(vr + dev->vq[0].vq_usedoffset);
1097
1098
	idx = dev->vq[0].last_avail & VIONET_QUEUE_MASK;
1099
1100
	if ((dev->vq[0].notified_avail & VIONET_QUEUE_MASK) == idx) {
1101
		log_debug("vionet queue notify - no space, dropping packet");
1102
		goto out;
1103
	}
1104
1105
	hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
1106
	hdr_desc = &desc[hdr_desc_idx];
1107
1108
	pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
1109
	pkt_desc = &desc[pkt_desc_idx];
1110
1111
	/* must be writable by the device, not read-only */
1112
	if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) {
1113
		log_warnx("unexpected readable rx descriptor %d",
1114
		    pkt_desc_idx);
1115
		goto out;
1116
	}
1117
1118
	/* Write packet to descriptor ring */
1119
	if (write_mem(pkt_desc->addr, pkt, sz)) {
1120
		log_warnx("vionet: rx enq packet write_mem error @ "
1121
		    "0x%llx", pkt_desc->addr);
1122
		goto out;
1123
	}
1124
1125
	ret = 1;
1126
	dev->cfg.isr_status = 1;
1127
	ue = &used->ring[used->idx & VIONET_QUEUE_MASK];
1128
	ue->id = hdr_desc_idx;
1129
	ue->len = hdr_desc->len + sz;
1130
	used->idx++;
1131
	dev->vq[0].last_avail = (dev->vq[0].last_avail + 1);
1132
	*spc = dev->vq[0].notified_avail - dev->vq[0].last_avail;
1133
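	/*
	 * Write back only the new used-ring element and the used index
	 * rather than copying the whole ring to guest memory.
	 */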
1134
	off = (char *)ue - vr;
1135
	if (write_mem(q_gpa + off, ue, sizeof *ue))
1136
		log_warnx("vionet: error writing vio ring");
1137
	else {
1138
		off = (char *)&used->idx - vr;
1139
		if (write_mem(q_gpa + off, &used->idx, sizeof used->idx))
1140
			log_warnx("vionet: error writing vio ring");
1141
	}
1142
out:
1143
	free(vr);
1144
	return (ret);
1145
}
1146
1147
/*
1148
 * vionet_rx
1149
 *
1150
 * Enqueue data that was received on a tap file descriptor
1151
 * to the vionet device queue.
1152
 *
1153
 * Must be called with dev->mutex acquired.
1154
 */
1155
static int
1156
vionet_rx(struct vionet_dev *dev)
1157
{
1158
	char buf[PAGE_SIZE];
1159
	int hasdata, num_enq = 0, spc = 0;
1160
	struct ether_header *eh;
1161
	ssize_t sz;
1162
1163
	do {
1164
		sz = read(dev->fd, buf, sizeof buf);
1165
		if (sz == -1) {
1166
			/*
1167
			 * If we get EAGAIN, no data is currently available.
1168
			 * Do not treat this as an error.
1169
			 */
1170
			if (errno != EAGAIN)
1171
				log_warn("unexpected read error on vionet "
1172
				    "device");
1173
		} else if (sz != 0) {
1174
			eh = (struct ether_header *)buf;
1175
			if (!dev->lockedmac || sz < ETHER_HDR_LEN ||
1176
			    ETHER_IS_MULTICAST(eh->ether_dhost) ||
1177
			    memcmp(eh->ether_dhost, dev->mac,
1178
			    sizeof(eh->ether_dhost)) == 0)
1179
				num_enq += vionet_enq_rx(dev, buf, sz, &spc);
1180
		} else if (sz == 0) {
1181
			log_debug("process_rx: no data");
1182
			hasdata = 0;
1183
			break;
1184
		}
1185
1186
		hasdata = fd_hasdata(dev->fd);
1187
	} while (spc && hasdata);
1188
1189
	dev->rx_pending = hasdata;
1190
	return (num_enq);
1191
}
1192
1193
/*
1194
 * vionet_rx_event
1195
 *
1196
 * Called from the event handling thread when new data can be
1197
 * received on the tap fd of a vionet device.
1198
 */
1199
static void
1200
vionet_rx_event(int fd, short kind, void *arg)
1201
{
1202
	struct vionet_dev *dev = arg;
1203
1204
	mutex_lock(&dev->mutex);
1205
1206
	/*
1207
	 * We already have other data pending to be received. The data that
1208
	 * has become available now will be enqueued to the vionet_dev
1209
	 * later.
1210
	 */
1211
	if (dev->rx_pending) {
1212
		mutex_unlock(&dev->mutex);
1213
		return;
1214
	}
1215
1216
	if (vionet_rx(dev) > 0) {
1217
		/* XXX: vcpu_id */
1218
		vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
1219
	}
1220
1221
	mutex_unlock(&dev->mutex);
1222
}
1223
1224
/*
1225
 * vionet_process_rx
1226
 *
1227
 * Processes any remaining pending receivable data for a vionet device.
1228
 * Called on VCPU exit. Although we poll on the tap file descriptor of
1229
 * a vionet_dev in a separate thread, this function still needs to be
1230
 * called on VCPU exit: it can happen that not all data fits into the
1231
 * receive queue of the vionet_dev immediately. So any outstanding data
1232
 * is handled here.
1233
 *
1234
 * Parameters:
1235
 *  vm_id: VM ID of the VM for which to process vionet events
1236
 */
1237
void
1238
vionet_process_rx(uint32_t vm_id)
1239
{
1240
	int i;
1241
1242
	for (i = 0 ; i < nr_vionet; i++) {
1243
		mutex_lock(&vionet[i].mutex);
1244
		if (!vionet[i].rx_added) {
1245
			mutex_unlock(&vionet[i].mutex);
1246
			continue;
1247
		}
1248
1249
		if (vionet[i].rx_pending) {
1250
			if (vionet_rx(&vionet[i])) {
1251
				vcpu_assert_pic_irq(vm_id, 0, vionet[i].irq);
1252
			}
1253
		}
1254
		mutex_unlock(&vionet[i].mutex);
1255
	}
1256
}
1257
1258
/*
1259
 * Must be called with dev->mutex acquired.
1260
 */
1261
void
1262
vionet_notify_rx(struct vionet_dev *dev)
1263
{
1264
	uint64_t q_gpa;
1265
	uint32_t vr_sz;
1266
	char *vr;
1267
	struct vring_avail *avail;
1268
1269
	vr_sz = vring_size(VIONET_QUEUE_SIZE);
1270
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
1271
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
1272
1273
	vr = malloc(vr_sz);
1274
	if (vr == NULL) {
1275
		log_warn("malloc error getting vionet ring");
1276
		return;
1277
	}
1278
1279
	if (read_mem(q_gpa, vr, vr_sz)) {
1280
		log_warnx("error reading gpa 0x%llx", q_gpa);
1281
		free(vr);
1282
		return;
1283
	}
1284
1285
	/* Compute offset into avail ring */
1286
	avail = (struct vring_avail *)(vr +
1287
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
1288
1289
	dev->rx_added = 1;
1290
	dev->vq[0].notified_avail = avail->idx;
1291
1292
	free(vr);
1293
}
1294
1295
/*
1296
 * Must be called with dev->mutex acquired.
1297
 *
1298
 * XXX can't trust ring data from the VM, be extra cautious.
1299
 * XXX advertise link status to guest
1300
 */
1301
int
1302
vionet_notifyq(struct vionet_dev *dev)
1303
{
1304
	uint64_t q_gpa;
1305
	uint32_t vr_sz;
1306
	uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx;
1307
	size_t pktsz;
1308
	ssize_t dhcpsz;
1309
	int ret, num_enq, ofs, spc;
1310
	char *vr, *pkt, *dhcppkt;
1311
	struct vring_desc *desc, *pkt_desc, *hdr_desc;
1312
	struct vring_avail *avail;
1313
	struct vring_used *used;
1314
	struct ether_header *eh;
1315
1316
	vr = pkt = dhcppkt = NULL;
1317
	ret = spc = 0;
1318
	dhcpsz = 0;
1319
1320
	/* Invalid queue? */
1321
	if (dev->cfg.queue_notify != 1) {
1322
		vionet_notify_rx(dev);
1323
		goto out;
1324
	}
1325
1326
	vr_sz = vring_size(VIONET_QUEUE_SIZE);
1327
	q_gpa = dev->vq[dev->cfg.queue_notify].qa;
1328
	q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
1329
1330
	vr = calloc(1, vr_sz);
1331
	if (vr == NULL) {
1332
		log_warn("calloc error getting vionet ring");
1333
		goto out;
1334
	}
1335
1336
	if (read_mem(q_gpa, vr, vr_sz)) {
1337
		log_warnx("error reading gpa 0x%llx", q_gpa);
1338
		goto out;
1339
	}
1340
1341
	/* Compute offsets in ring of descriptors, avail ring, and used ring */
1342
	desc = (struct vring_desc *)(vr);
1343
	avail = (struct vring_avail *)(vr +
1344
	    dev->vq[dev->cfg.queue_notify].vq_availoffset);
1345
	used = (struct vring_used *)(vr +
1346
	    dev->vq[dev->cfg.queue_notify].vq_usedoffset);
1347
1348
	num_enq = 0;
1349
1350
	idx = dev->vq[dev->cfg.queue_notify].last_avail & VIONET_QUEUE_MASK;
1351
1352
	if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
1353
		log_warnx("vionet tx queue notify - nothing to do?");
1354
		goto out;
1355
	}
1356
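	/*
	 * Each tx chain starts with a virtio net header descriptor
	 * followed by one or more packet-data descriptors; the header
	 * length is subtracted below to get the raw packet size.
	 */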
1357
	while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
1358
		hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
1359
		hdr_desc = &desc[hdr_desc_idx];
1360
		pktsz = 0;
1361
1362
		dxx = hdr_desc_idx;
1363
		do {
1364
			pktsz += desc[dxx].len;
1365
			dxx = desc[dxx].next;
1366
		} while (desc[dxx].flags & VRING_DESC_F_NEXT);
1367
1368
		pktsz += desc[dxx].len;
1369
1370
		/* Remove virtio header descriptor len */
1371
		pktsz -= hdr_desc->len;
1372
1373
		/*
1374
		 * XXX sanity-check pktsz
1375
		 * XXX add too-long and > PAGE_SIZE checks
1376
		 *     (PAGE_SIZE can be relaxed to 16384 later)
1377
		 */
1378
		pkt = malloc(pktsz);
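		/* XXX pkt is only freed at "out"; earlier iterations leak it */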
1379
		if (pkt == NULL) {
1380
			log_warn("malloc error alloc packet buf");
1381
			goto out;
1382
		}
1383
1384
		ofs = 0;
1385
		pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK;
1386
		pkt_desc = &desc[pkt_desc_idx];
1387
1388
		while (pkt_desc->flags & VRING_DESC_F_NEXT) {
1389
			/* must not be writable */
1390
			if (pkt_desc->flags & VRING_DESC_F_WRITE) {
1391
				log_warnx("unexpected writable tx desc "
1392
				    "%d", pkt_desc_idx);
1393
				goto out;
1394
			}
1395
1396
			/* Read packet from descriptor ring */
1397
			if (read_mem(pkt_desc->addr, pkt + ofs,
1398
			    pkt_desc->len)) {
1399
				log_warnx("vionet: packet read_mem error "
1400
				    "@ 0x%llx", pkt_desc->addr);
1401
				goto out;
1402
			}
1403
1404
			ofs += pkt_desc->len;
1405
			pkt_desc_idx = pkt_desc->next & VIONET_QUEUE_MASK;
1406
			pkt_desc = &desc[pkt_desc_idx];
1407
		}
1408
1409
		/* Now handle the tail descriptor - must not be writable */
1410
		if (pkt_desc->flags & VRING_DESC_F_WRITE) {
1411
			log_warnx("unexpected writable tx descriptor %d",
1412
			    pkt_desc_idx);
1413
			goto out;
1414
		}
1415
1416
		/* Read packet from descriptor ring */
1417
		if (read_mem(pkt_desc->addr, pkt + ofs,
1418
		    pkt_desc->len)) {
1419
			log_warnx("vionet: packet read_mem error @ "
1420
			    "0x%llx", pkt_desc->addr);
1421
			goto out;
1422
		}
1423
1424
		/* reject other source addresses */
1425
		if (dev->lockedmac && pktsz >= ETHER_HDR_LEN &&
1426
		    (eh = (struct ether_header *)pkt) &&
1427
		    memcmp(eh->ether_shost, dev->mac,
1428
		    sizeof(eh->ether_shost)) != 0)
1429
			log_debug("vionet: wrong source address %s for vm %d",
1430
			    ether_ntoa((struct ether_addr *)
1431
			    eh->ether_shost), dev->vm_id);
1432
		else if (dev->local && dhcpsz == 0 &&
1433
		    (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) {
1434
			log_debug("vionet: dhcp request,"
1435
			    " local response size %zd", dhcpsz);
1436
1437
		/* XXX signed vs unsigned here, funky cast */
1438
		} else if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
1439
			log_warnx("vionet: tx failed writing to tap: "
1440
			    "%d", errno);
1441
			goto out;
1442
		}
1443
1444
		ret = 1;
1445
		dev->cfg.isr_status = 1;
1446
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
1447
		used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
1448
		used->idx++;
1449
1450
		dev->vq[dev->cfg.queue_notify].last_avail =
1451
		    (dev->vq[dev->cfg.queue_notify].last_avail + 1);
1452
		num_enq++;
1453
1454
		idx = dev->vq[dev->cfg.queue_notify].last_avail &
1455
		    VIONET_QUEUE_MASK;
1456
	}
1457
1458
	if (write_mem(q_gpa, vr, vr_sz)) {
1459
		log_warnx("vionet: tx error writing vio ring");
1460
	}
1461
1462
	if (dhcpsz > 0) {
1463
		if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc))
1464
			ret = 1;
1465
	}
1466
1467
out:
1468
	free(vr);
1469
	free(pkt);
1470
	free(dhcppkt);
1471
1472
	return (ret);
1473
}
1474
1475
int
1476
vmmci_ctl(unsigned int cmd)
1477
{
1478
	struct timeval tv = { 0, 0 };
1479
1480
	if ((vmmci.cfg.device_status &
1481
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
1482
		return (-1);
1483
1484
	if (cmd == vmmci.cmd)
1485
		return (0);
1486
1487
	switch (cmd) {
1488
	case VMMCI_NONE:
1489
		break;
1490
	case VMMCI_SHUTDOWN:
1491
	case VMMCI_REBOOT:
1492
		/* Update command */
1493
		vmmci.cmd = cmd;
1494
1495
		/*
1496
		 * vmm VMs do not support powerdown, send a reboot request
1497
		 * instead and turn it off after the triple fault.
1498
		 */
1499
		if (cmd == VMMCI_SHUTDOWN)
1500
			cmd = VMMCI_REBOOT;
1501
1502
		/* Trigger interrupt */
1503
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
1504
		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
1505
1506
		/* Add ACK timeout */
1507
		tv.tv_sec = VMMCI_TIMEOUT;
1508
		evtimer_add(&vmmci.timeout, &tv);
1509
		break;
1510
	case VMMCI_SYNCRTC:
1511
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
1512
			/* RTC updated, request guest VM resync of its RTC */
1513
			vmmci.cmd = cmd;
1514
1515
			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
1516
			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
1517
		} else {
1518
			log_debug("%s: RTC sync skipped (guest does not "
1519
			    "support RTC sync)\n", __func__);
1520
		}
1521
		break;
1522
	default:
1523
		fatalx("invalid vmmci command: %d", cmd);
1524
	}
1525
1526
	return (0);
1527
}
1528
1529
void
1530
vmmci_ack(unsigned int cmd)
1531
{
1532
	struct timeval	 tv = { 0, 0 };
1533
1534
	switch (cmd) {
1535
	case VMMCI_NONE:
1536
		break;
1537
	case VMMCI_SHUTDOWN:
1538
		/*
1539
		 * The shutdown was requested by the VM if we don't have
1540
		 * a pending shutdown request.  In this case add a short
1541
		 * timeout to give the VM a chance to reboot before the
1542
		 * timer is expired.
1543
		 */
1544
		if (vmmci.cmd == 0) {
1545
			log_debug("%s: vm %u requested shutdown", __func__,
1546
			    vmmci.vm_id);
1547
			tv.tv_sec = VMMCI_TIMEOUT;
1548
			evtimer_add(&vmmci.timeout, &tv);
1549
			return;
1550
		}
1551
		/* FALLTHROUGH */
1552
	case VMMCI_REBOOT:
1553
		/*
1554
		 * If the VM acknowledged our shutdown request, give it
1555
		 * enough time to shut down or reboot gracefully.  This
1556
		 * might take a considerable amount of time (running
1557
		 * rc.shutdown on the VM), so increase the timeout before
1558
		 * killing it forcefully.
1559
		 */
1560
		if (cmd == vmmci.cmd &&
1561
		    evtimer_pending(&vmmci.timeout, NULL)) {
1562
			log_debug("%s: vm %u acknowledged shutdown request",
1563
			    __func__, vmmci.vm_id);
1564
			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
1565
			evtimer_add(&vmmci.timeout, &tv);
1566
		}
1567
		break;
1568
	case VMMCI_SYNCRTC:
1569
		log_debug("%s: vm %u acknowledged RTC sync request",
1570
		    __func__, vmmci.vm_id);
1571
		vmmci.cmd = VMMCI_NONE;
1572
		break;
1573
	default:
1574
		log_warnx("%s: illegal request %u", __func__, cmd);
1575
		break;
1576
	}
1577
}
1578
1579
void
1580
vmmci_timeout(int fd, short type, void *arg)
1581
{
1582
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
1583
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
1584
}
1585
1586
int
1587
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
1588
    void *unused, uint8_t sz)
1589
{
1590
	*intr = 0xFF;
1591
1592
	if (dir == 0) {
1593
		switch (reg) {
1594
		case VIRTIO_CONFIG_DEVICE_FEATURES:
1595
		case VIRTIO_CONFIG_QUEUE_SIZE:
1596
		case VIRTIO_CONFIG_ISR_STATUS:
1597
			log_warnx("%s: illegal write %x to %s",
1598
			    __progname, *data, virtio_reg_name(reg));
1599
			break;
1600
		case VIRTIO_CONFIG_GUEST_FEATURES:
1601
			vmmci.cfg.guest_feature = *data;
1602
			break;
1603
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
1604
			vmmci.cfg.queue_address = *data;
1605
			break;
1606
		case VIRTIO_CONFIG_QUEUE_SELECT:
1607
			vmmci.cfg.queue_select = *data;
1608
			break;
1609
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
1610
			vmmci.cfg.queue_notify = *data;
1611
			break;
1612
		case VIRTIO_CONFIG_DEVICE_STATUS:
1613
			vmmci.cfg.device_status = *data;
1614
			break;
1615
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
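			/* guest acknowledges a command by writing it back */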
1616
			vmmci_ack(*data);
1617
			break;
1618
		}
1619
	} else {
1620
		switch (reg) {
1621
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
1622
			*data = vmmci.cmd;
1623
			break;
1624
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
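			/*
			 * tv_sec and tv_usec are exposed as pairs of 32-bit
			 * registers (low words at +4/+12, high words at
			 * +8/+16).  XXX the "<< 32" values below are
			 * truncated when stored through the 32-bit *data.
			 */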
1625
			/* Update time once when reading the first register */
1626
			gettimeofday(&vmmci.time, NULL);
1627
			*data = (uint64_t)vmmci.time.tv_sec;
1628
			break;
1629
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
1630
			*data = (uint64_t)vmmci.time.tv_sec << 32;
1631
			break;
1632
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
1633
			*data = (uint64_t)vmmci.time.tv_usec;
1634
			break;
1635
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
1636
			*data = (uint64_t)vmmci.time.tv_usec << 32;
1637
			break;
1638
		case VIRTIO_CONFIG_DEVICE_FEATURES:
1639
			*data = vmmci.cfg.device_feature;
1640
			break;
1641
		case VIRTIO_CONFIG_GUEST_FEATURES:
1642
			*data = vmmci.cfg.guest_feature;
1643
			break;
1644
		case VIRTIO_CONFIG_QUEUE_ADDRESS:
1645
			*data = vmmci.cfg.queue_address;
1646
			break;
1647
		case VIRTIO_CONFIG_QUEUE_SIZE:
1648
			*data = vmmci.cfg.queue_size;
1649
			break;
1650
		case VIRTIO_CONFIG_QUEUE_SELECT:
1651
			*data = vmmci.cfg.queue_select;
1652
			break;
1653
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
1654
			*data = vmmci.cfg.queue_notify;
1655
			break;
1656
		case VIRTIO_CONFIG_DEVICE_STATUS:
1657
			*data = vmmci.cfg.device_status;
1658
			break;
1659
		case VIRTIO_CONFIG_ISR_STATUS:
1660
			*data = vmmci.cfg.isr_status;
1661
			vmmci.cfg.isr_status = 0;
1662
			break;
1663
		}
1664
	}
1665
	return (0);
1666
}
1667
1668
void
1669
virtio_init(struct vmd_vm *vm, int *child_disks, int *child_taps)
1670
{
1671
	struct vmop_create_params *vmc = &vm->vm_params;
1672
	struct vm_create_params *vcp = &vmc->vmc_params;
1673
	uint8_t id;
1674
	uint8_t i;
1675
	int ret;
1676
	off_t sz;
1677
1678
	/* Virtio entropy device */
1679
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
1680
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
1681
	    PCI_SUBCLASS_SYSTEM_MISC,
1682
	    PCI_VENDOR_OPENBSD,
1683
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
1684
		log_warnx("%s: can't add PCI virtio rng device",
1685
		    __progname);
1686
		return;
1687
	}
1688
1689
	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
1690
		log_warnx("%s: can't add bar for virtio rng device",
1691
		    __progname);
1692
		return;
1693
	}
1694
1695
	memset(&viornd, 0, sizeof(viornd));
1696
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
1697
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
1698
	    VIORND_QUEUE_SIZE;
1699
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
1700
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
1701
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
1702
	viornd.pci_id = id;
1703
1704
	if (vcp->vcp_ndisks > 0) {
1705
		nr_vioblk = vcp->vcp_ndisks;
1706
		vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
1707
		if (vioblk == NULL) {
1708
			log_warn("%s: calloc failure allocating vioblks",
1709
			    __progname);
1710
			return;
1711
		}
1712
1713
		/* One virtio block device for each disk defined in vcp */
1714
		for (i = 0; i < vcp->vcp_ndisks; i++) {
1715
			if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
1716
				continue;
1717
1718
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
1719
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
1720
			    PCI_CLASS_MASS_STORAGE,
1721
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
1722
			    PCI_VENDOR_OPENBSD,
1723
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
1724
				log_warnx("%s: can't add PCI virtio block "
1725
				    "device", __progname);
1726
				return;
1727
			}
1728
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
1729
			    &vioblk[i])) {
1730
				log_warnx("%s: can't add bar for virtio block "
1731
				    "device", __progname);
1732
				return;
1733
			}
1734
			vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
1735
			vioblk[i].vq[0].vq_availoffset =
1736
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
1737
			vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
1738
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
1739
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
1740
			vioblk[i].vq[0].last_avail = 0;
1741
			vioblk[i].fd = child_disks[i];
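			/* capacity is advertised in 512-byte sectors */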
1742
			vioblk[i].sz = sz / 512;
1743
			vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
1744
			vioblk[i].max_xfer = 1048576;
1745
			vioblk[i].pci_id = id;
1746
		}
1747
	}
1748
1749
	if (vcp->vcp_nnics > 0) {
1750
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
1751
		if (vionet == NULL) {
1752
			log_warn("%s: calloc failure allocating vionets",
1753
			    __progname);
1754
			return;
1755
		}
1756
1757
		nr_vionet = vcp->vcp_nnics;
1758
		/* Virtio network */
1759
		for (i = 0; i < vcp->vcp_nnics; i++) {
1760
			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
1761
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
1762
			    PCI_SUBCLASS_SYSTEM_MISC,
1763
			    PCI_VENDOR_OPENBSD,
1764
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
1765
				log_warnx("%s: can't add PCI virtio net device",
1766
				    __progname);
1767
				return;
1768
			}
1769
1770
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
1771
			    &vionet[i])) {
1772
				log_warnx("%s: can't add bar for virtio net "
1773
				    "device", __progname);
1774
				return;
1775
			}
1776
1777
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);
1778
			if (ret) {
1779
				errno = ret;
1780
				log_warn("%s: could not initialize mutex "
1781
				    "for vionet device", __progname);
1782
				return;
1783
			}
1784
1785
			vionet[i].vq[0].qs = VIONET_QUEUE_SIZE;
1786
			vionet[i].vq[0].vq_availoffset =
1787
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
1788
			vionet[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
1789
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
1790
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
1791
			vionet[i].vq[0].last_avail = 0;
1792
			vionet[i].vq[1].qs = VIONET_QUEUE_SIZE;
1793
			vionet[i].vq[1].vq_availoffset =
1794
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
1795
			vionet[i].vq[1].vq_usedoffset = VIRTQUEUE_ALIGN(
1796
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
1797
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
1798
			vionet[i].vq[1].last_avail = 0;
1799
			vionet[i].vq[1].notified_avail = 0;
1800
			vionet[i].fd = child_taps[i];
1801
			vionet[i].rx_pending = 0;
1802
			vionet[i].vm_id = vcp->vcp_id;
1803
			vionet[i].vm_vmid = vm->vm_vmid;
1804
			vionet[i].irq = pci_get_dev_irq(id);
1805
1806
			event_set(&vionet[i].event, vionet[i].fd,
1807
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
1808
			if (event_add(&vionet[i].event, NULL)) {
1809
				log_warn("could not initialize vionet event "
1810
				    "handler");
1811
				return;
1812
			}
1813
1814
			/* MAC address has been assigned by the parent */
1815
			memcpy(&vionet[i].mac, &vcp->vcp_macs[i], 6);
1816
			vionet[i].cfg.device_feature = VIRTIO_NET_F_MAC;
1817
1818
			vionet[i].lockedmac =
1819
			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
1820
			vionet[i].local =
1821
			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
1822
			vionet[i].idx = i;
1823
			vionet[i].pci_id = id;
1824
1825
			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s",
1826
			    __func__, vcp->vcp_name, i,
1827
			    ether_ntoa((void *)vionet[i].mac),
1828
			    vionet[i].lockedmac ? ", locked" : "",
1829
			    vionet[i].local ? ", local" : "");
1830
		}
1831
	}
1832
1833
	/* virtio control device */
1834
	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
1835
	    PCI_PRODUCT_OPENBSD_CONTROL,
1836
	    PCI_CLASS_COMMUNICATIONS,
1837
	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
1838
	    PCI_VENDOR_OPENBSD,
1839
	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
1840
		log_warnx("%s: can't add PCI vmm control device",
1841
		    __progname);
1842
		return;
1843
	}
1844
1845
	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
1846
		log_warnx("%s: can't add bar for vmm control device",
1847
		    __progname);
1848
		return;
1849
	}
1850
1851
	memset(&vmmci, 0, sizeof(vmmci));
1852
	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
1853
	    VMMCI_F_SYNCRTC;
1854
	vmmci.vm_id = vcp->vcp_id;
1855
	vmmci.irq = pci_get_dev_irq(id);
1856
	vmmci.pci_id = id;
1857
1858
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
1859
}
1860
1861
int
1862
vmmci_restore(int fd, uint32_t vm_id)
1863
{
1864
	log_debug("%s: receiving vmmci", __func__);
1865
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
1866
		log_warnx("%s: error reading vmmci from fd", __func__);
1867
		return (-1);
1868
	}
1869
1870
	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
1871
		log_warnx("%s: can't set bar fn for vmm control device",
1872
		    __progname);
1873
		return (-1);
1874
	}
1875
	vmmci.vm_id = vm_id;
1876
	memset(&vmmci.timeout, 0, sizeof(struct event));
1877
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
1878
	return (0);
1879
}
1880
1881
int
1882
viornd_restore(int fd)
1883
{
1884
	log_debug("%s: receiving viornd", __func__);
1885
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
1886
		log_warnx("%s: error reading viornd from fd", __func__);
1887
		return (-1);
1888
	}
1889
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
1890
		log_warnx("%s: can't set bar fn for virtio rng device",
1891
		    __progname);
1892
		return (-1);
1893
	}
1894
	return (0);
1895
}
1896
1897
int
1898
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
1899
{
1900
	struct vmop_create_params *vmc = &vm->vm_params;
1901
	struct vm_create_params *vcp = &vmc->vmc_params;
1902
	uint8_t i;
1903
	int ret;
1904
1905
	nr_vionet = vcp->vcp_nnics;
1906
	if (vcp->vcp_nnics > 0) {
1907
		vionet = calloc(vcp->vcp_nnics, sizeof(struct vionet_dev));
1908
		if (vionet == NULL) {
1909
			log_warn("%s: calloc failure allocating vionets",
1910
			    __progname);
1911
			return (-1);
1912
		}
1913
		log_debug("%s: receiving vionet", __func__);
1914
		if (atomicio(read, fd, vionet,
1915
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) !=
1916
		    vcp->vcp_nnics * sizeof(struct vionet_dev)) {
1917
			log_warnx("%s: error reading vionet from fd",
1918
			    __func__);
1919
			return (-1);
1920
		}
1921
1922
		/* Virtio network */
1923
		for (i = 0; i < vcp->vcp_nnics; i++) {
1924
			if (pci_set_bar_fn(vionet[i].pci_id, 0, virtio_net_io,
1925
			    &vionet[i])) {
1926
				log_warnx("%s: can't set bar fn for virtio net "
1927
				    "device", __progname);
1928
				return (-1);
1929
			}
1930
1931
			memset(&vionet[i].mutex, 0, sizeof(pthread_mutex_t));
1932
			ret = pthread_mutex_init(&vionet[i].mutex, NULL);
1933
1934
			if (ret) {
1935
				errno = ret;
1936
				log_warn("%s: could not initialize mutex "
1937
				    "for vionet device", __progname);
1938
				return (-1);
1939
			}
1940
			vionet[i].fd = child_taps[i];
1941
			vionet[i].rx_pending = 0;
1942
			vionet[i].vm_id = vcp->vcp_id;
1943
			vionet[i].vm_vmid = vm->vm_vmid;
1944
1945
			memset(&vionet[i].event, 0, sizeof(struct event));
1946
			event_set(&vionet[i].event, vionet[i].fd,
1947
			    EV_READ | EV_PERSIST, vionet_rx_event, &vionet[i]);
1948
			if (event_add(&vionet[i].event, NULL)) {
1949
				log_warn("could not initialize vionet event "
1950
				    "handler");
1951
				return (-1);
1952
			}
1953
		}
1954
	}
1955
	return (0);
1956
}
1957
1958
int
1959
vioblk_restore(int fd, struct vm_create_params *vcp, int *child_disks)
1960
{
1961
	uint8_t i;
1962
	off_t sz;
1963
1964
	nr_vioblk = vcp->vcp_ndisks;
1965
	vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
1966
	if (vioblk == NULL) {
1967
		log_warn("%s: calloc failure allocating vioblks", __progname);
1968
		return (-1);
1969
	}
1970
	log_debug("%s: receiving vioblk", __func__);
1971
	if (atomicio(read, fd, vioblk,
1972
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
1973
	    nr_vioblk * sizeof(struct vioblk_dev)) {
1974
		log_warnx("%s: error reading vioblk from fd", __func__);
1975
		return (-1);
1976
	}
1977
	for (i = 0; i < vcp->vcp_ndisks; i++) {
1978
		if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
1979
			continue;
1980
1981
		if (pci_set_bar_fn(vioblk[i].pci_id, 0, virtio_blk_io,
1982
		    &vioblk[i])) {
1983
			log_warnx("%s: can't set bar fn for virtio block "
1984
			    "device", __progname);
1985
			return (-1);
1986
		}
1987
		vioblk[i].fd = child_disks[i];
1988
	}
1989
	return (0);
1990
}
1991
1992
int
1993
virtio_restore(int fd, struct vmd_vm *vm, int *child_disks, int *child_taps)
1994
{
1995
	struct vmop_create_params *vmc = &vm->vm_params;
1996
	struct vm_create_params *vcp = &vmc->vmc_params;
1997
	int ret;
1998
1999
	if ((ret = viornd_restore(fd)) == -1)
2000
		return ret;
2001
2002
	if ((ret = vioblk_restore(fd, vcp, child_disks)) == -1)
2003
		return ret;
2004
2005
	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
2006
		return ret;
2007
2008
	if ((ret = vmmci_restore(fd, vcp->vcp_id)) == -1)
2009
		return ret;
2010
2011
	return (0);
2012
}
2013
2014
int
2015
viornd_dump(int fd)
2016
{
2017
	log_debug("%s: sending viornd", __func__);
2018
	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
2019
		log_warnx("%s: error writing viornd to fd", __func__);
2020
		return (-1);
2021
	}
2022
	return (0);
2023
}
2024
2025
int
2026
vmmci_dump(int fd)
2027
{
2028
	log_debug("%s: sending vmmci", __func__);
2029
	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
2030
		log_warnx("%s: error writing vmmci to fd", __func__);
2031
		return (-1);
2032
	}
2033
	return (0);
2034
}
2035
2036
int
2037
vionet_dump(int fd)
2038
{
2039
	log_debug("%s: sending vionet", __func__);
2040
	if (atomicio(vwrite, fd, vionet,
2041
	    nr_vionet * sizeof(struct vionet_dev)) !=
2042
	    nr_vionet * sizeof(struct vionet_dev)) {
2043
		log_warnx("%s: error writing vionet to fd", __func__);
2044
		return (-1);
2045
	}
2046
	return (0);
2047
}
2048
2049
int
2050
vioblk_dump(int fd)
2051
{
2052
	log_debug("%s: sending vioblk", __func__);
2053
	if (atomicio(vwrite, fd, vioblk,
2054
	    nr_vioblk * sizeof(struct vioblk_dev)) !=
2055
	    nr_vioblk * sizeof(struct vioblk_dev)) {
2056
		log_warnx("%s: error writing vioblk to fd", __func__);
2057
		return (-1);
2058
	}
2059
	return (0);
2060
}
2061
2062
int
2063
virtio_dump(int fd)
2064
{
2065
	int ret;
2066
2067
	if ((ret = viornd_dump(fd)) == -1)
2068
		return ret;
2069
2070
	if ((ret = vioblk_dump(fd)) == -1)
2071
		return ret;
2072
2073
	if ((ret = vionet_dump(fd)) == -1)
2074
		return ret;
2075
2076
	if ((ret = vmmci_dump(fd)) == -1)
2077
		return ret;
2078
2079
	return (0);
2080
}