GCC Code Coverage Report
Directory: ./
File: lib/libpcap/optimize.c        Date: 2017-11-13

                Exec    Total   Coverage
Lines:          55      779     7.1 %
Branches:       28      598     4.7 %
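(Read the summary as executed / total: 55 of 779 instrumented lines ≈ 7.1 % and 28 of 598 branches ≈ 4.7 % were exercised by the test run. The per-line Exec counts further down show where those hits fall, mostly in slength(), count_stmts(), convert_code_r() and icode_to_fcode().)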

Line Branch Exec Source
1
/*	$OpenBSD: optimize.c,v 1.19 2016/02/05 16:58:39 canacar Exp $	*/
2
3
/*
4
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5
 *	The Regents of the University of California.  All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that: (1) source code distributions
9
 * retain the above copyright notice and this paragraph in its entirety, (2)
10
 * distributions including binary code include the above copyright notice and
11
 * this paragraph in its entirety in the documentation or other materials
12
 * provided with the distribution, and (3) all advertising materials mentioning
13
 * features or use of this software display the following acknowledgement:
14
 * ``This product includes software developed by the University of California,
15
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16
 * the University nor the names of its contributors may be used to endorse
17
 * or promote products derived from this software without specific prior
18
 * written permission.
19
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22
 *
23
 *  Optimization module for tcpdump intermediate representation.
24
 */
25
26
#include <sys/types.h>
27
#include <sys/time.h>
28
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <stdint.h>
32
#include <string.h>
33
34
#include "pcap-int.h"
35
36
#include "gencode.h"
37
38
#ifdef HAVE_OS_PROTO_H
39
#include "os-proto.h"
40
#endif
41
42
#ifdef BDEBUG
43
extern int dflag;
44
#endif
45
46
#define A_ATOM BPF_MEMWORDS
47
#define X_ATOM (BPF_MEMWORDS+1)
48
49
#define NOP -1
50
51
/*
52
 * This define is used to represent *both* the accumulator and
53
 * x register in use-def computations.
54
 * Currently, the use-def code assumes only one definition per instruction.
55
 */
56
#define AX_ATOM N_ATOMS
57
58
/*
59
 * A flag to indicate that further optimization is needed.
60
 * Iterative passes are continued until a given pass yields no
61
 * branch movement.
62
 */
63
static int done;
64
65
/*
66
 * A block is marked if and only if its mark equals the current mark.
67
 * Rather than traverse the code array, marking each item, 'cur_mark' is
68
 * incremented.  This automatically makes each element unmarked.
69
 */
70
static int cur_mark;
71
#define isMarked(p) ((p)->mark == cur_mark)
72
#define unMarkAll() cur_mark += 1
73
#define Mark(p) ((p)->mark = cur_mark)
74
75
static void opt_init(struct block *);
76
static void opt_cleanup(void);
77
78
static void make_marks(struct block *);
79
static void mark_code(struct block *);
80
81
static void intern_blocks(struct block *);
82
83
static int eq_slist(struct slist *, struct slist *);
84
85
static void find_levels_r(struct block *);
86
87
static void find_levels(struct block *);
88
static void find_dom(struct block *);
89
static void propedom(struct edge *);
90
static void find_edom(struct block *);
91
static void find_closure(struct block *);
92
static int atomuse(struct stmt *);
93
static int atomdef(struct stmt *);
94
static void compute_local_ud(struct block *);
95
static void find_ud(struct block *);
96
static void init_val(void);
97
static int F(int, int, int);
98
static __inline void vstore(struct stmt *, int *, int, int);
99
static void opt_blk(struct block *, int);
100
static int use_conflict(struct block *, struct block *);
101
static void opt_j(struct edge *);
102
static void or_pullup(struct block *);
103
static void and_pullup(struct block *);
104
static void opt_blks(struct block *, int);
105
static __inline void link_inedge(struct edge *, struct block *);
106
static void find_inedges(struct block *);
107
static void opt_root(struct block **);
108
static void opt_loop(struct block *, int);
109
static void fold_op(struct stmt *, int, int);
110
static __inline struct slist *this_op(struct slist *);
111
static void opt_not(struct block *);
112
static void opt_peep(struct block *);
113
static void opt_stmt(struct stmt *, int[], int);
114
static void deadstmt(struct stmt *, struct stmt *[]);
115
static void opt_deadstores(struct block *);
116
static void opt_blk(struct block *, int);
117
static int use_conflict(struct block *, struct block *);
118
static void opt_j(struct edge *);
119
static struct block *fold_edge(struct block *, struct edge *);
120
static __inline int eq_blk(struct block *, struct block *);
121
static int slength(struct slist *);
122
static int count_blocks(struct block *);
123
static void number_blks_r(struct block *);
124
static int count_stmts(struct block *);
125
static int convert_code_r(struct block *);
126
#ifdef BDEBUG
127
static void opt_dump(struct block *);
128
#endif
129
130
static int n_blocks;
131
struct block **blocks;
132
static int n_edges;
133
struct edge **edges;
134
135
/*
136
 * A bit vector set representation of the dominators.
137
 * We round up the set size to the next power of two.
138
 */
139
static int nodewords;
140
static int edgewords;
141
struct block **levels;
142
bpf_u_int32 *space1;
143
bpf_u_int32 *space2;
144
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
145
/*
146
 * True if 'a' is in uset p.
147
 */
148
#define SET_MEMBER(p, a) \
149
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
150
151
/*
152
 * Add 'a' to uset p.
153
 */
154
#define SET_INSERT(p, a) \
155
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
156
157
/*
158
 * Delete 'a' from uset p.
159
 */
160
#define SET_DELETE(p, a) \
161
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
162
163
/*
164
 * a := a intersect b
165
 */
166
#define SET_INTERSECT(a, b, n)\
167
{\
168
	bpf_u_int32 *_x = a, *_y = b;\
169
	int _n = n;\
170
	while (--_n >= 0) *_x++ &= *_y++;\
171
}
172
173
/*
174
 * a := a - b
175
 */
176
#define SET_SUBTRACT(a, b, n)\
177
{\
178
	bpf_u_int32 *_x = a, *_y = b;\
179
	int _n = n;\
180
	while (--_n >= 0) *_x++ &=~ *_y++;\
181
}
182
183
/*
184
 * a := a union b
185
 */
186
#define SET_UNION(a, b, n)\
187
{\
188
	bpf_u_int32 *_x = a, *_y = b;\
189
	int _n = n;\
190
	while (--_n >= 0) *_x++ |= *_y++;\
191
}
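
The uset macros above pack one bit per element into an array of 32-bit words. The following standalone sketch is illustrative only and is not part of optimize.c; the typedef, the macro names and the two-word set size are inventions of the example. It shows the same indexing arithmetic in isolation.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t word_t;
    #define WORD_BITS (8 * sizeof(word_t))

    /* Same idiom as SET_INSERT/SET_MEMBER above: element a lives in
     * word a / 32, at bit a % 32 of that word. */
    #define INSERT(p, a) ((p)[(unsigned)(a) / WORD_BITS] |= (1U << ((unsigned)(a) % WORD_BITS)))
    #define MEMBER(p, a) ((p)[(unsigned)(a) / WORD_BITS] & (1U << ((unsigned)(a) % WORD_BITS)))

    int
    main(void)
    {
    	word_t set[2] = { 0, 0 };	/* room for elements 0..63 */

    	INSERT(set, 3);			/* word 0, bit 3 */
    	INSERT(set, 40);		/* word 1, bit 8 */
    	printf("%d %d %d\n",
    	    MEMBER(set, 3) != 0, MEMBER(set, 40) != 0, MEMBER(set, 41) != 0);
    	return 0;			/* prints "1 1 0" */
    }
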
192
193
static uset all_dom_sets;
194
static uset all_closure_sets;
195
static uset all_edge_sets;
196
197
#ifndef MAX
198
#define MAX(a,b) ((a)>(b)?(a):(b))
199
#endif
200
201
static void
202
find_levels_r(b)
203
	struct block *b;
204
{
205
	int level;
206
207
	if (isMarked(b))
208
		return;
209
210
	Mark(b);
211
	b->link = 0;
212
213
	if (JT(b)) {
214
		find_levels_r(JT(b));
215
		find_levels_r(JF(b));
216
		level = MAX(JT(b)->level, JF(b)->level) + 1;
217
	} else
218
		level = 0;
219
	b->level = level;
220
	b->link = levels[level];
221
	levels[level] = b;
222
}
223
224
/*
225
 * Level graph.  The levels go from 0 at the leaves to
226
 * N_LEVELS at the root.  The levels[] array points to the
227
 * first node of the level list, whose elements are linked
228
 * with the 'link' field of the struct block.
229
 */
230
static void
231
find_levels(root)
232
	struct block *root;
233
{
234
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
235
	unMarkAll();
236
	find_levels_r(root);
237
}
238
239
/*
240
 * Find dominator relationships.
241
 * Assumes graph has been leveled.
242
 */
243
static void
244
find_dom(root)
245
	struct block *root;
246
{
247
	int i;
248
	struct block *b;
249
	bpf_u_int32 *x;
250
251
	/*
252
	 * Initialize sets to contain all nodes.
253
	 */
254
	x = all_dom_sets;
255
	i = n_blocks * nodewords;
256
	while (--i >= 0)
257
		*x++ = ~0;
258
	/* Root starts off empty. */
259
	for (i = nodewords; --i >= 0;)
260
		root->dom[i] = 0;
261
262
	/* root->level is the highest level node found. */
263
	for (i = root->level; i >= 0; --i) {
264
		for (b = levels[i]; b; b = b->link) {
265
			SET_INSERT(b->dom, b->id);
266
			if (JT(b) == 0)
267
				continue;
268
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
269
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
270
		}
271
	}
272
}
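
find_dom() relies on the DAG being leveled: every set starts "full", the root starts empty, and a single root-to-leaves sweep intersects each node's set into its successors. Below is a minimal standalone sketch of that sweep on a four-node diamond; the node names, successor arrays and single-word bitmask are all inventions of the example, not part of optimize.c.

    #include <stdint.h>
    #include <stdio.h>

    /* A diamond CFG: R branches to A and B, which both rejoin at S. */
    enum { R, A, B, S, N };
    static const int jt[N] = { A, S, S, -1 };	/* true successor, -1 = leaf */
    static const int jf[N] = { B, S, S, -1 };	/* false successor */

    int
    main(void)
    {
    	uint32_t dom[N];
    	int n;

    	for (n = 0; n < N; n++)
    		dom[n] = ~0U;		/* start "full", as in find_dom() */
    	dom[R] = 0;			/* the root starts empty */

    	/* Visit nodes root-first (here: increasing index): each node adds
    	 * itself, then its successors keep only the dominators they share
    	 * with it. */
    	for (n = 0; n < N; n++) {
    		dom[n] |= 1U << n;
    		if (jt[n] < 0)
    			continue;
    		dom[jt[n]] &= dom[n];
    		dom[jf[n]] &= dom[n];
    	}
    	for (n = 0; n < N; n++)		/* prints 0x1, 0x3, 0x5, 0x9 */
    		printf("dom(%d) = %#x\n", n, (unsigned)dom[n]);
    	return 0;
    }
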
273
274
static void
275
propedom(ep)
276
	struct edge *ep;
277
{
278
	SET_INSERT(ep->edom, ep->id);
279
	if (ep->succ) {
280
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
281
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
282
	}
283
}
284
285
/*
286
 * Compute edge dominators.
287
 * Assumes graph has been leveled and predecessors established.
288
 */
289
static void
290
find_edom(root)
291
	struct block *root;
292
{
293
	int i;
294
	uset x;
295
	struct block *b;
296
297
	x = all_edge_sets;
298
	for (i = n_edges * edgewords; --i >= 0; )
299
		x[i] = ~0;
300
301
	/* root->level is the highest level node found. */
302
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304
	for (i = root->level; i >= 0; --i) {
305
		for (b = levels[i]; b != 0; b = b->link) {
306
			propedom(&b->et);
307
			propedom(&b->ef);
308
		}
309
	}
310
}
311
312
/*
313
 * Find the backwards transitive closure of the flow graph.  These sets
314
 * are backwards in the sense that we find the set of nodes that reach
315
 * a given node, not the set of nodes that can be reached by a node.
316
 *
317
 * Assumes graph has been leveled.
318
 */
319
static void
320
find_closure(root)
321
	struct block *root;
322
{
323
	int i;
324
	struct block *b;
325
326
	/*
327
	 * Initialize sets to contain no nodes.
328
	 */
329
	memset((char *)all_closure_sets, 0,
330
	      n_blocks * nodewords * sizeof(*all_closure_sets));
331
332
	/* root->level is the highest level node found. */
333
	for (i = root->level; i >= 0; --i) {
334
		for (b = levels[i]; b; b = b->link) {
335
			SET_INSERT(b->closure, b->id);
336
			if (JT(b) == 0)
337
				continue;
338
			SET_UNION(JT(b)->closure, b->closure, nodewords);
339
			SET_UNION(JF(b)->closure, b->closure, nodewords);
340
		}
341
	}
342
}
343
344
/*
345
 * Return the register number that is used by s.  If A and X are both
346
 * used, return AX_ATOM.  If no register is used, return -1.
347
 *
348
 * The implementation should probably change to an array access.
349
 */
350
static int
351
atomuse(s)
352
	struct stmt *s;
353
{
354
	int c = s->code;
355
356
	if (c == NOP)
357
		return -1;
358
359
	switch (BPF_CLASS(c)) {
360
361
	case BPF_RET:
362
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
363
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
364
365
	case BPF_LD:
366
	case BPF_LDX:
367
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
368
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
369
370
	case BPF_ST:
371
		return A_ATOM;
372
373
	case BPF_STX:
374
		return X_ATOM;
375
376
	case BPF_JMP:
377
	case BPF_ALU:
378
		if (BPF_SRC(c) == BPF_X)
379
			return AX_ATOM;
380
		return A_ATOM;
381
382
	case BPF_MISC:
383
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
384
	}
385
	abort();
386
	/* NOTREACHED */
387
}
388
389
/*
390
 * Return the register number that is defined by 's'.  We assume that
391
 * a single stmt cannot define more than one register.  If no register
392
 * is defined, return -1.
393
 *
394
 * The implementation should probably change to an array access.
395
 */
396
static int
397
atomdef(s)
398
	struct stmt *s;
399
{
400
	if (s->code == NOP)
401
		return -1;
402
403
	switch (BPF_CLASS(s->code)) {
404
405
	case BPF_LD:
406
	case BPF_ALU:
407
		return A_ATOM;
408
409
	case BPF_LDX:
410
		return X_ATOM;
411
412
	case BPF_ST:
413
	case BPF_STX:
414
		return s->k;
415
416
	case BPF_MISC:
417
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
418
	}
419
	return -1;
420
}
421
422
static void
423
compute_local_ud(b)
424
	struct block *b;
425
{
426
	struct slist *s;
427
	atomset def = 0, use = 0, kill = 0;
428
	int atom;
429
430
	for (s = b->stmts; s; s = s->next) {
431
		if (s->s.code == NOP)
432
			continue;
433
		atom = atomuse(&s->s);
434
		if (atom >= 0) {
435
			if (atom == AX_ATOM) {
436
				if (!ATOMELEM(def, X_ATOM))
437
					use |= ATOMMASK(X_ATOM);
438
				if (!ATOMELEM(def, A_ATOM))
439
					use |= ATOMMASK(A_ATOM);
440
			}
441
			else if (atom < N_ATOMS) {
442
				if (!ATOMELEM(def, atom))
443
					use |= ATOMMASK(atom);
444
			}
445
			else
446
				abort();
447
		}
448
		atom = atomdef(&s->s);
449
		if (atom >= 0) {
450
			if (!ATOMELEM(use, atom))
451
				kill |= ATOMMASK(atom);
452
			def |= ATOMMASK(atom);
453
		}
454
	}
455
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
456
		use |= ATOMMASK(A_ATOM);
457
458
	b->def = def;
459
	b->kill = kill;
460
	b->in_use = use;
461
}
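
As a worked example of the sets computed above (my own illustration, not a block from any real filter): for a block whose statements are "ldx M[1]; txa; st M[2]", atom 1 is read before any definition and therefore ends up in in_use, while X, A and atom 2 are each written before being read within the block, so all three land in both def and kill. If the block then ends in a jump that tests the accumulator, A is already in def, so it is not added to in_use.
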
462
463
/*
464
 * Assume graph is already leveled.
465
 */
466
static void
467
find_ud(root)
468
	struct block *root;
469
{
470
	int i, maxlevel;
471
	struct block *p;
472
473
	/*
474
	 * root->level is the highest level node found;
475
	 * count down from there.
476
	 */
477
	maxlevel = root->level;
478
	for (i = maxlevel; i >= 0; --i)
479
		for (p = levels[i]; p; p = p->link) {
480
			compute_local_ud(p);
481
			p->out_use = 0;
482
		}
483
484
	for (i = 1; i <= maxlevel; ++i) {
485
		for (p = levels[i]; p; p = p->link) {
486
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
487
			p->in_use |= p->out_use &~ p->kill;
488
		}
489
	}
490
}
491
492
/*
493
 * These data structures are used in a Cocke and Schwartz style
494
 * value numbering scheme.  Since the flowgraph is acyclic,
495
 * exit values can be propagated from a node's predecessors
496
 * provided it is uniquely defined.
497
 */
498
struct valnode {
499
	int code;
500
	int v0, v1;
501
	int val;
502
	struct valnode *next;
503
};
504
505
#define MODULUS 213
506
static struct valnode *hashtbl[MODULUS];
507
static int curval;
508
static int maxval;
509
510
/* Integer constants mapped with the load immediate opcode. */
511
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
512
513
struct vmapinfo {
514
	int is_const;
515
	bpf_int32 const_val;
516
};
517
518
struct vmapinfo *vmap;
519
struct valnode *vnode_base;
520
struct valnode *next_vnode;
521
522
static void
523
init_val()
524
{
525
	curval = 0;
526
	next_vnode = vnode_base;
527
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
528
	memset((char *)hashtbl, 0, sizeof hashtbl);
529
}
530
531
/* Because we really don't have an IR, this stuff is a little messy. */
532
static int
533
F(code, v0, v1)
534
	int code;
535
	int v0, v1;
536
{
537
	u_int hash;
538
	int val;
539
	struct valnode *p;
540
541
	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
542
	hash %= MODULUS;
543
544
	for (p = hashtbl[hash]; p; p = p->next)
545
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
546
			return p->val;
547
548
	val = ++curval;
549
	if (BPF_MODE(code) == BPF_IMM &&
550
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
551
		vmap[val].const_val = v0;
552
		vmap[val].is_const = 1;
553
	}
554
	p = next_vnode++;
555
	p->val = val;
556
	p->code = code;
557
	p->v0 = v0;
558
	p->v1 = v1;
559
	p->next = hashtbl[hash];
560
	hashtbl[hash] = p;
561
562
	return val;
563
}
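
F() is the heart of the value-numbering scheme: a (code, v0, v1) triple that has been seen before gets the old value number back, which is what lets vstore() and opt_stmt() recognize recomputations and turn them into NOPs. The minimal sketch below is not part of optimize.c; it uses a linear scan instead of the hash table above, and the opcode constants in the comments are only illustrative. It demonstrates just that "same inputs, same number" property.

    #include <stdio.h>

    /* Toy value-numbering table: each distinct (code, v0, v1) triple gets
     * one number, so recomputations are detected by a plain lookup. */
    struct triple { int code, v0, v1; };
    static struct triple tbl[64];
    static int ntbl;

    static int
    number(int code, int v0, int v1)
    {
    	int i;

    	for (i = 0; i < ntbl; i++)
    		if (tbl[i].code == code && tbl[i].v0 == v0 && tbl[i].v1 == v1)
    			return i + 1;		/* seen before: same value */
    	tbl[ntbl].code = code;
    	tbl[ntbl].v0 = v0;
    	tbl[ntbl].v1 = v1;
    	return ++ntbl;				/* new value number */
    }

    int
    main(void)
    {
    	int a = number(0x20, 14, 0);	/* e.g. a load from offset 14 */
    	int b = number(0x20, 16, 0);	/* a different load */
    	int c = number(0x20, 14, 0);	/* the same load again */

    	printf("%d %d %d\n", a, b, c);	/* prints "1 2 1": a == c */
    	return 0;
    }
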
564
565
static __inline void
566
vstore(s, valp, newval, alter)
567
	struct stmt *s;
568
	int *valp;
569
	int newval;
570
	int alter;
571
{
572
	if (alter && *valp == newval)
573
		s->code = NOP;
574
	else
575
		*valp = newval;
576
}
577
578
static void
579
fold_op(s, v0, v1)
580
	struct stmt *s;
581
	int v0, v1;
582
{
583
	bpf_int32 a, b;
584
585
	a = vmap[v0].const_val;
586
	b = vmap[v1].const_val;
587
588
	switch (BPF_OP(s->code)) {
589
	case BPF_ADD:
590
		a += b;
591
		break;
592
593
	case BPF_SUB:
594
		a -= b;
595
		break;
596
597
	case BPF_MUL:
598
		a *= b;
599
		break;
600
601
	case BPF_DIV:
602
		if (b == 0)
603
			bpf_error("division by zero");
604
		a /= b;
605
		break;
606
607
	case BPF_AND:
608
		a &= b;
609
		break;
610
611
	case BPF_OR:
612
		a |= b;
613
		break;
614
615
	case BPF_LSH:
616
		a <<= b;
617
		break;
618
619
	case BPF_RSH:
620
		a >>= b;
621
		break;
622
623
	case BPF_NEG:
624
		a = -a;
625
		break;
626
627
	default:
628
		abort();
629
	}
630
	s->k = a;
631
	s->code = BPF_LD|BPF_IMM;
632
	done = 0;
633
}
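
For example, if the accumulator and the X register both carry known constants 2 and 3 and the statement is BPF_ALU|BPF_ADD|BPF_X, fold_op() computes 2 + 3 and rewrites the statement in place to BPF_LD|BPF_IMM with k = 5, then clears 'done' so the surrounding optimization loop makes another pass over the now-simpler code.
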
634
635
static __inline struct slist *
636
this_op(s)
637
	struct slist *s;
638
{
639
	while (s != 0 && s->s.code == NOP)
640
		s = s->next;
641
	return s;
642
}
643
644
static void
645
opt_not(b)
646
	struct block *b;
647
{
648
	struct block *tmp = JT(b);
649
650
	JT(b) = JF(b);
651
	JF(b) = tmp;
652
}
653
654
static void
655
opt_peep(b)
656
	struct block *b;
657
{
658
	struct slist *s;
659
	struct slist *next, *last;
660
	int val;
661
662
	s = b->stmts;
663
	if (s == 0)
664
		return;
665
666
	last = s;
667
	while (1) {
668
		s = this_op(s);
669
		if (s == 0)
670
			break;
671
		next = this_op(s->next);
672
		if (next == 0)
673
			break;
674
		last = next;
675
676
		/*
677
		 * st  M[k]	-->	st  M[k]
678
		 * ldx M[k]		tax
679
		 */
680
		if (s->s.code == BPF_ST &&
681
		    next->s.code == (BPF_LDX|BPF_MEM) &&
682
		    s->s.k == next->s.k) {
683
			done = 0;
684
			next->s.code = BPF_MISC|BPF_TAX;
685
		}
686
		/*
687
		 * ld  #k	-->	ldx  #k
688
		 * tax			txa
689
		 */
690
		if (s->s.code == (BPF_LD|BPF_IMM) &&
691
		    next->s.code == (BPF_MISC|BPF_TAX)) {
692
			s->s.code = BPF_LDX|BPF_IMM;
693
			next->s.code = BPF_MISC|BPF_TXA;
694
			done = 0;
695
		}
696
		/*
697
		 * This is an ugly special case, but it happens
698
		 * when you say tcp[k] or udp[k] where k is a constant.
699
		 */
700
		if (s->s.code == (BPF_LD|BPF_IMM)) {
701
			struct slist *add, *tax, *ild;
702
703
			/*
704
			 * Check that X isn't used on exit from this
705
			 * block (which the optimizer might cause).
706
			 * We know the code generator won't generate
707
			 * any local dependencies.
708
			 */
709
			if (ATOMELEM(b->out_use, X_ATOM))
710
				break;
711
712
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
713
				add = next;
714
			else
715
				add = this_op(next->next);
716
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
717
				break;
718
719
			tax = this_op(add->next);
720
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
721
				break;
722
723
			ild = this_op(tax->next);
724
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
725
			    BPF_MODE(ild->s.code) != BPF_IND)
726
				break;
727
			/*
728
			 * XXX We need to check that X is not
729
			 * subsequently used.  We know we can eliminate the
730
			 * accumulator modifications since it is defined
731
			 * by the last stmt of this sequence.
732
			 *
733
			 * We want to turn this sequence:
734
			 *
735
			 * (004) ldi     #0x2		{s}
736
			 * (005) ldxms   [14]		{next}  -- optional
737
			 * (006) addx			{add}
738
			 * (007) tax			{tax}
739
			 * (008) ild     [x+0]		{ild}
740
			 *
741
			 * into this sequence:
742
			 *
743
			 * (004) nop
744
			 * (005) ldxms   [14]
745
			 * (006) nop
746
			 * (007) nop
747
			 * (008) ild     [x+2]
748
			 *
749
			 */
750
			ild->s.k += s->s.k;
751
			s->s.code = NOP;
752
			add->s.code = NOP;
753
			tax->s.code = NOP;
754
			done = 0;
755
		}
756
		s = next;
757
	}
758
	/*
759
	 * If we have a subtract to do a comparison, and the X register
760
	 * is a known constant, we can merge this value into the
761
	 * comparison.
762
	 */
763
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
764
	    !ATOMELEM(b->out_use, A_ATOM)) {
765
		val = b->val[X_ATOM];
766
		if (vmap[val].is_const) {
767
			int op;
768
769
			b->s.k += vmap[val].const_val;
770
			op = BPF_OP(b->s.code);
771
			if (op == BPF_JGT || op == BPF_JGE) {
772
				struct block *t = JT(b);
773
				JT(b) = JF(b);
774
				JF(b) = t;
775
				b->s.k += 0x80000000;
776
			}
777
			last->s.code = NOP;
778
			done = 0;
779
		} else if (b->s.k == 0) {
780
			/*
781
			 * sub x  ->	nop
782
			 * j  #0	j  x
783
			 */
784
			last->s.code = NOP;
785
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
786
				BPF_X;
787
			done = 0;
788
		}
789
	}
790
	/*
791
	 * Likewise, a constant subtract can be simplified.
792
	 */
793
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
794
		 !ATOMELEM(b->out_use, A_ATOM)) {
795
		int op;
796
797
		b->s.k += last->s.k;
798
		last->s.code = NOP;
799
		op = BPF_OP(b->s.code);
800
		if (op == BPF_JGT || op == BPF_JGE) {
801
			struct block *t = JT(b);
802
			JT(b) = JF(b);
803
			JF(b) = t;
804
			b->s.k += 0x80000000;
805
		}
806
		done = 0;
807
	}
808
	/*
809
	 * and #k	nop
810
	 * jeq #0  ->	jset #k
811
	 */
812
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
813
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
814
		b->s.k = last->s.k;
815
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
816
		last->s.code = NOP;
817
		done = 0;
818
		opt_not(b);
819
	}
820
	/*
821
	 * If the accumulator is a known constant, we can compute the
822
	 * comparison result.
823
	 */
824
	val = b->val[A_ATOM];
825
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
826
		bpf_int32 v = vmap[val].const_val;
827
		switch (BPF_OP(b->s.code)) {
828
829
		case BPF_JEQ:
830
			v = v == b->s.k;
831
			break;
832
833
		case BPF_JGT:
834
			v = (unsigned)v > b->s.k;
835
			break;
836
837
		case BPF_JGE:
838
			v = (unsigned)v >= b->s.k;
839
			break;
840
841
		case BPF_JSET:
842
			v &= b->s.k;
843
			break;
844
845
		default:
846
			abort();
847
		}
848
		if (JF(b) != JT(b))
849
			done = 0;
850
		if (v)
851
			JF(b) = JT(b);
852
		else
853
			JT(b) = JF(b);
854
	}
855
}
856
857
/*
858
 * Compute the symbolic value of the expression in 's', and update
859
 * anything it defines in the value table 'val'.  If 'alter' is true,
860
 * do various optimizations.  This code would be cleaner if symbolic
861
 * evaluation and code transformations weren't folded together.
862
 */
863
static void
864
opt_stmt(s, val, alter)
865
	struct stmt *s;
866
	int val[];
867
	int alter;
868
{
869
	int op;
870
	int v;
871
872
	switch (s->code) {
873
874
	case BPF_LD|BPF_ABS|BPF_W:
875
	case BPF_LD|BPF_ABS|BPF_H:
876
	case BPF_LD|BPF_ABS|BPF_B:
877
		v = F(s->code, s->k, 0L);
878
		vstore(s, &val[A_ATOM], v, alter);
879
		break;
880
881
	case BPF_LD|BPF_IND|BPF_W:
882
	case BPF_LD|BPF_IND|BPF_H:
883
	case BPF_LD|BPF_IND|BPF_B:
884
		v = val[X_ATOM];
885
		if (alter && vmap[v].is_const) {
886
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
887
			s->k += vmap[v].const_val;
888
			v = F(s->code, s->k, 0L);
889
			done = 0;
890
		}
891
		else
892
			v = F(s->code, s->k, v);
893
		vstore(s, &val[A_ATOM], v, alter);
894
		break;
895
896
	case BPF_LD|BPF_LEN:
897
		v = F(s->code, 0L, 0L);
898
		vstore(s, &val[A_ATOM], v, alter);
899
		break;
900
901
	case BPF_LD|BPF_IMM:
902
		v = K(s->k);
903
		vstore(s, &val[A_ATOM], v, alter);
904
		break;
905
906
	case BPF_LDX|BPF_IMM:
907
		v = K(s->k);
908
		vstore(s, &val[X_ATOM], v, alter);
909
		break;
910
911
	case BPF_LDX|BPF_MSH|BPF_B:
912
		v = F(s->code, s->k, 0L);
913
		vstore(s, &val[X_ATOM], v, alter);
914
		break;
915
916
	case BPF_ALU|BPF_NEG:
917
		if (alter && vmap[val[A_ATOM]].is_const) {
918
			s->code = BPF_LD|BPF_IMM;
919
			s->k = -vmap[val[A_ATOM]].const_val;
920
			val[A_ATOM] = K(s->k);
921
		}
922
		else
923
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
924
		break;
925
926
	case BPF_ALU|BPF_ADD|BPF_K:
927
	case BPF_ALU|BPF_SUB|BPF_K:
928
	case BPF_ALU|BPF_MUL|BPF_K:
929
	case BPF_ALU|BPF_DIV|BPF_K:
930
	case BPF_ALU|BPF_AND|BPF_K:
931
	case BPF_ALU|BPF_OR|BPF_K:
932
	case BPF_ALU|BPF_LSH|BPF_K:
933
	case BPF_ALU|BPF_RSH|BPF_K:
934
		op = BPF_OP(s->code);
935
		if (alter) {
936
			if (s->k == 0) {
937
				if (op == BPF_ADD || op == BPF_SUB ||
938
				    op == BPF_LSH || op == BPF_RSH ||
939
				    op == BPF_OR) {
940
					s->code = NOP;
941
					break;
942
				}
943
				if (op == BPF_MUL || op == BPF_AND) {
944
					s->code = BPF_LD|BPF_IMM;
945
					val[A_ATOM] = K(s->k);
946
					break;
947
				}
948
			}
949
			if (vmap[val[A_ATOM]].is_const) {
950
				fold_op(s, val[A_ATOM], K(s->k));
951
				val[A_ATOM] = K(s->k);
952
				break;
953
			}
954
		}
955
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
956
		break;
957
958
	case BPF_ALU|BPF_ADD|BPF_X:
959
	case BPF_ALU|BPF_SUB|BPF_X:
960
	case BPF_ALU|BPF_MUL|BPF_X:
961
	case BPF_ALU|BPF_DIV|BPF_X:
962
	case BPF_ALU|BPF_AND|BPF_X:
963
	case BPF_ALU|BPF_OR|BPF_X:
964
	case BPF_ALU|BPF_LSH|BPF_X:
965
	case BPF_ALU|BPF_RSH|BPF_X:
966
		op = BPF_OP(s->code);
967
		if (alter && vmap[val[X_ATOM]].is_const) {
968
			if (vmap[val[A_ATOM]].is_const) {
969
				fold_op(s, val[A_ATOM], val[X_ATOM]);
970
				val[A_ATOM] = K(s->k);
971
			}
972
			else {
973
				s->code = BPF_ALU|BPF_K|op;
974
				s->k = vmap[val[X_ATOM]].const_val;
975
				done = 0;
976
				val[A_ATOM] =
977
					F(s->code, val[A_ATOM], K(s->k));
978
			}
979
			break;
980
		}
981
		/*
982
		 * Check if we're doing something to an accumulator
983
		 * that is 0, and simplify.  This may not seem like
984
		 * much of a simplification but it could open up further
985
		 * optimizations.
986
		 * XXX We could also check for mul by 1, and -1, etc.
987
		 */
988
		if (alter && vmap[val[A_ATOM]].is_const
989
		    && vmap[val[A_ATOM]].const_val == 0) {
990
			if (op == BPF_ADD || op == BPF_OR ||
991
			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
992
				s->code = BPF_MISC|BPF_TXA;
993
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
994
				break;
995
			}
996
			else if (op == BPF_MUL || op == BPF_DIV ||
997
				 op == BPF_AND) {
998
				s->code = BPF_LD|BPF_IMM;
999
				s->k = 0;
1000
				vstore(s, &val[A_ATOM], K(s->k), alter);
1001
				break;
1002
			}
1003
			else if (op == BPF_NEG) {
1004
				s->code = NOP;
1005
				break;
1006
			}
1007
		}
1008
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1009
		break;
1010
1011
	case BPF_MISC|BPF_TXA:
1012
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1013
		break;
1014
1015
	case BPF_LD|BPF_MEM:
1016
		v = val[s->k];
1017
		if (alter && vmap[v].is_const) {
1018
			s->code = BPF_LD|BPF_IMM;
1019
			s->k = vmap[v].const_val;
1020
			done = 0;
1021
		}
1022
		vstore(s, &val[A_ATOM], v, alter);
1023
		break;
1024
1025
	case BPF_MISC|BPF_TAX:
1026
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1027
		break;
1028
1029
	case BPF_LDX|BPF_MEM:
1030
		v = val[s->k];
1031
		if (alter && vmap[v].is_const) {
1032
			s->code = BPF_LDX|BPF_IMM;
1033
			s->k = vmap[v].const_val;
1034
			done = 0;
1035
		}
1036
		vstore(s, &val[X_ATOM], v, alter);
1037
		break;
1038
1039
	case BPF_ST:
1040
		vstore(s, &val[s->k], val[A_ATOM], alter);
1041
		break;
1042
1043
	case BPF_STX:
1044
		vstore(s, &val[s->k], val[X_ATOM], alter);
1045
		break;
1046
	}
1047
}
1048
1049
static void
1050
deadstmt(s, last)
1051
	struct stmt *s;
1052
	struct stmt *last[];
1053
{
1054
	int atom;
1055
1056
	atom = atomuse(s);
1057
	if (atom >= 0) {
1058
		if (atom == AX_ATOM) {
1059
			last[X_ATOM] = 0;
1060
			last[A_ATOM] = 0;
1061
		}
1062
		else
1063
			last[atom] = 0;
1064
	}
1065
	atom = atomdef(s);
1066
	if (atom >= 0) {
1067
		if (last[atom]) {
1068
			done = 0;
1069
			last[atom]->code = NOP;
1070
		}
1071
		last[atom] = s;
1072
	}
1073
}
1074
1075
static void
1076
opt_deadstores(b)
1077
	struct block *b;
1078
{
1079
	struct slist *s;
1080
	int atom;
1081
	struct stmt *last[N_ATOMS];
1082
1083
	memset((char *)last, 0, sizeof last);
1084
1085
	for (s = b->stmts; s != 0; s = s->next)
1086
		deadstmt(&s->s, last);
1087
	deadstmt(&b->s, last);
1088
1089
	for (atom = 0; atom < N_ATOMS; ++atom)
1090
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1091
			last[atom]->code = NOP;
1092
			done = 0;
1093
		}
1094
}
1095
1096
static void
1097
opt_blk(b, do_stmts)
1098
	struct block *b;
1099
	int do_stmts;
1100
{
1101
	struct slist *s;
1102
	struct edge *p;
1103
	int i;
1104
	bpf_int32 aval;
1105
1106
#if 0
1107
	for (s = b->stmts; s && s->next; s = s->next)
1108
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
1109
			do_stmts = 0;
1110
			break;
1111
		}
1112
#endif
1113
1114
	/*
1115
	 * Initialize the atom values.
1116
	 * If we have no predecessors, everything is undefined.
1117
	 * Otherwise, we inherit our values from our predecessors.
1118
	 * If any register has an ambiguous value (i.e. control paths are
1119
	 * merging) give it the undefined value of 0.
1120
	 */
1121
	p = b->in_edges;
1122
	if (p == 0)
1123
		memset((char *)b->val, 0, sizeof(b->val));
1124
	else {
1125
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1126
		while ((p = p->next) != NULL) {
1127
			for (i = 0; i < N_ATOMS; ++i)
1128
				if (b->val[i] != p->pred->val[i])
1129
					b->val[i] = 0;
1130
		}
1131
	}
1132
	aval = b->val[A_ATOM];
1133
	for (s = b->stmts; s; s = s->next)
1134
		opt_stmt(&s->s, b->val, do_stmts);
1135
1136
	/*
1137
	 * This is a special case: if we don't use anything from this
1138
	 * block, and we load the accumulator with a value that is
1139
	 * already there, or if this block is a return,
1140
	 * eliminate all the statements.
1141
	 */
1142
	if (do_stmts &&
1143
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1144
	     BPF_CLASS(b->s.code) == BPF_RET)) {
1145
		if (b->stmts != 0) {
1146
			b->stmts = 0;
1147
			done = 0;
1148
		}
1149
	} else {
1150
		opt_peep(b);
1151
		opt_deadstores(b);
1152
	}
1153
	/*
1154
	 * Set up values for branch optimizer.
1155
	 */
1156
	if (BPF_SRC(b->s.code) == BPF_K)
1157
		b->oval = K(b->s.k);
1158
	else
1159
		b->oval = b->val[X_ATOM];
1160
	b->et.code = b->s.code;
1161
	b->ef.code = -b->s.code;
1162
}
1163
1164
/*
1165
 * Return true if any register that is used on exit from 'succ' has
1166
 * an exit value that is different from the corresponding exit value
1167
 * from 'b'.
1168
 */
1169
static int
1170
use_conflict(b, succ)
1171
	struct block *b, *succ;
1172
{
1173
	int atom;
1174
	atomset use = succ->out_use;
1175
1176
	if (use == 0)
1177
		return 0;
1178
1179
	for (atom = 0; atom < N_ATOMS; ++atom)
1180
		if (ATOMELEM(use, atom))
1181
			if (b->val[atom] != succ->val[atom])
1182
				return 1;
1183
	return 0;
1184
}
1185
1186
static struct block *
1187
fold_edge(child, ep)
1188
	struct block *child;
1189
	struct edge *ep;
1190
{
1191
	int sense;
1192
	int aval0, aval1, oval0, oval1;
1193
	int code = ep->code;
1194
1195
	if (code < 0) {
1196
		code = -code;
1197
		sense = 0;
1198
	} else
1199
		sense = 1;
1200
1201
	if (child->s.code != code)
1202
		return 0;
1203
1204
	aval0 = child->val[A_ATOM];
1205
	oval0 = child->oval;
1206
	aval1 = ep->pred->val[A_ATOM];
1207
	oval1 = ep->pred->oval;
1208
1209
	if (aval0 != aval1)
1210
		return 0;
1211
1212
	if (oval0 == oval1)
1213
		/*
1214
		 * The operands are identical, so the
1215
		 * result is true if a true branch was
1216
		 * taken to get here, otherwise false.
1217
		 */
1218
		return sense ? JT(child) : JF(child);
1219
1220
	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1221
		/*
1222
		 * At this point, we only know the comparison if we
1223
		 * came down the true branch, and it was an equality
1224
		 * comparison with a constant.  We rely on the fact that
1225
		 * distinct constants have distinct value numbers.
1226
		 */
1227
		return JF(child);
1228
1229
	return 0;
1230
}
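
Concretely: fold_edge() asks whether a dominating edge already settled the successor's test. If that edge performed the identical comparison (same opcode, same value numbers for the accumulator and the operand), the successor must take the same branch as the one we arrived on; and if we arrived on the true branch of a 'jeq #k' on the same accumulator value but against a different constant, the successor's equality test cannot also be true, so its false branch is taken. In either case opt_j() retargets the incoming edge straight to that known target, skipping the redundant node.
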
1231
1232
static void
1233
opt_j(ep)
1234
	struct edge *ep;
1235
{
1236
	int i, k;
1237
	struct block *target;
1238
1239
	if (JT(ep->succ) == 0)
1240
		return;
1241
1242
	if (JT(ep->succ) == JF(ep->succ)) {
1243
		/*
1244
		 * Common branch targets can be eliminated, provided
1245
		 * there is no data dependency.
1246
		 */
1247
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1248
			done = 0;
1249
			ep->succ = JT(ep->succ);
1250
		}
1251
	}
1252
	/*
1253
	 * For each edge dominator that matches the successor of this
1254
	 * edge, promote the edge successor to its grandchild.
1255
	 *
1256
	 * XXX We violate the set abstraction here in favor of a reasonably
1257
	 * efficient loop.
1258
	 */
1259
 top:
1260
	for (i = 0; i < edgewords; ++i) {
1261
		bpf_u_int32 x = ep->edom[i];
1262
1263
		while (x != 0) {
1264
			k = ffs(x) - 1;
1265
			x &=~ (1 << k);
1266
			k += i * BITS_PER_WORD;
1267
1268
			target = fold_edge(ep->succ, edges[k]);
1269
			/*
1270
			 * Check that there is no data dependency between
1271
			 * nodes that will be violated if we move the edge.
1272
			 */
1273
			if (target != 0 && !use_conflict(ep->pred, target)) {
1274
				done = 0;
1275
				ep->succ = target;
1276
				if (JT(target) != 0)
1277
					/*
1278
					 * Start over unless we hit a leaf.
1279
					 */
1280
					goto top;
1281
				return;
1282
			}
1283
		}
1284
	}
1285
}
1286
1287
1288
static void
1289
or_pullup(b)
1290
	struct block *b;
1291
{
1292
	int val, at_top;
1293
	struct block *pull;
1294
	struct block **diffp, **samep;
1295
	struct edge *ep;
1296
1297
	ep = b->in_edges;
1298
	if (ep == 0)
1299
		return;
1300
1301
	/*
1302
	 * Make sure each predecessor loads the same value.
1303
	 * XXX why?
1304
	 */
1305
	val = ep->pred->val[A_ATOM];
1306
	for (ep = ep->next; ep != 0; ep = ep->next)
1307
		if (val != ep->pred->val[A_ATOM])
1308
			return;
1309
1310
	if (JT(b->in_edges->pred) == b)
1311
		diffp = &JT(b->in_edges->pred);
1312
	else
1313
		diffp = &JF(b->in_edges->pred);
1314
1315
	at_top = 1;
1316
	while (1) {
1317
		if (*diffp == 0)
1318
			return;
1319
1320
		if (JT(*diffp) != JT(b))
1321
			return;
1322
1323
		if (!SET_MEMBER((*diffp)->dom, b->id))
1324
			return;
1325
1326
		if ((*diffp)->val[A_ATOM] != val)
1327
			break;
1328
1329
		diffp = &JF(*diffp);
1330
		at_top = 0;
1331
	}
1332
	samep = &JF(*diffp);
1333
	while (1) {
1334
		if (*samep == 0)
1335
			return;
1336
1337
		if (JT(*samep) != JT(b))
1338
			return;
1339
1340
		if (!SET_MEMBER((*samep)->dom, b->id))
1341
			return;
1342
1343
		if ((*samep)->val[A_ATOM] == val)
1344
			break;
1345
1346
		/* XXX Need to check that there are no data dependencies
1347
		   between dp0 and dp1.  Currently, the code generator
1348
		   will not produce such dependencies. */
1349
		samep = &JF(*samep);
1350
	}
1351
#ifdef notdef
1352
	/* XXX This doesn't cover everything. */
1353
	for (i = 0; i < N_ATOMS; ++i)
1354
		if ((*samep)->val[i] != pred->val[i])
1355
			return;
1356
#endif
1357
	/* Pull up the node. */
1358
	pull = *samep;
1359
	*samep = JF(pull);
1360
	JF(pull) = *diffp;
1361
1362
	/*
1363
	 * At the top of the chain, each predecessor needs to point at the
1364
	 * pulled up node.  Inside the chain, there is only one predecessor
1365
	 * to worry about.
1366
	 */
1367
	if (at_top) {
1368
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1369
			if (JT(ep->pred) == b)
1370
				JT(ep->pred) = pull;
1371
			else
1372
				JF(ep->pred) = pull;
1373
		}
1374
	}
1375
	else
1376
		*diffp = pull;
1377
1378
	done = 0;
1379
}
1380
1381
static void
1382
and_pullup(b)
1383
	struct block *b;
1384
{
1385
	int val, at_top;
1386
	struct block *pull;
1387
	struct block **diffp, **samep;
1388
	struct edge *ep;
1389
1390
	ep = b->in_edges;
1391
	if (ep == 0)
1392
		return;
1393
1394
	/*
1395
	 * Make sure each predecessor loads the same value.
1396
	 */
1397
	val = ep->pred->val[A_ATOM];
1398
	for (ep = ep->next; ep != 0; ep = ep->next)
1399
		if (val != ep->pred->val[A_ATOM])
1400
			return;
1401
1402
	if (JT(b->in_edges->pred) == b)
1403
		diffp = &JT(b->in_edges->pred);
1404
	else
1405
		diffp = &JF(b->in_edges->pred);
1406
1407
	at_top = 1;
1408
	while (1) {
1409
		if (*diffp == 0)
1410
			return;
1411
1412
		if (JF(*diffp) != JF(b))
1413
			return;
1414
1415
		if (!SET_MEMBER((*diffp)->dom, b->id))
1416
			return;
1417
1418
		if ((*diffp)->val[A_ATOM] != val)
1419
			break;
1420
1421
		diffp = &JT(*diffp);
1422
		at_top = 0;
1423
	}
1424
	samep = &JT(*diffp);
1425
	while (1) {
1426
		if (*samep == 0)
1427
			return;
1428
1429
		if (JF(*samep) != JF(b))
1430
			return;
1431
1432
		if (!SET_MEMBER((*samep)->dom, b->id))
1433
			return;
1434
1435
		if ((*samep)->val[A_ATOM] == val)
1436
			break;
1437
1438
		/* XXX Need to check that there are no data dependencies
1439
		   between diffp and samep.  Currently, the code generator
1440
		   will not produce such dependencies. */
1441
		samep = &JT(*samep);
1442
	}
1443
#ifdef notdef
1444
	/* XXX This doesn't cover everything. */
1445
	for (i = 0; i < N_ATOMS; ++i)
1446
		if ((*samep)->val[i] != pred->val[i])
1447
			return;
1448
#endif
1449
	/* Pull up the node. */
1450
	pull = *samep;
1451
	*samep = JT(pull);
1452
	JT(pull) = *diffp;
1453
1454
	/*
1455
	 * At the top of the chain, each predecessor needs to point at the
1456
	 * pulled up node.  Inside the chain, there is only one predecessor
1457
	 * to worry about.
1458
	 */
1459
	if (at_top) {
1460
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1461
			if (JT(ep->pred) == b)
1462
				JT(ep->pred) = pull;
1463
			else
1464
				JF(ep->pred) = pull;
1465
		}
1466
	}
1467
	else
1468
		*diffp = pull;
1469
1470
	done = 0;
1471
}
1472
1473
static void
1474
opt_blks(root, do_stmts)
1475
	struct block *root;
1476
	int do_stmts;
1477
{
1478
	int i, maxlevel;
1479
	struct block *p;
1480
1481
	init_val();
1482
	maxlevel = root->level;
1483
	for (i = maxlevel; i >= 0; --i)
1484
		for (p = levels[i]; p; p = p->link)
1485
			opt_blk(p, do_stmts);
1486
1487
	if (do_stmts)
1488
		/*
1489
		 * No point trying to move branches; it can't possibly
1490
		 * make a difference at this point.
1491
		 */
1492
		return;
1493
1494
	for (i = 1; i <= maxlevel; ++i) {
1495
		for (p = levels[i]; p; p = p->link) {
1496
			opt_j(&p->et);
1497
			opt_j(&p->ef);
1498
		}
1499
	}
1500
	for (i = 1; i <= maxlevel; ++i) {
1501
		for (p = levels[i]; p; p = p->link) {
1502
			or_pullup(p);
1503
			and_pullup(p);
1504
		}
1505
	}
1506
}
1507
1508
static __inline void
1509
link_inedge(parent, child)
1510
	struct edge *parent;
1511
	struct block *child;
1512
{
1513
	parent->next = child->in_edges;
1514
	child->in_edges = parent;
1515
}
1516
1517
static void
1518
find_inedges(root)
1519
	struct block *root;
1520
{
1521
	int i;
1522
	struct block *b;
1523
1524
	for (i = 0; i < n_blocks; ++i)
1525
		blocks[i]->in_edges = 0;
1526
1527
	/*
1528
	 * Traverse the graph, adding each edge to the predecessor
1529
	 * list of its successors.  Skip the leaves (i.e. level 0).
1530
	 */
1531
	for (i = root->level; i > 0; --i) {
1532
		for (b = levels[i]; b != 0; b = b->link) {
1533
			link_inedge(&b->et, JT(b));
1534
			link_inedge(&b->ef, JF(b));
1535
		}
1536
	}
1537
}
1538
1539
static void
1540
opt_root(b)
1541
	struct block **b;
1542
{
1543
	struct slist *tmp, *s;
1544
1545
	s = (*b)->stmts;
1546
	(*b)->stmts = 0;
1547
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1548
		*b = JT(*b);
1549
1550
	tmp = (*b)->stmts;
1551
	if (tmp != 0)
1552
		sappend(s, tmp);
1553
	(*b)->stmts = s;
1554
1555
	/*
1556
	 * If the root node is a return, then there is no
1557
	 * point executing any statements (since the bpf machine
1558
	 * has no side effects).
1559
	 */
1560
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1561
		(*b)->stmts = 0;
1562
}
1563
1564
static void
1565
opt_loop(root, do_stmts)
1566
	struct block *root;
1567
	int do_stmts;
1568
{
1569
1570
#ifdef BDEBUG
1571
	if (dflag > 1)
1572
		opt_dump(root);
1573
#endif
1574
	do {
1575
		done = 1;
1576
		find_levels(root);
1577
		find_dom(root);
1578
		find_closure(root);
1579
		find_inedges(root);
1580
		find_ud(root);
1581
		find_edom(root);
1582
		opt_blks(root, do_stmts);
1583
#ifdef BDEBUG
1584
		if (dflag > 1)
1585
			opt_dump(root);
1586
#endif
1587
	} while (!done);
1588
}
1589
1590
/*
1591
 * Optimize the filter code in its dag representation.
1592
 */
1593
void
1594
bpf_optimize(rootp)
1595
	struct block **rootp;
1596
{
1597
	struct block *root;
1598
1599
	root = *rootp;
1600
1601
	opt_init(root);
1602
	opt_loop(root, 0);
1603
	opt_loop(root, 1);
1604
	intern_blocks(root);
1605
	opt_root(rootp);
1606
	opt_cleanup();
1607
}
1608
1609
static void
1610
make_marks(p)
1611
	struct block *p;
1612
{
1613
	if (!isMarked(p)) {
1614
		Mark(p);
1615
		if (BPF_CLASS(p->s.code) != BPF_RET) {
1616
			make_marks(JT(p));
1617
			make_marks(JF(p));
1618
		}
1619
	}
1620
}
1621
1622
/*
1623
 * Mark code array such that isMarked(i) is true
1624
 * only for nodes that are alive.
1625
 */
1626
static void
1627
mark_code(p)
1628
	struct block *p;
1629
{
1630
	cur_mark += 1;
1631
	make_marks(p);
1632
}
1633
1634
/*
1635
 * True iff the two stmt lists load the same value from the packet into
1636
 * the accumulator.
1637
 */
1638
static int
1639
eq_slist(x, y)
1640
	struct slist *x, *y;
1641
{
1642
	while (1) {
1643
		while (x && x->s.code == NOP)
1644
			x = x->next;
1645
		while (y && y->s.code == NOP)
1646
			y = y->next;
1647
		if (x == 0)
1648
			return y == 0;
1649
		if (y == 0)
1650
			return x == 0;
1651
		if (x->s.code != y->s.code || x->s.k != y->s.k)
1652
			return 0;
1653
		x = x->next;
1654
		y = y->next;
1655
	}
1656
}
1657
1658
static __inline int
1659
eq_blk(b0, b1)
1660
	struct block *b0, *b1;
1661
{
1662
	if (b0->s.code == b1->s.code &&
1663
	    b0->s.k == b1->s.k &&
1664
	    b0->et.succ == b1->et.succ &&
1665
	    b0->ef.succ == b1->ef.succ)
1666
		return eq_slist(b0->stmts, b1->stmts);
1667
	return 0;
1668
}
1669
1670
static void
1671
intern_blocks(root)
1672
	struct block *root;
1673
{
1674
	struct block *p;
1675
	int i, j;
1676
	int done;
1677
 top:
1678
	done = 1;
1679
	for (i = 0; i < n_blocks; ++i)
1680
		blocks[i]->link = 0;
1681
1682
	mark_code(root);
1683
1684
	for (i = n_blocks - 1; --i >= 0; ) {
1685
		if (!isMarked(blocks[i]))
1686
			continue;
1687
		for (j = i + 1; j < n_blocks; ++j) {
1688
			if (!isMarked(blocks[j]))
1689
				continue;
1690
			if (eq_blk(blocks[i], blocks[j])) {
1691
				blocks[i]->link = blocks[j]->link ?
1692
					blocks[j]->link : blocks[j];
1693
				break;
1694
			}
1695
		}
1696
	}
1697
	for (i = 0; i < n_blocks; ++i) {
1698
		p = blocks[i];
1699
		if (JT(p) == 0)
1700
			continue;
1701
		if (JT(p)->link) {
1702
			done = 0;
1703
			JT(p) = JT(p)->link;
1704
		}
1705
		if (JF(p)->link) {
1706
			done = 0;
1707
			JF(p) = JF(p)->link;
1708
		}
1709
	}
1710
	if (!done)
1711
		goto top;
1712
}
1713
1714
static void
1715
opt_cleanup()
1716
{
1717
	free((void *)vnode_base);
1718
	free((void *)vmap);
1719
	free((void *)edges);
1720
	free((void *)space1);
1721
	free((void *)space2);
1722
	free((void *)levels);
1723
	free((void *)blocks);
1724
}
1725
1726
/*
1727
 * Return the number of stmts in 's'.
1728
 */
1729
static int
1730
slength(s)
1731
	struct slist *s;
1732
{
1733
	int n = 0;
1734
1735
264
	for (; s; s = s->next)
1736
78
		if (s->s.code != NOP)
1737
78
			++n;
1738
36
	return n;
1739
}
1740
1741
/*
1742
 * Return the number of nodes reachable by 'p'.
1743
 * All nodes should be initially unmarked.
1744
 */
1745
static int
1746
count_blocks(p)
1747
	struct block *p;
1748
{
1749
	if (p == 0 || isMarked(p))
1750
		return 0;
1751
	Mark(p);
1752
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1753
}
1754
1755
/*
1756
 * Do a depth first search on the flow graph, numbering the
1757
 * basic blocks, and entering them into the 'blocks' array.
1758
 */
1759
static void
1760
number_blks_r(p)
1761
	struct block *p;
1762
{
1763
	int n;
1764
1765
	if (p == 0 || isMarked(p))
1766
		return;
1767
1768
	Mark(p);
1769
	n = n_blocks++;
1770
	p->id = n;
1771
	blocks[n] = p;
1772
1773
	number_blks_r(JT(p));
1774
	number_blks_r(JF(p));
1775
}
1776
1777
/*
1778
 * Return the number of stmts in the flowgraph reachable by 'p'.
1779
 * The nodes should be unmarked before calling.
1780
 */
1781
static int
1782
count_stmts(p)
1783
	struct block *p;
1784
{
1785
	int n;
1786
1787

105
	if (p == 0 || isMarked(p))
1788
21
		return 0;
1789
18
	Mark(p);
1790
18
	n = count_stmts(JT(p)) + count_stmts(JF(p));
1791
18
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1792
39
}
1793
1794
/*
1795
 * Allocate memory.  All allocation is done before optimization
1796
 * is begun.  A linear bound on the size of all data structures is computed
1797
 * from the total number of blocks and/or statements.
1798
 */
1799
static void
1800
opt_init(root)
1801
	struct block *root;
1802
{
1803
	bpf_u_int32 *p;
1804
	int i, n, max_stmts;
1805
	size_t size1, size2;
1806
1807
	/*
1808
	 * First, count the blocks, so we can malloc an array to map
1809
	 * block number to block.  Then, put the blocks into the array.
1810
	 */
1811
	unMarkAll();
1812
	n = count_blocks(root);
1813
	blocks = reallocarray(NULL, n, sizeof(*blocks));
1814
	if (blocks == NULL)
1815
		bpf_error("malloc");
1816
1817
	unMarkAll();
1818
	n_blocks = 0;
1819
	number_blks_r(root);
1820
1821
	n_edges = 2 * n_blocks;
1822
	edges = reallocarray(NULL, n_edges, sizeof(*edges));
1823
	if (edges == NULL)
1824
		bpf_error("malloc");
1825
1826
	/*
1827
	 * The number of levels is bounded by the number of nodes.
1828
	 */
1829
	levels = reallocarray(NULL, n_blocks, sizeof(*levels));
1830
	if (levels == NULL)
1831
		bpf_error("malloc");
1832
1833
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1834
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1835
1836
	size1 = 2;
1837
	if (n_blocks > SIZE_MAX / size1)
1838
		goto fail1;
1839
	size1 *= n_blocks;
1840
	if (nodewords > SIZE_MAX / size1)
1841
		goto fail1;
1842
	size1 *= nodewords;
1843
	if (sizeof(*space1) > SIZE_MAX / size1)
1844
		goto fail1;
1845
	size1 *= sizeof(*space1);
1846
1847
	space1 = (bpf_u_int32 *)malloc(size1);
1848
	if (space1 == NULL) {
1849
fail1:
1850
		bpf_error("malloc");
1851
	}
1852
1853
	size2 = n_edges;
1854
	if (edgewords > SIZE_MAX / size2)
1855
		goto fail2;
1856
	size2 *= edgewords;
1857
	if (sizeof(*space2) > SIZE_MAX / size2)
1858
		goto fail2;
1859
	size2 *= sizeof(*space2);
1860
1861
	space2 = (bpf_u_int32 *)malloc(size2);
1862
	if (space2 == NULL) {
1863
fail2:
1864
		free(space1);
1865
		bpf_error("malloc");
1866
	}
1867
1868
	p = space1;
1869
	all_dom_sets = p;
1870
	for (i = 0; i < n; ++i) {
1871
		blocks[i]->dom = p;
1872
		p += nodewords;
1873
	}
1874
	all_closure_sets = p;
1875
	for (i = 0; i < n; ++i) {
1876
		blocks[i]->closure = p;
1877
		p += nodewords;
1878
	}
1879
	p = space2;
1880
	all_edge_sets = p;
1881
	for (i = 0; i < n; ++i) {
1882
		struct block *b = blocks[i];
1883
1884
		b->et.edom = p;
1885
		p += edgewords;
1886
		b->ef.edom = p;
1887
		p += edgewords;
1888
		b->et.id = i;
1889
		edges[i] = &b->et;
1890
		b->ef.id = n_blocks + i;
1891
		edges[n_blocks + i] = &b->ef;
1892
		b->et.pred = b;
1893
		b->ef.pred = b;
1894
	}
1895
	max_stmts = 0;
1896
	for (i = 0; i < n; ++i)
1897
		max_stmts += slength(blocks[i]->stmts) + 1;
1898
	/*
1899
	 * We allocate at most 3 value numbers per statement,
1900
	 * so this is an upper bound on the number of valnodes
1901
	 * we'll need.
1902
	 */
1903
	maxval = 3 * max_stmts;
1904
	vmap = reallocarray(NULL, maxval, sizeof(*vmap));
1905
	vnode_base = reallocarray(NULL, maxval, sizeof(*vnode_base));
1906
	if (vmap == NULL || vnode_base == NULL)
1907
		bpf_error("malloc");
1908
}
1909
1910
/*
1911
 * Some pointers used to convert the basic block form of the code,
1912
 * into the array form that BPF requires.  'fstart' will point to
1913
 * the malloc'd array while 'ftail' is used during the recursive traversal.
1914
 */
1915
static struct bpf_insn *fstart;
1916
static struct bpf_insn *ftail;
1917
1918
#ifdef BDEBUG
1919
int bids[1000];
1920
#endif
1921
1922
/*
1923
 * Returns true if successful.  Returns false if a branch has
1924
 * an offset that is too large.  If so, we have marked that
1925
 * branch so that on a subsequent iteration, it will be treated
1926
 * properly.
1927
 */
1928
static int
1929
convert_code_r(p)
1930
	struct block *p;
1931
{
1932
	struct bpf_insn *dst;
1933
	struct slist *src;
1934
	int slen;
1935
	u_int off;
1936
	int extrajmps;		/* number of extra jumps inserted */
1937
	struct slist **offset = NULL;
1938
1939

105
	if (p == 0 || isMarked(p))
1940
21
		return (1);
1941
18
	Mark(p);
1942
1943
18
	if (convert_code_r(JF(p)) == 0)
1944
		return (0);
1945
18
	if (convert_code_r(JT(p)) == 0)
1946
		return (0);
1947
1948
18
	slen = slength(p->stmts);
1949
18
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1950
		/* inflate length by any extra jumps */
1951
1952
18
	p->offset = dst - fstart;
1953
1954
	/* generate offset[] for convenience  */
1955
18
	if (slen) {
1956
12
		offset = calloc(slen, sizeof(struct slist *));
1957
12
		if (!offset) {
1958
			bpf_error("not enough core");
1959
			/*NOTREACHED*/
1960
		}
1961
	}
1962
18
	src = p->stmts;
1963
114
	for (off = 0; off < slen && src; off++) {
1964
#if 0
1965
		printf("off=%d src=%x\n", off, src);
1966
#endif
1967
39
		offset[off] = src;
1968
39
		src = src->next;
1969
	}
1970
1971
	off = 0;
1972
114
	for (src = p->stmts; src; src = src->next) {
1973
39
		if (src->s.code == NOP)
1974
			continue;
1975
39
		dst->code = (u_short)src->s.code;
1976
39
		dst->k = src->s.k;
1977
1978
		/* fill block-local relative jump */
1979

39
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1980
#if 0
1981
			if (src->s.jt || src->s.jf) {
1982
				bpf_error("illegal jmp destination");
1983
				/*NOTREACHED*/
1984
			}
1985
#endif
1986
			goto filled;
1987
		}
1988
		if (off == slen - 2)	/*???*/
1989
			goto filled;
1990
1991
	    {
1992
		int i;
1993
		int jt, jf;
1994
		char *ljerr = "%s for block-local relative jump: off=%d";
1995
1996
#if 0
1997
		printf("code=%x off=%d %x %x\n", src->s.code,
1998
			off, src->s.jt, src->s.jf);
1999
#endif
2000
2001
		if (!src->s.jt || !src->s.jf) {
2002
			bpf_error(ljerr, "no jmp destination", off);
2003
			/*NOTREACHED*/
2004
		}
2005
2006
		jt = jf = 0;
2007
		for (i = 0; i < slen; i++) {
2008
			if (offset[i] == src->s.jt) {
2009
				if (jt) {
2010
					bpf_error(ljerr, "multiple matches", off);
2011
					/*NOTREACHED*/
2012
				}
2013
2014
				dst->jt = i - off - 1;
2015
				jt++;
2016
			}
2017
			if (offset[i] == src->s.jf) {
2018
				if (jf) {
2019
					bpf_error(ljerr, "multiple matches", off);
2020
					/*NOTREACHED*/
2021
				}
2022
				dst->jf = i - off - 1;
2023
				jf++;
2024
			}
2025
		}
2026
		if (!jt || !jf) {
2027
			bpf_error(ljerr, "no destination found", off);
2028
			/*NOTREACHED*/
2029
		}
2030
	    }
2031
filled:
2032
39
		++dst;
2033
39
		++off;
2034
39
	}
2035
18
	free(offset);
2036
2037
#ifdef BDEBUG
2038
	bids[dst - fstart] = p->id + 1;
2039
#endif
2040
18
	dst->code = (u_short)p->s.code;
2041
18
	dst->k = p->s.k;
2042
18
	if (JT(p)) {
2043
		extrajmps = 0;
2044
12
		off = JT(p)->offset - (p->offset + slen) - 1;
2045
12
		if (off >= 256) {
2046
		    /* offset too large for branch, must add a jump */
2047
		    if (p->longjt == 0) {
2048
		    	/* mark this instruction and retry */
2049
			p->longjt++;
2050
			return(0);
2051
		    }
2052
		    /* branch if T to following jump */
2053
		    dst->jt = extrajmps;
2054
		    extrajmps++;
2055
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2056
		    dst[extrajmps].k = off - extrajmps;
2057
		}
2058
		else
2059
12
		    dst->jt = off;
2060
12
		off = JF(p)->offset - (p->offset + slen) - 1;
2061
12
		if (off >= 256) {
2062
		    /* offset too large for branch, must add a jump */
2063
		    if (p->longjf == 0) {
2064
		    	/* mark this instruction and retry */
2065
			p->longjf++;
2066
			return(0);
2067
		    }
2068
		    /* branch if F to following jump */
2069
		    /* if two jumps are inserted, F goes to second one */
2070
		    dst->jf = extrajmps;
2071
		    extrajmps++;
2072
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2073
		    dst[extrajmps].k = off - extrajmps;
2074
		}
2075
		else
2076
12
		    dst->jf = off;
2077
	}
2078
18
	return (1);
2079
39
}
2080
2081
2082
/*
2083
 * Convert flowgraph intermediate representation to the
2084
 * BPF array representation.  Set *lenp to the number of instructions.
2085
 */
2086
struct bpf_insn *
2087
icode_to_fcode(root, lenp)
2088
	struct block *root;
2089
	int *lenp;
2090
{
2091
	int n;
2092
	struct bpf_insn *fp;
2093
2094
	/*
2095
	 * Loop doing convert_code_r() until no branches remain
2096
	 * with too-large offsets.
2097
	 */
2098
6
	while (1) {
2099
3
	    unMarkAll();
2100
3
	    n = *lenp = count_stmts(root);
2101
2102
3
	    fp = calloc(n, sizeof(*fp));
2103
3
	    if (fp == NULL)
2104
		    bpf_error("calloc");
2105
2106
3
	    fstart = fp;
2107
3
	    ftail = fp + n;
2108
2109
3
	    unMarkAll();
2110
3
	    if (convert_code_r(root))
2111
		break;
2112
	    free(fp);
2113
	}
2114
2115
3
	return fp;
2116
}
2117
2118
#ifdef BDEBUG
2119
static void
2120
opt_dump(root)
2121
	struct block *root;
2122
{
2123
	struct bpf_program f;
2124
2125
	memset(bids, 0, sizeof bids);
2126
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
2127
	bpf_dump(&f, 1);
2128
	putchar('\n');
2129
	free((char *)f.bf_insns);
2130
}
2131
#endif