GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/diff/diffreg.c Lines: 503 669 75.2 %
Date: 2017-11-13 Branches: 372 591 62.9 %

Line Branch Exec Source
1
/*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2
3
/*
4
 * Copyright (C) Caldera International Inc.  2001-2002.
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code and documentation must retain the above
11
 *    copyright notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 * 3. All advertising materials mentioning features or use of this software
16
 *    must display the following acknowledgement:
17
 *	This product includes software developed or owned by Caldera
18
 *	International, Inc.
19
 * 4. Neither the name of Caldera International, Inc. nor the names of other
20
 *    contributors may be used to endorse or promote products derived from
21
 *    this software without specific prior written permission.
22
 *
23
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27
 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28
 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34
 * POSSIBILITY OF SUCH DAMAGE.
35
 */
36
/*-
37
 * Copyright (c) 1991, 1993
38
 *	The Regents of the University of California.  All rights reserved.
39
 *
40
 * Redistribution and use in source and binary forms, with or without
41
 * modification, are permitted provided that the following conditions
42
 * are met:
43
 * 1. Redistributions of source code must retain the above copyright
44
 *    notice, this list of conditions and the following disclaimer.
45
 * 2. Redistributions in binary form must reproduce the above copyright
46
 *    notice, this list of conditions and the following disclaimer in the
47
 *    documentation and/or other materials provided with the distribution.
48
 * 3. Neither the name of the University nor the names of its contributors
49
 *    may be used to endorse or promote products derived from this software
50
 *    without specific prior written permission.
51
 *
52
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62
 * SUCH DAMAGE.
63
 *
64
 *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65
 */
66
67
#include <sys/stat.h>
68
#include <sys/wait.h>
69
70
#include <ctype.h>
71
#include <err.h>
72
#include <errno.h>
73
#include <fcntl.h>
74
#include <paths.h>
75
#include <stddef.h>
76
#include <stdint.h>
77
#include <stdio.h>
78
#include <stdlib.h>
79
#include <string.h>
80
#include <unistd.h>
81
#include <limits.h>
82
83
#include "diff.h"
84
#include "xmalloc.h"
85
86
#define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88
89
/*
90
 * diff - compare two files.
91
 */
92
93
/*
94
 *	Uses an algorithm due to Harold Stone, which finds
95
 *	a pair of longest identical subsequences in the two
96
 *	files.
97
 *
98
 *	The major goal is to generate the match vector J.
99
 *	J[i] is the index of the line in file1 corresponding
100
 *	to line i file0. J[i] = 0 if there is no
101
 *	such line in file1.
102
 *
103
 *	Lines are hashed so as to work in core. All potential
104
 *	matches are located by sorting the lines of each file
105
 *	on the hash (called ``value''). In particular, this
106
 *	collects the equivalence classes in file1 together.
107
 *	Subroutine equiv replaces the value of each line in
108
 *	file0 by the index of the first element of its
109
 *	matching equivalence in (the reordered) file1.
110
 *	To save space equiv squeezes file1 into a single
111
 *	array member in which the equivalence classes
112
 *	are simply concatenated, except that their first
113
 *	members are flagged by changing sign.
114
 *
115
 *	Next the indices that point into member are unsorted into
116
 *	array class according to the original order of file0.
117
 *
118
 *	The cleverness lies in routine stone. This marches
119
 *	through the lines of file0, developing a vector klist
120
 *	of "k-candidates". At step i a k-candidate is a matched
121
 *	pair of lines x,y (x in file0 y in file1) such that
122
 *	there is a common subsequence of length k
123
 *	between the first i lines of file0 and the first y
124
 *	lines of file1, but there is no such subsequence for
125
 *	any smaller y. x is the earliest possible mate to y
126
 *	that occurs in such a subsequence.
127
 *
128
 *	Whenever any of the members of the equivalence class of
129
 *	lines in file1 matable to a line in file0 has serial number
130
 *	less than the y of some k-candidate, that k-candidate
131
 *	with the smallest such y is replaced. The new
132
 *	k-candidate is chained (via pred) to the current
133
 *	k-1 candidate so that the actual subsequence can
134
 *	be recovered. When a member has serial number greater
135
 *	that the y of all k-candidates, the klist is extended.
136
 *	At the end, the longest subsequence is pulled out
137
 *	and placed in the array J by unravel
138
 *
139
 *	With J in hand, the matches there recorded are
140
 *	check'ed against reality to assure that no spurious
141
 *	matches have crept in due to hashing. If they have,
142
 *	they are broken, and "jackpot" is recorded--a harmless
143
 *	matter except that a true match for a spuriously
144
 *	mated line may now be unnecessarily reported as a change.
145
 *
146
 *	Much of the complexity of the program comes simply
147
 *	from trying to minimize core utilization and
148
 *	maximize the range of doable problems by dynamically
149
 *	allocating what is needed and reusing what is not.
150
 *	The core requirements for problems larger than somewhat
151
 *	are (in words) 2*length(file0) + length(file1) +
152
 *	3*(number of k-candidates installed),  typically about
153
 *	6n words for files of length n.
154
 */
155
156
struct cand {
157
	int	x;
158
	int	y;
159
	int	pred;
160
};
161
162
struct line {
163
	int	serial;
164
	int	value;
165
} *file[2];
166
167
/*
168
 * The following struct is used to record change information when
169
 * doing a "context" or "unified" diff.  (see routine "change" to
170
 * understand the highly mnemonic field names)
171
 */
172
struct context_vec {
173
	int	a;		/* start line in old file */
174
	int	b;		/* end line in old file */
175
	int	c;		/* start line in new file */
176
	int	d;		/* end line in new file */
177
};
178
179
#define	diff_output	printf
180
static FILE	*opentemp(const char *);
181
static void	 output(char *, FILE *, char *, FILE *, int);
182
static void	 check(FILE *, FILE *, int);
183
static void	 range(int, int, char *);
184
static void	 uni_range(int, int);
185
static void	 dump_context_vec(FILE *, FILE *, int);
186
static void	 dump_unified_vec(FILE *, FILE *, int);
187
static void	 prepare(int, FILE *, off_t, int);
188
static void	 prune(void);
189
static void	 equiv(struct line *, int, struct line *, int, int *);
190
static void	 unravel(int);
191
static void	 unsort(struct line *, int, int *);
192
static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193
static void	 sort(struct line *, int);
194
static void	 print_header(const char *, const char *);
195
static int	 ignoreline(char *);
196
static int	 asciifile(FILE *);
197
static int	 fetch(long *, int, int, FILE *, int, int, int);
198
static int	 newcand(int, int, int);
199
static int	 search(int *, int, int);
200
static int	 skipline(FILE *);
201
static int	 isqrt(int);
202
static int	 stone(int *, int, int *, int *, int);
203
static int	 readhash(FILE *, int);
204
static int	 files_differ(FILE *, FILE *, int);
205
static char	*match_function(const long *, int, FILE *);
206
static char	*preadline(int, size_t, off_t);
207
208
static int  *J;			/* will be overlaid on class */
209
static int  *class;		/* will be overlaid on file[0] */
210
static int  *klist;		/* will be overlaid on file[0] after class */
211
static int  *member;		/* will be overlaid on file[1] */
212
static int   clen;
213
static int   inifdef;		/* whether or not we are in a #ifdef block */
214
static int   len[2];
215
static int   pref, suff;	/* length of prefix and suffix */
216
static int   slen[2];
217
static int   anychange;
218
static long *ixnew;		/* will be overlaid on file[1] */
219
static long *ixold;		/* will be overlaid on klist */
220
static struct cand *clist;	/* merely a free storage pot for candidates */
221
static int   clistlen;		/* the length of clist */
222
static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223
static u_char *chrtran;		/* translation table for case-folding */
224
static struct context_vec *context_vec_start;
225
static struct context_vec *context_vec_end;
226
static struct context_vec *context_vec_ptr;
227
228
#define FUNCTION_CONTEXT_SIZE	55
229
static char lastbuf[FUNCTION_CONTEXT_SIZE];
230
static int lastline;
231
static int lastmatchline;
232
233
234
/*
235
 * chrtran points to one of 2 translation tables: cup2low if folding upper to
236
 * lower case clow2low if not folding case
237
 */
238
u_char clow2low[256] = {
239
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245
	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246
	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262
	0xfd, 0xfe, 0xff
263
};
264
265
u_char cup2low[256] = {
266
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272
	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273
	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289
	0xfd, 0xfe, 0xff
290
};
291
292
int
293
diffreg(char *file1, char *file2, int flags)
294
{
295
	FILE *f1, *f2;
296
	int i, rval;
297
298
	f1 = f2 = NULL;
299
	rval = D_SAME;
300
24102
	anychange = 0;
301
12051
	lastline = 0;
302
12051
	lastmatchline = 0;
303
12051
	context_vec_ptr = context_vec_start - 1;
304
12051
	if (flags & D_IGNORECASE)
305
		chrtran = cup2low;
306
	else
307
		chrtran = clow2low;
308
12051
	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309
		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310

12607
	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311
		goto closem;
312
313
12051
	if (flags & D_EMPTY1)
314
		f1 = fopen(_PATH_DEVNULL, "r");
315
	else {
316
12051
		if (!S_ISREG(stb1.st_mode)) {
317

1112
			if ((f1 = opentemp(file1)) == NULL ||
318
1668
			    fstat(fileno(f1), &stb1) < 0) {
319
				warn("%s", file1);
320
				status |= 2;
321
				goto closem;
322
			}
323
11495
		} else if (strcmp(file1, "-") == 0)
324
			f1 = stdin;
325
		else
326
11495
			f1 = fopen(file1, "r");
327
	}
328
12051
	if (f1 == NULL) {
329
		warn("%s", file1);
330
		status |= 2;
331
		goto closem;
332
	}
333
334
12051
	if (flags & D_EMPTY2)
335
		f2 = fopen(_PATH_DEVNULL, "r");
336
	else {
337
12051
		if (!S_ISREG(stb2.st_mode)) {
338

8826
			if ((f2 = opentemp(file2)) == NULL ||
339
13239
			    fstat(fileno(f2), &stb2) < 0) {
340
				warn("%s", file2);
341
				status |= 2;
342
				goto closem;
343
			}
344
7638
		} else if (strcmp(file2, "-") == 0)
345
			f2 = stdin;
346
		else
347
7638
			f2 = fopen(file2, "r");
348
	}
349
12051
	if (f2 == NULL) {
350
		warn("%s", file2);
351
		status |= 2;
352
		goto closem;
353
	}
354
355
12454
	switch (files_differ(f1, f2, flags)) {
356
	case 0:
357
		goto closem;
358
	case 1:
359
		break;
360
	default:
361
		/* error */
362
		status |= 2;
363
		goto closem;
364
	}
365
366

806
	if ((flags & D_FORCEASCII) == 0 &&
367
806
	    (!asciifile(f1) || !asciifile(f2))) {
368
		rval = D_BINARY;
369
		status |= 1;
370
		goto closem;
371
	}
372
403
	prepare(0, f1, stb1.st_size, flags);
373
403
	prepare(1, f2, stb2.st_size, flags);
374
375
403
	prune();
376
403
	sort(sfile[0], slen[0]);
377
403
	sort(sfile[1], slen[1]);
378
379
403
	member = (int *)file[1];
380
403
	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381
403
	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382
383
403
	class = (int *)file[0];
384
403
	unsort(sfile[0], slen[0], class);
385
403
	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386
387
403
	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388
403
	clen = 0;
389
403
	clistlen = 100;
390
403
	clist = xcalloc(clistlen, sizeof(*clist));
391
403
	i = stone(class, slen[0], member, klist, flags);
392
403
	free(member);
393
403
	free(class);
394
395
403
	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396
403
	unravel(klist[i]);
397
403
	free(clist);
398
403
	free(klist);
399
400
403
	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401
403
	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402
403
	check(f1, f2, flags);
403
403
	output(file1, f1, file2, f2, flags);
404
closem:
405
12051
	if (anychange) {
406
396
		status |= 1;
407
396
		if (rval == D_SAME)
408
396
			rval = D_DIFFER;
409
	}
410
12051
	if (f1 != NULL)
411
12051
		fclose(f1);
412
12051
	if (f2 != NULL)
413
12051
		fclose(f2);
414
415
12051
	return (rval);
416
12051
}
417
418
/*
419
 * Check to see if the given files differ.
420
 * Returns 0 if they are the same, 1 if different, and -1 on error.
421
 * XXX - could use code from cmp(1) [faster]
422
 */
423
static int
424
files_differ(FILE *f1, FILE *f2, int flags)
425
{
426
24102
	char buf1[BUFSIZ], buf2[BUFSIZ];
427
	size_t i, j;
428
429

35850
	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430
11748
	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431
303
		return (1);
432
35683
	for (;;) {
433
35683
		i = fread(buf1, 1, sizeof(buf1), f1);
434
35683
		j = fread(buf2, 1, sizeof(buf2), f2);
435




117958
		if ((!i && ferror(f1)) || (!j && ferror(f2)))
436
			return (-1);
437
35683
		if (i != j)
438
			return (1);
439
35683
		if (i == 0)
440
11648
			return (0);
441
24035
		if (memcmp(buf1, buf2, i) != 0)
442
100
			return (1);
443
	}
444
12051
}
445
446
static FILE *
447
opentemp(const char *file)
448
{
449
9938
	char buf[BUFSIZ], tempfile[PATH_MAX];
450
	ssize_t nread;
451
	int ifd, ofd;
452
453
4969
	if (strcmp(file, "-") == 0)
454
4251
		ifd = STDIN_FILENO;
455
718
	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
456
		return (NULL);
457
458
4969
	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
459
460
4969
	if ((ofd = mkstemp(tempfile)) < 0) {
461
		close(ifd);
462
		return (NULL);
463
	}
464
4969
	unlink(tempfile);
465
25232
	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
466
7647
		if (write(ofd, buf, nread) != nread) {
467
			close(ifd);
468
			close(ofd);
469
			return (NULL);
470
		}
471
	}
472
4969
	close(ifd);
473
4969
	lseek(ofd, (off_t)0, SEEK_SET);
474
4969
	return (fdopen(ofd, "r"));
475
4969
}
476
477
char *
478
splice(char *dir, char *file)
479
{
480
	char *tail, *buf;
481
	size_t dirlen;
482
483
	dirlen = strlen(dir);
484
	while (dirlen != 0 && dir[dirlen - 1] == '/')
485
	    dirlen--;
486
	if ((tail = strrchr(file, '/')) == NULL)
487
		tail = file;
488
	else
489
		tail++;
490
	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
491
	return (buf);
492
}
493
494
static void
495
prepare(int i, FILE *fd, off_t filesize, int flags)
496
{
497
	struct line *p;
498
	int j, h;
499
	size_t sz;
500
501
1612
	rewind(fd);
502
503
806
	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
504
806
	if (sz < 100)
505
		sz = 100;
506
507
806
	p = xcalloc(sz + 3, sizeof(*p));
508
343922
	for (j = 0; (h = readhash(fd, flags));) {
509
171155
		if (j == sz) {
510
88
			sz = sz * 3 / 2;
511
88
			p = xreallocarray(p, sz + 3, sizeof(*p));
512
88
		}
513
171155
		p[++j].value = h;
514
	}
515
806
	len[i] = j;
516
806
	file[i] = p;
517
806
}
518
519
static void
520
prune(void)
521
{
522
	int i, j;
523
524

6147
	for (pref = 0; pref < len[0] && pref < len[1] &&
525
1343
	    file[0][pref + 1].value == file[1][pref + 1].value;
526
	    pref++)
527
		;
528

4581
	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
529
1038
	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
530
818
	    suff++)
531
		;
532
2418
	for (j = 0; j < 2; j++) {
533
806
		sfile[j] = file[j] + pref;
534
806
		slen[j] = len[j] - pref - suff;
535
337878
		for (i = 0; i <= slen[j]; i++)
536
168133
			sfile[j][i].serial = i;
537
	}
538
403
}
539
540
static void
541
equiv(struct line *a, int n, struct line *b, int m, int *c)
542
{
543
	int i, j;
544
545
	i = j = 1;
546

498336
	while (i <= n && j <= m) {
547
165668
		if (a[i].value < b[j].value)
548
20576
			a[i++].value = 0;
549
145092
		else if (a[i].value == b[j].value)
550
49364
			a[i++].value = j;
551
		else
552
95728
			j++;
553
	}
554
2316
	while (i <= n)
555
755
		a[i++].value = 0;
556
403
	b[m + 1].value = 0;
557
	j = 0;
558
94822
	while (++j <= m) {
559
47008
		c[j] = -b[j].serial;
560
193264
		while (b[j + 1].value == b[j].value) {
561
			j++;
562
49624
			c[j] = b[j].serial;
563
		}
564
	}
565
403
	c[j] = -1;
566
403
}
567
568
/* Code taken from ping.c */
569
static int
570
isqrt(int n)
571
{
572
	int y, x = 1;
573
574
806
	if (n == 0)
575
120
		return (0);
576
577
283
	do { /* newton was a stinker */
578
		y = x;
579
843
		x = n / x;
580
843
		x += y;
581
843
		x /= 2;
582

1550
	} while ((x - y) > 1 || (x - y) < -1);
583
584
283
	return (x);
585
403
}
586
587
static int
588
stone(int *a, int n, int *b, int *c, int flags)
589
{
590
	int i, k, y, j, l;
591
	int oldc, tc, oldl, sq;
592
	u_int numtries, bound;
593
594
806
	if (flags & D_MINIMAL)
595
		bound = UINT_MAX;
596
	else {
597
403
		sq = isqrt(n);
598
403
		bound = MAXIMUM(256, sq);
599
	}
600
601
	k = 0;
602
403
	c[0] = newcand(0, 0, 0);
603
142196
	for (i = 1; i <= n; i++) {
604
70695
		j = a[i];
605
70695
		if (j == 0)
606
			continue;
607
49364
		y = -b[j];
608
		oldl = 0;
609
49364
		oldc = c[0];
610
		numtries = 0;
611
49364
		do {
612
640912
			if (y <= clist[oldc].y)
613
				continue;
614
583652
			l = search(c, k, y);
615
583652
			if (l != oldl + 1)
616
571148
				oldc = c[l - 1];
617
583652
			if (l <= k) {
618
536668
				if (clist[c[l]].y <= y)
619
					continue;
620
				tc = c[l];
621
68500
				c[l] = newcand(i, y, oldc);
622
				oldc = tc;
623
				oldl = l;
624
68500
				numtries++;
625
			} else {
626
46984
				c[l] = newcand(i, y, oldc);
627
46984
				k++;
628
46984
				break;
629
			}
630

1253976
		} while ((y = b[++j]) > 0 && numtries < bound);
631
	}
632
403
	return (k);
633
}
634
635
static int
636
newcand(int x, int y, int pred)
637
{
638
	struct cand *q;
639
640
231774
	if (clen == clistlen) {
641
1420
		clistlen = clistlen * 11 / 10;
642
1420
		clist = xreallocarray(clist, clistlen, sizeof(*clist));
643
1420
	}
644
115887
	q = clist + clen;
645
115887
	q->x = x;
646
115887
	q->y = y;
647
115887
	q->pred = pred;
648
115887
	return (clen++);
649
}
650
651
static int
652
search(int *c, int k, int y)
653
{
654
	int i, j, l, t;
655
656
1167304
	if (clist[c[k]].y < y)	/* quick look for typical case */
657
46984
		return (k + 1);
658
	i = 0;
659
536668
	j = k + 1;
660
4971484
	for (;;) {
661
4971484
		l = (i + j) / 2;
662
4971484
		if (l <= i)
663
			break;
664
4902984
		t = clist[c[l]].y;
665
4902984
		if (t > y)
666
2048880
			j = l;
667
2854104
		else if (t < y)
668
			i = l;
669
		else
670
468168
			return (l);
671
	}
672
68500
	return (l + 1);
673
583652
}
674
675
static void
676
unravel(int p)
677
{
678
	struct cand *q;
679
	int i;
680
681
147233
	for (i = 0; i <= len[0]; i++)
682
146024
		J[i] = i <= pref ? i :
683
72331
		    i > len[0] - suff ? i + len[1] - len[0] : 0;
684
94774
	for (q = clist + p; q->y != 0; q = clist + q->pred)
685
46984
		J[q->x + pref] = q->y + pref;
686
403
}
687
688
/*
689
 * Check does double duty:
690
 *  1.	ferret out any fortuitous correspondences due
691
 *	to confounding by hashing (which result in "jackpot")
692
 *  2.  collect random access indexes to the two files
693
 */
694
static void
695
check(FILE *f1, FILE *f2, int flags)
696
{
697
	int i, j, jackpot, c, d;
698
	long ctold, ctnew;
699
700
806
	rewind(f1);
701
403
	rewind(f2);
702
	j = 1;
703
403
	ixold[0] = ixnew[0] = 0;
704
	jackpot = 0;
705
	ctold = ctnew = 0;
706
146024
	for (i = 1; i <= len[0]; i++) {
707
72609
		if (J[i] == 0) {
708
23711
			ixold[i] = ctold += skipline(f1);
709
23711
			continue;
710
		}
711
171940
		while (j < J[i]) {
712
37072
			ixnew[j] = ctnew += skipline(f2);
713
37072
			j++;
714
		}
715
48898
		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
716
			for (;;) {
717
				c = getc(f1);
718
				d = getc(f2);
719
				/*
720
				 * GNU diff ignores a missing newline
721
				 * in one file for -b or -w.
722
				 */
723
				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
724
					if (c == EOF && d == '\n') {
725
						ctnew++;
726
						break;
727
					} else if (c == '\n' && d == EOF) {
728
						ctold++;
729
						break;
730
					}
731
				}
732
				ctold++;
733
				ctnew++;
734
				if ((flags & D_FOLDBLANKS) && isspace(c) &&
735
				    isspace(d)) {
736
					do {
737
						if (c == '\n')
738
							break;
739
						ctold++;
740
					} while (isspace(c = getc(f1)));
741
					do {
742
						if (d == '\n')
743
							break;
744
						ctnew++;
745
					} while (isspace(d = getc(f2)));
746
				} else if ((flags & D_IGNOREBLANKS)) {
747
					while (isspace(c) && c != '\n') {
748
						c = getc(f1);
749
						ctold++;
750
					}
751
					while (isspace(d) && d != '\n') {
752
						d = getc(f2);
753
						ctnew++;
754
					}
755
				}
756
				if (chrtran[c] != chrtran[d]) {
757
					jackpot++;
758
					J[i] = 0;
759
					if (c != '\n' && c != EOF)
760
						ctold += skipline(f1);
761
					if (d != '\n' && c != EOF)
762
						ctnew += skipline(f2);
763
					break;
764
				}
765
				if (c == '\n' || c == EOF)
766
					break;
767
			}
768
		} else {
769
1141199
			for (;;) {
770
1141199
				ctold++;
771
1141199
				ctnew++;
772


7988393
				if ((c = getc(f1)) != (d = getc(f2))) {
773
					/* jackpot++; */
774
60
					J[i] = 0;
775
60
					if (c != '\n' && c != EOF)
776
						ctold += skipline(f1);
777
60
					if (d != '\n' && c != EOF)
778
20
						ctnew += skipline(f2);
779
					break;
780
				}
781
1141139
				if (c == '\n' || c == EOF)
782
					break;
783
			}
784
		}
785
48898
		ixold[i] = ctold;
786
48898
		ixnew[j] = ctnew;
787
48898
		j++;
788
48898
	}
789
25958
	for (; j <= len[1]; j++)
790
12576
		ixnew[j] = ctnew += skipline(f2);
791
	/*
792
	 * if (jackpot)
793
	 *	fprintf(stderr, "jackpot\n");
794
	 */
795
403
}
796
797
/* shellsort CACM #201 */
798
static void
799
sort(struct line *a, int n)
800
{
801
	struct line *ai, *aim, w;
802
	int j, m = 0, k;
803
804
1612
	if (n == 0)
805
223
		return;
806
6452
	for (j = 1; j <= n; j *= 2)
807
2643
		m = 2 * j - 1;
808
5286
	for (m /= 2; m != 0; m /= 2) {
809
2060
		k = n - m;
810
2902744
		for (j = 1; j <= k; j++) {
811
5710336
			for (ai = &a[j]; ai > a; ai -= m) {
812
2764992
				aim = &ai[m];
813
2764992
				if (aim < ai)
814
					break;	/* wraparound */
815

3274532
				if (aim->value > ai[0].value ||
816
1648412
				    (aim->value == ai[0].value &&
817
509540
					aim->serial > ai[0].serial))
818
					break;
819
1405856
				w.value = ai[0].value;
820
1405856
				ai[0].value = aim->value;
821
1405856
				aim->value = w.value;
822
1405856
				w.serial = ai[0].serial;
823
1405856
				ai[0].serial = aim->serial;
824
1405856
				aim->serial = w.serial;
825
			}
826
		}
827
	}
828
1389
}
829
830
static void
831
unsort(struct line *f, int l, int *b)
832
{
833
	int *a, i;
834
835
806
	a = xcalloc(l + 1, sizeof(*a));
836
142196
	for (i = 1; i <= l; i++)
837
70695
		a[f[i].serial] = f[i].value;
838
142196
	for (i = 1; i <= l; i++)
839
70695
		b[i] = a[i];
840
403
	free(a);
841
403
}
842
843
static int
844
skipline(FILE *f)
845
{
846
	int i, c;
847
848

8684469
	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849
		continue;
850
73379
	return (i);
851
}
852
853
static void
854
output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
855
{
856
	int m, i0, i1, j0, j1;
857
858
403
	rewind(f1);
859
403
	rewind(f2);
860
403
	m = len[0];
861
403
	J[0] = 0;
862
403
	J[m + 1] = len[1] + 1;
863
403
	if (diff_format != D_EDIT) {
864
18256
		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865

137190
			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
866
37003
				i0++;
867
8790
			j0 = J[i0 - 1] + 1;
868
			i1 = i0 - 1;
869

79856
			while (i1 < m && J[i1 + 1] == 0)
870
				i1++;
871
8790
			j1 = J[i1 + 1] - 1;
872
8790
			J[i1] = j1;
873
8790
			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
874
		}
875
	} else {
876
5720
		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
877

55695
			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
878
11835
				i0--;
879
2795
			j0 = J[i0 + 1] - 1;
880
			i1 = i0 + 1;
881

25845
			while (i1 > 1 && J[i1 - 1] == 0)
882
5840
				i1--;
883
2795
			j1 = J[i1 - 1] + 1;
884
2795
			J[i1] = j1;
885
2795
			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
886
		}
887
	}
888
403
	if (m == 0)
889
36
		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
890
403
	if (diff_format == D_IFDEF) {
891
		for (;;) {
892
#define	c i0
893
			if ((c = getc(f1)) == EOF)
894
				return;
895
			diff_output("%c", c);
896
		}
897
#undef c
898
	}
899
403
	if (anychange != 0) {
900
396
		if (diff_format == D_CONTEXT)
901
65
			dump_context_vec(f1, f2, flags);
902
331
		else if (diff_format == D_UNIFIED)
903
73
			dump_unified_vec(f1, f2, flags);
904
	}
905
806
}
906
907
static void
908
range(int a, int b, char *separator)
909
{
910
20188
	diff_output("%d", a > b ? b : a);
911
10094
	if (a < b)
912
4072
		diff_output("%s%d", separator, b);
913
10094
}
914
915
static void
916
uni_range(int a, int b)
917
{
918
2384
	if (a < b)
919
1172
		diff_output("%d,%d", a, b - a + 1);
920
20
	else if (a == b)
921
10
		diff_output("%d", b);
922
	else
923
10
		diff_output("%d,0", b);
924
1192
}
925
926
static char *
927
preadline(int fd, size_t rlen, off_t off)
928
{
929
	char *line;
930
	ssize_t nr;
931
932
214
	line = xmalloc(rlen + 1);
933
107
	if ((nr = pread(fd, line, rlen, off)) < 0)
934
		err(2, "preadline");
935

214
	if (nr > 0 && line[nr-1] == '\n')
936
107
		nr--;
937
107
	line[nr] = '\0';
938
107
	return (line);
939
}
940
941
static int
942
ignoreline(char *line)
943
{
944
	int ret;
945
946
214
	ret = regexec(&ignore_re, line, 0, NULL, 0);
947
107
	free(line);
948
107
	return (ret == 0);	/* if it matched, it should be ignored. */
949
}
950
951
/*
952
 * Indicate that there is a difference between lines a and b of the from file
953
 * to get to lines c to d of the to file.  If a is greater then b then there
954
 * are no lines in the from file involved and this means that there were
955
 * lines appended (beginning at b).  If c is greater than d then there are
956
 * lines missing from the to file.
957
 */
958
static void
959
change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
960
    int *pflags)
961
{
962
	static size_t max_context = 64;
963
23242
	int i;
964
965
restart:
966

26023
	if (diff_format != D_IFDEF && a > b && c > d)
967
179
		return;
968
11447
	if (ignore_pats != NULL) {
969
		char *line;
970
		/*
971
		 * All lines in the change, insert, or delete must
972
		 * match an ignore pattern for the change to be
973
		 * ignored.
974
		 */
975
103
		if (a <= b) {		/* Changes and deletes. */
976
140
			for (i = a; i <= b; i++) {
977
268
				line = preadline(fileno(f1),
978
67
				    ixold[i] - ixold[i - 1], ixold[i - 1]);
979
67
				if (!ignoreline(line))
980
64
					goto proceed;
981
			}
982
		}
983

42
		if (a > b || c <= d) {	/* Changes and inserts. */
984
88
			for (i = c; i <= d; i++) {
985
160
				line = preadline(fileno(f2),
986
40
				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
987
40
				if (!ignoreline(line))
988
32
					goto proceed;
989
			}
990
		}
991
7
		return;
992
	}
993
proceed:
994
11440
	if (*pflags & D_HEADER) {
995
		diff_output("%s %s %s\n", diffargs, file1, file2);
996
		*pflags &= ~D_HEADER;
997
	}
998
11440
	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
999
		/*
1000
		 * Allocate change records as needed.
1001
		 */
1002
5588
		if (context_vec_ptr == context_vec_end - 1) {
1003
148
			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1004
148
			max_context <<= 1;
1005
148
			context_vec_start = xreallocarray(context_vec_start,
1006
			    max_context, sizeof(*context_vec_start));
1007
148
			context_vec_end = context_vec_start + max_context;
1008
148
			context_vec_ptr = context_vec_start + offset;
1009
148
		}
1010
5588
		if (anychange == 0) {
1011
			/*
1012
			 * Print the context/unidiff header first time through.
1013
			 */
1014
138
			print_header(file1, file2);
1015
138
			anychange = 1;
1016

6626
		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1017
1038
		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1018
			/*
1019
			 * If this change is more than 'diff_context' lines from the
1020
			 * previous change, dump the record and reset it.
1021
			 */
1022
1038
			if (diff_format == D_CONTEXT)
1023
515
				dump_context_vec(f1, f2, *pflags);
1024
			else
1025
523
				dump_unified_vec(f1, f2, *pflags);
1026
		}
1027
5588
		context_vec_ptr++;
1028
5588
		context_vec_ptr->a = a;
1029
5588
		context_vec_ptr->b = b;
1030
5588
		context_vec_ptr->c = c;
1031
5588
		context_vec_ptr->d = d;
1032
5588
		return;
1033
	}
1034
5852
	if (anychange == 0)
1035
258
		anychange = 1;
1036

11704
	switch (diff_format) {
1037
	case D_BRIEF:
1038
		return;
1039
	case D_NORMAL:
1040
	case D_EDIT:
1041
5852
		range(a, b, ",");
1042
16208
		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1043
5852
		if (diff_format == D_NORMAL)
1044
3082
			range(c, d, ",");
1045
5852
		diff_output("\n");
1046
5852
		break;
1047
	case D_REVERSE:
1048
		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1049
		range(a, b, " ");
1050
		diff_output("\n");
1051
		break;
1052
	case D_NREVERSE:
1053
		if (a > b)
1054
			diff_output("a%d %d\n", b, d - c + 1);
1055
		else {
1056
			diff_output("d%d %d\n", a, b - a + 1);
1057
			if (!(c > d))
1058
				/* add changed lines */
1059
				diff_output("a%d %d\n", b, d - c + 1);
1060
		}
1061
		break;
1062
	}
1063
5852
	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1064
3082
		fetch(ixold, a, b, f1, '<', 1, *pflags);
1065

5436
		if (a <= b && c <= d && diff_format == D_NORMAL)
1066
2152
			diff_output("---\n");
1067
	}
1068
5852
	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1069
5852
	if (i != 0 && diff_format == D_EDIT) {
1070
		/*
1071
		 * A non-zero return value for D_EDIT indicates that the
1072
		 * last line printed was a bare dot (".") that has been
1073
		 * escaped as ".." to prevent ed(1) from misinterpreting
1074
		 * it.  We have to add a substitute command to change this
1075
		 * back and restart where we left off.
1076
		 */
1077
5
		diff_output(".\n");
1078
5
		diff_output("%ds/.//\n", a + i - 1);
1079
		b = a + i - 1;
1080
		a = b + 1;
1081
5
		c += i;
1082
5
		goto restart;
1083
	}
1084

8612
	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1085
2675
		diff_output(".\n");
1086
5847
	if (inifdef) {
1087
		diff_output("#endif /* %s */\n", ifdefname);
1088
		inifdef = 0;
1089
	}
1090
17468
}
1091
1092
static int
1093
fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1094
{
1095
	int i, j, c, lastc, col, nc;
1096
1097
	/*
1098
	 * When doing #ifdef's, copy down to current line
1099
	 * if this is the first file, so that stuff makes it to output.
1100
	 */
1101
56888
	if (diff_format == D_IFDEF && oldfile) {
1102
		long curpos = ftell(lb);
1103
		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1104
		nc = f[a > b ? b : a - 1] - curpos;
1105
		for (i = 0; i < nc; i++)
1106
			diff_output("%c", getc(lb));
1107
	}
1108
28444
	if (a > b)
1109
1248
		return (0);
1110
27196
	if (diff_format == D_IFDEF) {
1111
		if (inifdef) {
1112
			diff_output("#else /* %s%s */\n",
1113
			    oldfile == 1 ? "!" : "", ifdefname);
1114
		} else {
1115
			if (oldfile)
1116
				diff_output("#ifndef %s\n", ifdefname);
1117
			else
1118
				diff_output("#ifdef %s\n", ifdefname);
1119
		}
1120
		inifdef = 1 + oldfile;
1121
	}
1122
230860
	for (i = a; i <= b; i++) {
1123
88339
		fseek(lb, f[i - 1], SEEK_SET);
1124
88339
		nc = f[i] - f[i - 1];
1125
88339
		if (diff_format != D_IFDEF && ch != '\0') {
1126
75999
			diff_output("%c", ch);
1127

75999
			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1128
			    || diff_format == D_UNIFIED))
1129
				diff_output("\t");
1130
75999
			else if (diff_format != D_UNIFIED)
1131
50336
				diff_output(" ");
1132
		}
1133
		col = 0;
1134
4370052
		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1135

8387168
			if ((c = getc(lb)) == EOF) {
1136
200
				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1137
100
				    diff_format == D_NREVERSE)
1138
10
					warnx("No newline at end of file");
1139
				else
1140
90
					diff_output("\n\\ No newline at end of "
1141
					    "file\n");
1142
100
				return (0);
1143
			}
1144

2168269
			if (c == '\t' && (flags & D_EXPANDTABS)) {
1145
				do {
1146
					diff_output(" ");
1147
				} while (++col & 7);
1148
			} else {
1149
4193384
				if (diff_format == D_EDIT && j == 1 && c == '\n'
1150
2096692
				    && lastc == '.') {
1151
					/*
1152
					 * Don't print a bare "." line
1153
					 * since that will confuse ed(1).
1154
					 * Print ".." instead and return,
1155
					 * giving the caller an offset
1156
					 * from which to restart.
1157
					 */
1158
5
					diff_output(".\n");
1159
5
					return (i - a + 1);
1160
				}
1161
2096687
				diff_output("%c", c);
1162
2096687
				col++;
1163
			}
1164
		}
1165
	}
1166
27091
	return (0);
1167
28444
}
1168
1169
/*
1170
 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1171
 */
1172
static int
1173
readhash(FILE *f, int flags)
1174
{
1175
	int i, t, space;
1176
	int sum;
1177
1178
	sum = 1;
1179
	space = 0;
1180
343922
	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1181
171961
		if (flags & D_IGNORECASE)
1182
			for (i = 0; (t = getc(f)) != '\n'; i++) {
1183
				if (t == EOF) {
1184
					if (i == 0)
1185
						return (0);
1186
					break;
1187
				}
1188
				sum = sum * 127 + chrtran[t];
1189
			}
1190
		else
1191

20027010
			for (i = 0; (t = getc(f)) != '\n'; i++) {
1192
3834407
				if (t == EOF) {
1193
966
					if (i == 0)
1194
806
						return (0);
1195
					break;
1196
				}
1197
3833441
				sum = sum * 127 + t;
1198
			}
1199
	} else {
1200
		for (i = 0;;) {
1201
			switch (t = getc(f)) {
1202
			case '\t':
1203
			case '\r':
1204
			case '\v':
1205
			case '\f':
1206
			case ' ':
1207
				space++;
1208
				continue;
1209
			default:
1210
				if (space && (flags & D_IGNOREBLANKS) == 0) {
1211
					i++;
1212
					space = 0;
1213
				}
1214
				sum = sum * 127 + chrtran[t];
1215
				i++;
1216
				continue;
1217
			case EOF:
1218
				if (i == 0)
1219
					return (0);
1220
				/* FALLTHROUGH */
1221
			case '\n':
1222
				break;
1223
			}
1224
			break;
1225
		}
1226
	}
1227
	/*
1228
	 * There is a remote possibility that we end up with a zero sum.
1229
	 * Zero is used as an EOF marker, so return 1 instead.
1230
	 */
1231
171155
	return (sum == 0 ? 1 : sum);
1232
171961
}
1233
1234
static int
1235
asciifile(FILE *f)
1236
{
1237
1612
	unsigned char buf[BUFSIZ];
1238
	size_t cnt;
1239
1240
806
	if (f == NULL)
1241
		return (1);
1242
1243
806
	rewind(f);
1244
806
	cnt = fread(buf, 1, sizeof(buf), f);
1245
806
	return (memchr(buf, '\0', cnt) == NULL);
1246
806
}
1247
1248
#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1249
1250
static char *
1251
match_function(const long *f, int pos, FILE *fp)
1252
{
1253
	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1254
	size_t nc;
1255
	int last = lastline;
1256
	char *state = NULL;
1257
1258
	lastline = pos;
1259
	while (pos > last) {
1260
		fseek(fp, f[pos - 1], SEEK_SET);
1261
		nc = f[pos] - f[pos - 1];
1262
		if (nc >= sizeof(buf))
1263
			nc = sizeof(buf) - 1;
1264
		nc = fread(buf, 1, nc, fp);
1265
		if (nc > 0) {
1266
			buf[nc] = '\0';
1267
			buf[strcspn(buf, "\n")] = '\0';
1268
			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1269
				if (begins_with(buf, "private:")) {
1270
					if (!state)
1271
						state = " (private)";
1272
				} else if (begins_with(buf, "protected:")) {
1273
					if (!state)
1274
						state = " (protected)";
1275
				} else if (begins_with(buf, "public:")) {
1276
					if (!state)
1277
						state = " (public)";
1278
				} else {
1279
					strlcpy(lastbuf, buf, sizeof lastbuf);
1280
					if (state)
1281
						strlcat(lastbuf, state,
1282
						    sizeof lastbuf);
1283
					lastmatchline = pos;
1284
					return lastbuf;
1285
				}
1286
			}
1287
		}
1288
		pos--;
1289
	}
1290
	return lastmatchline > 0 ? lastbuf : NULL;
1291
}
1292
1293
/* dump accumulated "context" diff changes */
1294
static void
1295
dump_context_vec(FILE *f1, FILE *f2, int flags)
1296
{
1297
1160
	struct context_vec *cvp = context_vec_start;
1298
	int lowa, upb, lowc, upd, do_output;
1299
	int a, b, c, d;
1300
	char ch, *f;
1301
1302
580
	if (context_vec_start > context_vec_ptr)
1303
		return;
1304
1305
	b = d = 0;		/* gcc */
1306
1690
	lowa = MAXIMUM(1, cvp->a - diff_context);
1307
1740
	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1308
1690
	lowc = MAXIMUM(1, cvp->c - diff_context);
1309
1740
	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1310
1311
580
	diff_output("***************");
1312
580
	if ((flags & D_PROTOTYPE)) {
1313
		f = match_function(ixold, lowa-1, f1);
1314
		if (f != NULL)
1315
			diff_output(" %s", f);
1316
	}
1317
580
	diff_output("\n*** ");
1318
580
	range(lowa, upb, ",");
1319
580
	diff_output(" ****\n");
1320
1321
	/*
1322
	 * Output changes to the "old" file.  The first loop suppresses
1323
	 * output if there were no changes to the "old" file (we'll see
1324
	 * the "old" lines as context in the "new" list).
1325
	 */
1326
	do_output = 0;
1327
1710
	for (; cvp <= context_vec_ptr; cvp++)
1328
695
		if (cvp->a <= cvp->b) {
1329
420
			cvp = context_vec_start;
1330
			do_output++;
1331
420
			break;
1332
		}
1333
580
	if (do_output) {
1334
6040
		while (cvp <= context_vec_ptr) {
1335
2600
			a = cvp->a;
1336
2600
			b = cvp->b;
1337
2600
			c = cvp->c;
1338
2600
			d = cvp->d;
1339
1340

4750
			if (a <= b && c <= d)
1341
2060
				ch = 'c';
1342
			else
1343
540
				ch = (a <= b) ? 'd' : 'a';
1344
1345
2600
			if (ch == 'a')
1346
450
				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1347
			else {
1348
2150
				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1349
4300
				fetch(ixold, a, b, f1,
1350
2150
				    ch == 'c' ? '!' : '-', 0, flags);
1351
			}
1352
2600
			lowa = b + 1;
1353
2600
			cvp++;
1354
		}
1355
420
		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1356
420
	}
1357
	/* output changes to the "new" file */
1358
580
	diff_output("--- ");
1359
580
	range(lowc, upd, ",");
1360
580
	diff_output(" ----\n");
1361
1362
	do_output = 0;
1363
1220
	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1364
595
		if (cvp->c <= cvp->d) {
1365
565
			cvp = context_vec_start;
1366
			do_output++;
1367
565
			break;
1368
		}
1369
580
	if (do_output) {
1370
6640
		while (cvp <= context_vec_ptr) {
1371
2755
			a = cvp->a;
1372
2755
			b = cvp->b;
1373
2755
			c = cvp->c;
1374
2755
			d = cvp->d;
1375
1376

4890
			if (a <= b && c <= d)
1377
2060
				ch = 'c';
1378
			else
1379
695
				ch = (a <= b) ? 'd' : 'a';
1380
1381
2755
			if (ch == 'd')
1382
75
				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1383
			else {
1384
2680
				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1385
5360
				fetch(ixnew, c, d, f2,
1386
2680
				    ch == 'c' ? '!' : '+', 0, flags);
1387
			}
1388
2755
			lowc = d + 1;
1389
2755
			cvp++;
1390
		}
1391
565
		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1392
565
	}
1393
580
	context_vec_ptr = context_vec_start - 1;
1394
1160
}
1395
1396
/* dump accumulated "unified" diff changes */
1397
static void
1398
dump_unified_vec(FILE *f1, FILE *f2, int flags)
1399
{
1400
1192
	struct context_vec *cvp = context_vec_start;
1401
	int lowa, upb, lowc, upd;
1402
	int a, b, c, d;
1403
	char ch, *f;
1404
1405
596
	if (context_vec_start > context_vec_ptr)
1406
		return;
1407
1408
	b = d = 0;		/* gcc */
1409
1738
	lowa = MAXIMUM(1, cvp->a - diff_context);
1410
1788
	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1411
1738
	lowc = MAXIMUM(1, cvp->c - diff_context);
1412
1788
	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1413
1414
596
	diff_output("@@ -");
1415
596
	uni_range(lowa, upb);
1416
596
	diff_output(" +");
1417
596
	uni_range(lowc, upd);
1418
596
	diff_output(" @@");
1419
596
	if ((flags & D_PROTOTYPE)) {
1420
		f = match_function(ixold, lowa-1, f1);
1421
		if (f != NULL)
1422
			diff_output(" %s", f);
1423
	}
1424
596
	diff_output("\n");
1425
1426
	/*
1427
	 * Output changes in "unified" diff format--the old and new lines
1428
	 * are printed together.
1429
	 */
1430
6828
	for (; cvp <= context_vec_ptr; cvp++) {
1431
2818
		a = cvp->a;
1432
2818
		b = cvp->b;
1433
2818
		c = cvp->c;
1434
2818
		d = cvp->d;
1435
1436
		/*
1437
		 * c: both new and old changes
1438
		 * d: only changes in the old file
1439
		 * a: only changes in the new file
1440
		 */
1441

5016
		if (a <= b && c <= d)
1442
2108
			ch = 'c';
1443
		else
1444
710
			ch = (a <= b) ? 'd' : 'a';
1445
1446

5636
		switch (ch) {
1447
		case 'c':
1448
2108
			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1449
2108
			fetch(ixold, a, b, f1, '-', 0, flags);
1450
2108
			fetch(ixnew, c, d, f2, '+', 0, flags);
1451
2108
			break;
1452
		case 'd':
1453
90
			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1454
90
			fetch(ixold, a, b, f1, '-', 0, flags);
1455
90
			break;
1456
		case 'a':
1457
620
			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1458
620
			fetch(ixnew, c, d, f2, '+', 0, flags);
1459
620
			break;
1460
		}
1461
2818
		lowa = b + 1;
1462
2818
		lowc = d + 1;
1463
	}
1464
596
	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1465
1466
596
	context_vec_ptr = context_vec_start - 1;
1467
1192
}
1468
1469
static void
1470
print_header(const char *file1, const char *file2)
1471
{
1472
276
	if (label[0] != NULL)
1473
		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1474
		    label[0]);
1475
	else
1476
138
		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1477
138
		    file1, ctime(&stb1.st_mtime));
1478
138
	if (label[1] != NULL)
1479
		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1480
		    label[1]);
1481
	else
1482
138
		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1483
138
		    file2, ctime(&stb2.st_mtime));
1484
138
}