GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/diff/diffreg.c Lines: 500 664 75.3 %
Date: 2017-11-07 Branches: 373 591 63.1 %

Line Branch Exec Source
1
/*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2
3
/*
4
 * Copyright (C) Caldera International Inc.  2001-2002.
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code and documentation must retain the above
11
 *    copyright notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 * 3. All advertising materials mentioning features or use of this software
16
 *    must display the following acknowledgement:
17
 *	This product includes software developed or owned by Caldera
18
 *	International, Inc.
19
 * 4. Neither the name of Caldera International, Inc. nor the names of other
20
 *    contributors may be used to endorse or promote products derived from
21
 *    this software without specific prior written permission.
22
 *
23
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27
 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28
 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34
 * POSSIBILITY OF SUCH DAMAGE.
35
 */
36
/*-
37
 * Copyright (c) 1991, 1993
38
 *	The Regents of the University of California.  All rights reserved.
39
 *
40
 * Redistribution and use in source and binary forms, with or without
41
 * modification, are permitted provided that the following conditions
42
 * are met:
43
 * 1. Redistributions of source code must retain the above copyright
44
 *    notice, this list of conditions and the following disclaimer.
45
 * 2. Redistributions in binary form must reproduce the above copyright
46
 *    notice, this list of conditions and the following disclaimer in the
47
 *    documentation and/or other materials provided with the distribution.
48
 * 3. Neither the name of the University nor the names of its contributors
49
 *    may be used to endorse or promote products derived from this software
50
 *    without specific prior written permission.
51
 *
52
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62
 * SUCH DAMAGE.
63
 *
64
 *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65
 */
66
67
#include <sys/stat.h>
68
#include <sys/wait.h>
69
70
#include <ctype.h>
71
#include <err.h>
72
#include <errno.h>
73
#include <fcntl.h>
74
#include <paths.h>
75
#include <stddef.h>
76
#include <stdint.h>
77
#include <stdio.h>
78
#include <stdlib.h>
79
#include <string.h>
80
#include <unistd.h>
81
#include <limits.h>
82
83
#include "diff.h"
84
#include "xmalloc.h"
85
86
#define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88
89
/*
90
 * diff - compare two files.
91
 */
92
93
/*
94
 *	Uses an algorithm due to Harold Stone, which finds
95
 *	a pair of longest identical subsequences in the two
96
 *	files.
97
 *
98
 *	The major goal is to generate the match vector J.
99
 *	J[i] is the index of the line in file1 corresponding
100
 *	to line i file0. J[i] = 0 if there is no
101
 *	such line in file1.
102
 *
103
 *	Lines are hashed so as to work in core. All potential
104
 *	matches are located by sorting the lines of each file
105
 *	on the hash (called ``value''). In particular, this
106
 *	collects the equivalence classes in file1 together.
107
 *	Subroutine equiv replaces the value of each line in
108
 *	file0 by the index of the first element of its
109
 *	matching equivalence in (the reordered) file1.
110
 *	To save space equiv squeezes file1 into a single
111
 *	array member in which the equivalence classes
112
 *	are simply concatenated, except that their first
113
 *	members are flagged by changing sign.
114
 *
115
 *	Next the indices that point into member are unsorted into
116
 *	array class according to the original order of file0.
117
 *
118
 *	The cleverness lies in routine stone. This marches
119
 *	through the lines of file0, developing a vector klist
120
 *	of "k-candidates". At step i a k-candidate is a matched
121
 *	pair of lines x,y (x in file0 y in file1) such that
122
 *	there is a common subsequence of length k
123
 *	between the first i lines of file0 and the first y
124
 *	lines of file1, but there is no such subsequence for
125
 *	any smaller y. x is the earliest possible mate to y
126
 *	that occurs in such a subsequence.
127
 *
128
 *	Whenever any of the members of the equivalence class of
129
 *	lines in file1 matable to a line in file0 has serial number
130
 *	less than the y of some k-candidate, that k-candidate
131
 *	with the smallest such y is replaced. The new
132
 *	k-candidate is chained (via pred) to the current
133
 *	k-1 candidate so that the actual subsequence can
134
 *	be recovered. When a member has serial number greater
135
 *	that the y of all k-candidates, the klist is extended.
136
 *	At the end, the longest subsequence is pulled out
137
 *	and placed in the array J by unravel
138
 *
139
 *	With J in hand, the matches there recorded are
140
 *	check'ed against reality to assure that no spurious
141
 *	matches have crept in due to hashing. If they have,
142
 *	they are broken, and "jackpot" is recorded--a harmless
143
 *	matter except that a true match for a spuriously
144
 *	mated line may now be unnecessarily reported as a change.
145
 *
146
 *	Much of the complexity of the program comes simply
147
 *	from trying to minimize core utilization and
148
 *	maximize the range of doable problems by dynamically
149
 *	allocating what is needed and reusing what is not.
150
 *	The core requirements for problems larger than somewhat
151
 *	are (in words) 2*length(file0) + length(file1) +
152
 *	3*(number of k-candidates installed),  typically about
153
 *	6n words for files of length n.
154
 */
155
156
struct cand {
157
	int	x;
158
	int	y;
159
	int	pred;
160
};
161
162
struct line {
163
	int	serial;
164
	int	value;
165
} *file[2];
166
167
/*
168
 * The following struct is used to record change information when
169
 * doing a "context" or "unified" diff.  (see routine "change" to
170
 * understand the highly mnemonic field names)
171
 */
172
struct context_vec {
173
	int	a;		/* start line in old file */
174
	int	b;		/* end line in old file */
175
	int	c;		/* start line in new file */
176
	int	d;		/* end line in new file */
177
};
178
179
#define	diff_output	printf
180
static FILE	*opentemp(const char *);
181
static void	 output(char *, FILE *, char *, FILE *, int);
182
static void	 check(FILE *, FILE *, int);
183
static void	 range(int, int, char *);
184
static void	 uni_range(int, int);
185
static void	 dump_context_vec(FILE *, FILE *, int);
186
static void	 dump_unified_vec(FILE *, FILE *, int);
187
static void	 prepare(int, FILE *, off_t, int);
188
static void	 prune(void);
189
static void	 equiv(struct line *, int, struct line *, int, int *);
190
static void	 unravel(int);
191
static void	 unsort(struct line *, int, int *);
192
static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193
static void	 sort(struct line *, int);
194
static void	 print_header(const char *, const char *);
195
static int	 ignoreline(char *);
196
static int	 asciifile(FILE *);
197
static int	 fetch(long *, int, int, FILE *, int, int, int);
198
static int	 newcand(int, int, int);
199
static int	 search(int *, int, int);
200
static int	 skipline(FILE *);
201
static int	 isqrt(int);
202
static int	 stone(int *, int, int *, int *, int);
203
static int	 readhash(FILE *, int);
204
static int	 files_differ(FILE *, FILE *, int);
205
static char	*match_function(const long *, int, FILE *);
206
static char	*preadline(int, size_t, off_t);
207
208
static int  *J;			/* will be overlaid on class */
209
static int  *class;		/* will be overlaid on file[0] */
210
static int  *klist;		/* will be overlaid on file[0] after class */
211
static int  *member;		/* will be overlaid on file[1] */
212
static int   clen;
213
static int   inifdef;		/* whether or not we are in a #ifdef block */
214
static int   len[2];
215
static int   pref, suff;	/* length of prefix and suffix */
216
static int   slen[2];
217
static int   anychange;
218
static long *ixnew;		/* will be overlaid on file[1] */
219
static long *ixold;		/* will be overlaid on klist */
220
static struct cand *clist;	/* merely a free storage pot for candidates */
221
static int   clistlen;		/* the length of clist */
222
static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223
static u_char *chrtran;		/* translation table for case-folding */
224
static struct context_vec *context_vec_start;
225
static struct context_vec *context_vec_end;
226
static struct context_vec *context_vec_ptr;
227
228
#define FUNCTION_CONTEXT_SIZE	55
229
static char lastbuf[FUNCTION_CONTEXT_SIZE];
230
static int lastline;
231
static int lastmatchline;
232
233
234
/*
235
 * chrtran points to one of 2 translation tables: cup2low if folding upper to
236
 * lower case clow2low if not folding case
237
 */
238
u_char clow2low[256] = {
239
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245
	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246
	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262
	0xfd, 0xfe, 0xff
263
};
264
265
u_char cup2low[256] = {
266
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272
	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273
	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289
	0xfd, 0xfe, 0xff
290
};
291
292
int
293
diffreg(char *file1, char *file2, int flags)
294
{
295
	FILE *f1, *f2;
296
	int i, rval;
297
298
	f1 = f2 = NULL;
299
	rval = D_SAME;
300
44516
	anychange = 0;
301
22258
	lastline = 0;
302
22258
	lastmatchline = 0;
303
22258
	context_vec_ptr = context_vec_start - 1;
304
22258
	if (flags & D_IGNORECASE)
305
		chrtran = cup2low;
306
	else
307
		chrtran = clow2low;
308
22258
	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309
		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310

23511
	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311
		goto closem;
312
313
22258
	if (flags & D_EMPTY1)
314
		f1 = fopen(_PATH_DEVNULL, "r");
315
	else {
316
22258
		if (!S_ISREG(stb1.st_mode)) {
317

2778
			if ((f1 = opentemp(file1)) == NULL ||
318
4167
			    fstat(fileno(f1), &stb1) < 0) {
319
				warn("%s", file1);
320
				status |= 2;
321
				goto closem;
322
			}
323
20869
		} else if (strcmp(file1, "-") == 0)
324
			f1 = stdin;
325
		else
326
20869
			f1 = fopen(file1, "r");
327
	}
328
22258
	if (f1 == NULL) {
329
		warn("%s", file1);
330
		status |= 2;
331
		goto closem;
332
	}
333
334
22258
	if (flags & D_EMPTY2)
335
		f2 = fopen(_PATH_DEVNULL, "r");
336
	else {
337
22258
		if (!S_ISREG(stb2.st_mode)) {
338

16276
			if ((f2 = opentemp(file2)) == NULL ||
339
24414
			    fstat(fileno(f2), &stb2) < 0) {
340
				warn("%s", file2);
341
				status |= 2;
342
				goto closem;
343
			}
344
14120
		} else if (strcmp(file2, "-") == 0)
345
			f2 = stdin;
346
		else
347
14120
			f2 = fopen(file2, "r");
348
	}
349
22258
	if (f2 == NULL) {
350
		warn("%s", file2);
351
		status |= 2;
352
		goto closem;
353
	}
354
355
23045
	switch (files_differ(f1, f2, flags)) {
356
	case 0:
357
		goto closem;
358
	case 1:
359
		break;
360
	default:
361
		/* error */
362
		status |= 2;
363
		goto closem;
364
	}
365
366

1505
	if ((flags & D_FORCEASCII) == 0 &&
367
1436
	    (!asciifile(f1) || !asciifile(f2))) {
368
		rval = D_BINARY;
369
		status |= 1;
370
		goto closem;
371
	}
372
787
	prepare(0, f1, stb1.st_size, flags);
373
787
	prepare(1, f2, stb2.st_size, flags);
374
375
787
	prune();
376
787
	sort(sfile[0], slen[0]);
377
787
	sort(sfile[1], slen[1]);
378
379
787
	member = (int *)file[1];
380
787
	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381
787
	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382
383
787
	class = (int *)file[0];
384
787
	unsort(sfile[0], slen[0], class);
385
787
	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386
387
787
	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388
787
	clen = 0;
389
787
	clistlen = 100;
390
787
	clist = xcalloc(clistlen, sizeof(*clist));
391
787
	i = stone(class, slen[0], member, klist, flags);
392
787
	free(member);
393
787
	free(class);
394
395
787
	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396
787
	unravel(klist[i]);
397
787
	free(clist);
398
787
	free(klist);
399
400
787
	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401
787
	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402
787
	check(f1, f2, flags);
403
787
	output(file1, f1, file2, f2, flags);
404
closem:
405
22258
	if (anychange) {
406
779
		status |= 1;
407
779
		if (rval == D_SAME)
408
779
			rval = D_DIFFER;
409
	}
410
22258
	if (f1 != NULL)
411
22258
		fclose(f1);
412
22258
	if (f2 != NULL)
413
22258
		fclose(f2);
414
415
22258
	return (rval);
416
22258
}
417
418
/*
419
 * Check to see if the given files differ.
420
 * Returns 0 if they are the same, 1 if different, and -1 on error.
421
 * XXX - could use code from cmp(1) [faster]
422
 */
423
static int
424
files_differ(FILE *f1, FILE *f2, int flags)
425
{
426
44516
	char buf1[BUFSIZ], buf2[BUFSIZ];
427
	size_t i, j;
428
429

66162
	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430
21646
	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431
612
		return (1);
432
	for (;;) {
433
66674
		i = fread(buf1, 1, sizeof(buf1), f1);
434
66674
		j = fread(buf2, 1, sizeof(buf2), f2);
435




219232
		if ((!i && ferror(f1)) || (!j && ferror(f2)))
436
			return (-1);
437
66674
		if (i != j)
438
			return (1);
439
66674
		if (i == 0)
440
21471
			return (0);
441
45203
		if (memcmp(buf1, buf2, i) != 0)
442
175
			return (1);
443
	}
444
22258
}
445
446
static FILE *
447
opentemp(const char *file)
448
{
449
19054
	char buf[BUFSIZ], tempfile[PATH_MAX];
450
	ssize_t nread;
451
	int ifd, ofd;
452
453
9527
	if (strcmp(file, "-") == 0)
454
7911
		ifd = STDIN_FILENO;
455
1616
	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
456
		return (NULL);
457
458
9527
	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
459
460
9527
	if ((ofd = mkstemp(tempfile)) < 0) {
461
		close(ifd);
462
		return (NULL);
463
	}
464
9527
	unlink(tempfile);
465
34233
	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
466
15179
		if (write(ofd, buf, nread) != nread) {
467
			close(ifd);
468
			close(ofd);
469
			return (NULL);
470
		}
471
	}
472
9527
	close(ifd);
473
9527
	lseek(ofd, (off_t)0, SEEK_SET);
474
9527
	return (fdopen(ofd, "r"));
475
9527
}
476
477
char *
478
splice(char *dir, char *file)
479
{
480
	char *tail, *buf;
481
	size_t dirlen;
482
483
	dirlen = strlen(dir);
484
	while (dirlen != 0 && dir[dirlen - 1] == '/')
485
	    dirlen--;
486
	if ((tail = strrchr(file, '/')) == NULL)
487
		tail = file;
488
	else
489
		tail++;
490
	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
491
	return (buf);
492
}
493
494
static void
495
prepare(int i, FILE *fd, off_t filesize, int flags)
496
{
497
	struct line *p;
498
	int j, h;
499
	size_t sz;
500
501
3148
	rewind(fd);
502
503
1574
	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
504
1574
	if (sz < 100)
505
		sz = 100;
506
507
1574
	p = xcalloc(sz + 3, sizeof(*p));
508
639814
	for (j = 0; (h = readhash(fd, flags));) {
509
318333
		if (j == sz) {
510
164
			sz = sz * 3 / 2;
511
164
			p = xreallocarray(p, sz + 3, sizeof(*p));
512
164
		}
513
318333
		p[++j].value = h;
514
	}
515
1574
	len[i] = j;
516
1574
	file[i] = p;
517
1574
}
518
519
static void
520
prune(void)
521
{
522
	int i, j;
523
524

11126
	for (pref = 0; pref < len[0] && pref < len[1] &&
525
2383
	    file[0][pref + 1].value == file[1][pref + 1].value;
526
	    pref++)
527
		;
528

5821
	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
529
1428
	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
530
1035
	    suff++)
531
		;
532
4722
	for (j = 0; j < 2; j++) {
533
1574
		sfile[j] = file[j] + pref;
534
1574
		slen[j] = len[j] - pref - suff;
535
631038
		for (i = 0; i <= slen[j]; i++)
536
313945
			sfile[j][i].serial = i;
537
	}
538
787
}
539
540
static void
541
equiv(struct line *a, int n, struct line *b, int m, int *c)
542
{
543
	int i, j;
544
545
	i = j = 1;
546

598888
	while (i <= n && j <= m) {
547
298156
		if (a[i].value < b[j].value)
548
37032
			a[i++].value = 0;
549
261124
		else if (a[i].value == b[j].value)
550
88836
			a[i++].value = j;
551
		else
552
172288
			j++;
553
	}
554
3477
	while (i <= n)
555
1345
		a[i++].value = 0;
556
787
	b[m + 1].value = 0;
557
	j = 0;
558
94498
	while (++j <= m) {
559
92924
		c[j] = -b[j].serial;
560
370316
		while (b[j + 1].value == b[j].value) {
561
			j++;
562
92234
			c[j] = b[j].serial;
563
		}
564
	}
565
787
	c[j] = -1;
566
787
}
567
568
/* Code taken from ping.c */
569
static int
570
isqrt(int n)
571
{
572
	int y, x = 1;
573
574
1574
	if (n == 0)
575
286
		return (0);
576
577
	do { /* newton was a stinker */
578
		y = x;
579
1507
		x = n / x;
580
1507
		x += y;
581
1507
		x /= 2;
582

2770
	} while ((x - y) > 1 || (x - y) < -1);
583
584
501
	return (x);
585
787
}
586
587
static int
588
stone(int *a, int n, int *b, int *c, int flags)
589
{
590
	int i, k, y, j, l;
591
	int oldc, tc, oldl, sq;
592
	u_int numtries, bound;
593
594
1574
	if (flags & D_MINIMAL)
595
		bound = UINT_MAX;
596
	else {
597
787
		sq = isqrt(n);
598
787
		bound = MAXIMUM(256, sq);
599
	}
600
601
	k = 0;
602
787
	c[0] = newcand(0, 0, 0);
603
256000
	for (i = 1; i <= n; i++) {
604
127213
		j = a[i];
605
127213
		if (j == 0)
606
			continue;
607
88836
		y = -b[j];
608
		oldl = 0;
609
88836
		oldc = c[0];
610
		numtries = 0;
611
88836
		do {
612
1153599
			if (y <= clist[oldc].y)
613
				continue;
614
1050531
			l = search(c, k, y);
615
1050531
			if (l != oldl + 1)
616
1028032
				oldc = c[l - 1];
617
1050531
			if (l <= k) {
618
965979
				if (clist[c[l]].y <= y)
619
					continue;
620
				tc = c[l];
621
123300
				c[l] = newcand(i, y, oldc);
622
				oldc = tc;
623
				oldl = l;
624
123300
				numtries++;
625
			} else {
626
84552
				c[l] = newcand(i, y, oldc);
627
84552
				k++;
628
84552
				break;
629
			}
630

2257110
		} while ((y = b[++j]) > 0 && numtries < bound);
631
	}
632
787
	return (k);
633
}
634
635
static int
636
newcand(int x, int y, int pred)
637
{
638
	struct cand *q;
639
640
417278
	if (clen == clistlen) {
641
2556
		clistlen = clistlen * 11 / 10;
642
2556
		clist = xreallocarray(clist, clistlen, sizeof(*clist));
643
2556
	}
644
208639
	q = clist + clen;
645
208639
	q->x = x;
646
208639
	q->y = y;
647
208639
	q->pred = pred;
648
208639
	return (clen++);
649
}
650
651
static int
652
search(int *c, int k, int y)
653
{
654
	int i, j, l, t;
655
656
2101062
	if (clist[c[k]].y < y)	/* quick look for typical case */
657
84552
		return (k + 1);
658
	i = 0;
659
965979
	j = k + 1;
660
965979
	for (;;) {
661
8948594
		l = (i + j) / 2;
662
8948594
		if (l <= i)
663
			break;
664
8825294
		t = clist[c[l]].y;
665
8825294
		if (t > y)
666
3687958
			j = l;
667
5137336
		else if (t < y)
668
			i = l;
669
		else
670
842679
			return (l);
671
	}
672
123300
	return (l + 1);
673
1050531
}
674
675
static void
676
unravel(int p)
677
{
678
	struct cand *q;
679
	int i;
680
681
264323
	for (i = 0; i <= len[0]; i++)
682
261962
		J[i] = i <= pref ? i :
683
129283
		    i > len[0] - suff ? i + len[1] - len[0] : 0;
684
170678
	for (q = clist + p; q->y != 0; q = clist + q->pred)
685
84552
		J[q->x + pref] = q->y + pref;
686
787
}
687
688
/*
689
 * Check does double duty:
690
 *  1.	ferret out any fortuitous correspondences due
691
 *	to confounding by hashing (which result in "jackpot")
692
 *  2.  collect random access indexes to the two files
693
 */
694
static void
695
check(FILE *f1, FILE *f2, int flags)
696
{
697
	int i, j, jackpot, c, d;
698
	long ctold, ctnew;
699
700
1574
	rewind(f1);
701
787
	rewind(f2);
702
	j = 1;
703
787
	ixold[0] = ixnew[0] = 0;
704
	jackpot = 0;
705
	ctold = ctnew = 0;
706
261962
	for (i = 1; i <= len[0]; i++) {
707
130194
		if (J[i] == 0) {
708
42661
			ixold[i] = ctold += skipline(f1);
709
42661
			continue;
710
		}
711
220979
		while (j < J[i]) {
712
66723
			ixnew[j] = ctnew += skipline(f2);
713
66723
			j++;
714
		}
715
87533
		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
716
			for (;;) {
717
				c = getc(f1);
718
				d = getc(f2);
719
				/*
720
				 * GNU diff ignores a missing newline
721
				 * in one file for -b or -w.
722
				 */
723
				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
724
					if (c == EOF && d == '\n') {
725
						ctnew++;
726
						break;
727
					} else if (c == '\n' && d == EOF) {
728
						ctold++;
729
						break;
730
					}
731
				}
732
				ctold++;
733
				ctnew++;
734
				if ((flags & D_FOLDBLANKS) && isspace(c) &&
735
				    isspace(d)) {
736
					do {
737
						if (c == '\n')
738
							break;
739
						ctold++;
740
					} while (isspace(c = getc(f1)));
741
					do {
742
						if (d == '\n')
743
							break;
744
						ctnew++;
745
					} while (isspace(d = getc(f2)));
746
				} else if ((flags & D_IGNOREBLANKS)) {
747
					while (isspace(c) && c != '\n') {
748
						c = getc(f1);
749
						ctold++;
750
					}
751
					while (isspace(d) && d != '\n') {
752
						d = getc(f2);
753
						ctnew++;
754
					}
755
				}
756
				if (chrtran[c] != chrtran[d]) {
757
					jackpot++;
758
					J[i] = 0;
759
					if (c != '\n' && c != EOF)
760
						ctold += skipline(f1);
761
					if (d != '\n' && c != EOF)
762
						ctnew += skipline(f2);
763
					break;
764
				}
765
				if (c == '\n' || c == EOF)
766
					break;
767
			}
768
		} else {
769
			for (;;) {
770
2043831
				ctold++;
771
2043831
				ctnew++;
772


14306817
				if ((c = getc(f1)) != (d = getc(f2))) {
773
					/* jackpot++; */
774
108
					J[i] = 0;
775
108
					if (c != '\n' && c != EOF)
776
						ctold += skipline(f1);
777
108
					if (d != '\n' && c != EOF)
778
36
						ctnew += skipline(f2);
779
					break;
780
				}
781
2043723
				if (c == '\n' || c == EOF)
782
					break;
783
			}
784
		}
785
87533
		ixold[i] = ctold;
786
87533
		ixnew[j] = ctnew;
787
87533
		j++;
788
87533
	}
789
68553
	for (; j <= len[1]; j++)
790
33883
		ixnew[j] = ctnew += skipline(f2);
791
	/*
792
	 * if (jackpot)
793
	 *	fprintf(stderr, "jackpot\n");
794
	 */
795
787
}
796
797
/* shellsort CACM #201 */
798
static void
799
sort(struct line *a, int n)
800
{
801
	struct line *ai, *aim, w;
802
	int j, m = 0, k;
803
804
3148
	if (n == 0)
805
466
		return;
806
12428
	for (j = 1; j <= n; j *= 2)
807
5106
		m = 2 * j - 1;
808
10212
	for (m /= 2; m != 0; m /= 2) {
809
3998
		k = n - m;
810
5393620
		for (j = 1; j <= k; j++) {
811
10599274
			for (ai = &a[j]; ai > a; ai -= m) {
812
5131346
				aim = &ai[m];
813
5131346
				if (aim < ai)
814
					break;	/* wraparound */
815

6069642
				if (aim->value > ai[0].value ||
816
3053644
				    (aim->value == ai[0].value &&
817
938296
					aim->serial > ai[0].serial))
818
					break;
819
2606825
				w.value = ai[0].value;
820
2606825
				ai[0].value = aim->value;
821
2606825
				aim->value = w.value;
822
2606825
				w.serial = ai[0].serial;
823
2606825
				ai[0].serial = aim->serial;
824
2606825
				aim->serial = w.serial;
825
			}
826
		}
827
	}
828
2682
}
829
830
static void
831
unsort(struct line *f, int l, int *b)
832
{
833
	int *a, i;
834
835
1574
	a = xcalloc(l + 1, sizeof(*a));
836
256000
	for (i = 1; i <= l; i++)
837
127213
		a[f[i].serial] = f[i].value;
838
256000
	for (i = 1; i <= l; i++)
839
127213
		b[i] = a[i];
840
787
	free(a);
841
787
}
842
843
static int
844
skipline(FILE *f)
845
{
846
	int i, c;
847
848

17734098
	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849
		continue;
850
143303
	return (i);
851
}
852
853
static void
854
output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
855
{
856
	int m, i0, i1, j0, j1;
857
858
787
	rewind(f1);
859
787
	rewind(f2);
860
787
	m = len[0];
861
787
	J[0] = 0;
862
787
	J[m + 1] = len[1] + 1;
863
787
	if (diff_format != D_EDIT) {
864
32930
		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865

229623
			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
866
66122
				i0++;
867
15795
			j0 = J[i0 - 1] + 1;
868
			i1 = i0 - 1;
869

143612
			while (i1 < m && J[i1 + 1] == 0)
870
				i1++;
871
15795
			j1 = J[i1 + 1] - 1;
872
15795
			J[i1] = j1;
873
15795
			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
874
		}
875
	} else {
876
10296
		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
877

95220
			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
878
21303
				i0--;
879
5031
			j0 = J[i0 + 1] - 1;
880
			i1 = i0 + 1;
881

46521
			while (i1 > 1 && J[i1 - 1] == 0)
882
10512
				i1--;
883
5031
			j1 = J[i1 - 1] + 1;
884
5031
			J[i1] = j1;
885
5031
			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
886
		}
887
	}
888
787
	if (m == 0)
889
135
		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
890
787
	if (diff_format == D_IFDEF) {
891
		for (;;) {
892
#define	c i0
893
			if ((c = getc(f1)) == EOF)
894
				return;
895
			diff_output("%c", c);
896
		}
897
#undef c
898
	}
899
787
	if (anychange != 0) {
900
779
		if (diff_format == D_CONTEXT)
901
117
			dump_context_vec(f1, f2, flags);
902
662
		else if (diff_format == D_UNIFIED)
903
202
			dump_unified_vec(f1, f2, flags);
904
	}
905
1574
}
906
907
static void
908
range(int a, int b, char *separator)
909
{
910
36284
	diff_output("%d", a > b ? b : a);
911
18142
	if (a < b)
912
7331
		diff_output("%s%d", separator, b);
913
18142
}
914
915
static void
916
uni_range(int a, int b)
917
{
918
4572
	if (a < b)
919
2171
		diff_output("%d,%d", a, b - a + 1);
920
115
	else if (a == b)
921
28
		diff_output("%d", b);
922
	else
923
87
		diff_output("%d,0", b);
924
2286
}
925
926
static char *
927
preadline(int fd, size_t rlen, off_t off)
928
{
929
	char *line;
930
	ssize_t nr;
931
932
366
	line = xmalloc(rlen + 1);
933
183
	if ((nr = pread(fd, line, rlen, off)) < 0)
934
		err(2, "preadline");
935

366
	if (nr > 0 && line[nr-1] == '\n')
936
183
		nr--;
937
183
	line[nr] = '\0';
938
183
	return (line);
939
}
940
941
static int
942
ignoreline(char *line)
943
{
944
	int ret;
945
946
366
	ret = regexec(&ignore_re, line, 0, NULL, 0);
947
183
	free(line);
948
183
	return (ret == 0);	/* if it matched, it should be ignored. */
949
}
950
951
/*
952
 * Indicate that there is a difference between lines a and b of the from file
953
 * to get to lines c to d of the to file.  If a is greater then b then there
954
 * are no lines in the from file involved and this means that there were
955
 * lines appended (beginning at b).  If c is greater than d then there are
956
 * lines missing from the to file.
957
 */
958
static void
959
change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
960
    int *pflags)
961
{
962
	static size_t max_context = 64;
963
41922
	int i;
964
965
restart:
966

46986
	if (diff_format != D_IFDEF && a > b && c > d)
967
315
		return;
968
20655
	if (ignore_pats != NULL) {
969
		char *line;
970
		/*
971
		 * All lines in the change, insert, or delete must
972
		 * match an ignore pattern for the change to be
973
		 * ignored.
974
		 */
975
176
		if (a <= b) {		/* Changes and deletes. */
976
228
			for (i = a; i <= b; i++) {
977
452
				line = preadline(fileno(f1),
978
113
				    ixold[i] - ixold[i - 1], ixold[i - 1]);
979
113
				if (!ignoreline(line))
980
112
					goto proceed;
981
			}
982
		}
983

65
		if (a > b || c <= d) {	/* Changes and inserts. */
984
154
			for (i = c; i <= d; i++) {
985
280
				line = preadline(fileno(f2),
986
70
				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
987
70
				if (!ignoreline(line))
988
56
					goto proceed;
989
			}
990
		}
991
8
		return;
992
	}
993
proceed:
994
20647
	if (*pflags & D_HEADER) {
995
		diff_output("%s %s %s\n", diffargs, file1, file2);
996
		*pflags &= ~D_HEADER;
997
	}
998
20647
	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
999
		/*
1000
		 * Allocate change records as needed.
1001
		 */
1002
10127
		if (context_vec_ptr == context_vec_end - 1) {
1003
337
			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1004
337
			max_context <<= 1;
1005
337
			context_vec_start = xreallocarray(context_vec_start,
1006
			    max_context, sizeof(*context_vec_start));
1007
337
			context_vec_end = context_vec_start + max_context;
1008
337
			context_vec_ptr = context_vec_start + offset;
1009
337
		}
1010
10127
		if (anychange == 0) {
1011
			/*
1012
			 * Print the context/unidiff header first time through.
1013
			 */
1014
319
			print_header(file1, file2);
1015
319
			anychange = 1;
1016

11995
		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1017
1868
		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1018
			/*
1019
			 * If this change is more than 'diff_context' lines from the
1020
			 * previous change, dump the record and reset it.
1021
			 */
1022
1868
			if (diff_format == D_CONTEXT)
1023
927
				dump_context_vec(f1, f2, *pflags);
1024
			else
1025
941
				dump_unified_vec(f1, f2, *pflags);
1026
		}
1027
10127
		context_vec_ptr++;
1028
10127
		context_vec_ptr->a = a;
1029
10127
		context_vec_ptr->b = b;
1030
10127
		context_vec_ptr->c = c;
1031
10127
		context_vec_ptr->d = d;
1032
10127
		return;
1033
	}
1034
10520
	if (anychange == 0)
1035
460
		anychange = 1;
1036

21040
	switch (diff_format) {
1037
	case D_BRIEF:
1038
		return;
1039
	case D_NORMAL:
1040
	case D_EDIT:
1041
10520
		range(a, b, ",");
1042
29137
		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1043
10520
		if (diff_format == D_NORMAL)
1044
5534
			range(c, d, ",");
1045
10520
		diff_output("\n");
1046
10520
		break;
1047
	case D_REVERSE:
1048
		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1049
		range(a, b, " ");
1050
		diff_output("\n");
1051
		break;
1052
	case D_NREVERSE:
1053
		if (a > b)
1054
			diff_output("a%d %d\n", b, d - c + 1);
1055
		else {
1056
			diff_output("d%d %d\n", a, b - a + 1);
1057
			if (!(c > d))
1058
				/* add changed lines */
1059
				diff_output("a%d %d\n", b, d - c + 1);
1060
		}
1061
		break;
1062
	}
1063
10520
	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1064
5534
		fetch(ixold, a, b, f1, '<', 1, *pflags);
1065

9761
		if (a <= b && c <= d && diff_format == D_NORMAL)
1066
3869
			diff_output("---\n");
1067
	}
1068
10520
	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1069
10520
	if (i != 0 && diff_format == D_EDIT) {
1070
		/*
1071
		 * A non-zero return value for D_EDIT indicates that the
1072
		 * last line printed was a bare dot (".") that has been
1073
		 * escaped as ".." to prevent ed(1) from misinterpreting
1074
		 * it.  We have to add a substitute command to change this
1075
		 * back and restart where we left off.
1076
		 */
1077
9
		diff_output(".\n");
1078
9
		diff_output("%ds/.//\n", a + i - 1);
1079
		b = a + i - 1;
1080
		a = b + 1;
1081
9
		c += i;
1082
9
		goto restart;
1083
	}
1084

15488
	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1085
4815
		diff_output(".\n");
1086
10511
	if (inifdef) {
1087
		diff_output("#endif /* %s */\n", ifdefname);
1088
		inifdef = 0;
1089
	}
1090
31472
}
1091
1092
static int
1093
fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1094
{
1095
	int i, j, c, lastc, col, nc;
1096
1097
	/*
1098
	 * When doing #ifdef's, copy down to current line
1099
	 * if this is the first file, so that stuff makes it to output.
1100
	 */
1101
102758
	if (diff_format == D_IFDEF && oldfile) {
1102
		long curpos = ftell(lb);
1103
		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1104
		nc = f[a > b ? b : a - 1] - curpos;
1105
		for (i = 0; i < nc; i++)
1106
			diff_output("%c", getc(lb));
1107
	}
1108
51379
	if (a > b)
1109
2377
		return (0);
1110
49002
	if (diff_format == D_IFDEF) {
1111
		if (inifdef) {
1112
			diff_output("#else /* %s%s */\n",
1113
			    oldfile == 1 ? "!" : "", ifdefname);
1114
		} else {
1115
			if (oldfile)
1116
				diff_output("#ifndef %s\n", ifdefname);
1117
			else
1118
				diff_output("#ifdef %s\n", ifdefname);
1119
		}
1120
		inifdef = 1 + oldfile;
1121
	}
1122
438094
	for (i = a; i <= b; i++) {
1123
170234
		fseek(lb, f[i - 1], SEEK_SET);
1124
170234
		nc = f[i] - f[i - 1];
1125
170234
		if (diff_format != D_IFDEF && ch != '\0') {
1126
148022
			diff_output("%c", ch);
1127

148022
			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1128
			    || diff_format == D_UNIFIED))
1129
				diff_output("\t");
1130
148022
			else if (diff_format != D_UNIFIED)
1131
90884
				diff_output(" ");
1132
		}
1133
		col = 0;
1134
8725960
		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1135

16771740
			if ((c = getc(lb)) == EOF) {
1136
360
				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1137
180
				    diff_format == D_NREVERSE)
1138
18
					warnx("No newline at end of file");
1139
				else
1140
162
					diff_output("\n\\ No newline at end of "
1141
					    "file\n");
1142
180
				return (0);
1143
			}
1144

4332220
			if (c == '\t' && (flags & D_EXPANDTABS)) {
1145
				do {
1146
					diff_output(" ");
1147
				} while (++col & 7);
1148
			} else {
1149
8385510
				if (diff_format == D_EDIT && j == 1 && c == '\n'
1150
4192755
				    && lastc == '.') {
1151
					/*
1152
					 * Don't print a bare "." line
1153
					 * since that will confuse ed(1).
1154
					 * Print ".." instead and return,
1155
					 * giving the caller an offset
1156
					 * from which to restart.
1157
					 */
1158
9
					diff_output(".\n");
1159
9
					return (i - a + 1);
1160
				}
1161
4192746
				diff_output("%c", c);
1162
4192746
				col++;
1163
			}
1164
		}
1165
	}
1166
48813
	return (0);
1167
51379
}
1168
1169
/*
1170
 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1171
 */
1172
static int
1173
readhash(FILE *f, int flags)
1174
{
1175
	int i, t, space;
1176
	int sum;
1177
1178
	sum = 1;
1179
	space = 0;
1180
639814
	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1181
319907
		if (flags & D_IGNORECASE)
1182
			for (i = 0; (t = getc(f)) != '\n'; i++) {
1183
				if (t == EOF) {
1184
					if (i == 0)
1185
						return (0);
1186
					break;
1187
				}
1188
				sum = sum * 127 + chrtran[t];
1189
			}
1190
		else
1191

37716888
			for (i = 0; (t = getc(f)) != '\n'; i++) {
1192
7289314
				if (t == EOF) {
1193
1862
					if (i == 0)
1194
1574
						return (0);
1195
					break;
1196
				}
1197
7287452
				sum = sum * 127 + t;
1198
			}
1199
	} else {
1200
		for (i = 0;;) {
1201
			switch (t = getc(f)) {
1202
			case '\t':
1203
			case '\r':
1204
			case '\v':
1205
			case '\f':
1206
			case ' ':
1207
				space++;
1208
				continue;
1209
			default:
1210
				if (space && (flags & D_IGNOREBLANKS) == 0) {
1211
					i++;
1212
					space = 0;
1213
				}
1214
				sum = sum * 127 + chrtran[t];
1215
				i++;
1216
				continue;
1217
			case EOF:
1218
				if (i == 0)
1219
					return (0);
1220
				/* FALLTHROUGH */
1221
			case '\n':
1222
				break;
1223
			}
1224
			break;
1225
		}
1226
	}
1227
	/*
1228
	 * There is a remote possibility that we end up with a zero sum.
1229
	 * Zero is used as an EOF marker, so return 1 instead.
1230
	 */
1231
318333
	return (sum == 0 ? 1 : sum);
1232
319907
}
1233
1234
static int
1235
asciifile(FILE *f)
1236
{
1237
2872
	unsigned char buf[BUFSIZ];
1238
	size_t cnt;
1239
1240
1436
	if (f == NULL)
1241
		return (1);
1242
1243
1436
	rewind(f);
1244
1436
	cnt = fread(buf, 1, sizeof(buf), f);
1245
1436
	return (memchr(buf, '\0', cnt) == NULL);
1246
1436
}
1247
1248
#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1249
1250
static char *
1251
match_function(const long *f, int pos, FILE *fp)
1252
{
1253
	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1254
	size_t nc;
1255
	int last = lastline;
1256
	char *state = NULL;
1257
1258
	lastline = pos;
1259
	while (pos > last) {
1260
		fseek(fp, f[pos - 1], SEEK_SET);
1261
		nc = f[pos] - f[pos - 1];
1262
		if (nc >= sizeof(buf))
1263
			nc = sizeof(buf) - 1;
1264
		nc = fread(buf, 1, nc, fp);
1265
		if (nc > 0) {
1266
			buf[nc] = '\0';
1267
			buf[strcspn(buf, "\n")] = '\0';
1268
			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1269
				if (begins_with(buf, "private:")) {
1270
					if (!state)
1271
						state = " (private)";
1272
				} else if (begins_with(buf, "protected:")) {
1273
					if (!state)
1274
						state = " (protected)";
1275
				} else if (begins_with(buf, "public:")) {
1276
					if (!state)
1277
						state = " (public)";
1278
				} else {
1279
					strlcpy(lastbuf, buf, sizeof lastbuf);
1280
					if (state)
1281
						strlcat(lastbuf, state,
1282
						    sizeof lastbuf);
1283
					lastmatchline = pos;
1284
					return lastbuf;
1285
				}
1286
			}
1287
		}
1288
		pos--;
1289
	}
1290
	return lastmatchline > 0 ? lastbuf : NULL;
1291
}
1292
1293
/* dump accumulated "context" diff changes */
1294
static void
1295
dump_context_vec(FILE *f1, FILE *f2, int flags)
1296
{
1297
2088
	struct context_vec *cvp = context_vec_start;
1298
	int lowa, upb, lowc, upd, do_output;
1299
	int a, b, c, d;
1300
	char ch, *f;
1301
1302
1044
	if (context_vec_start > context_vec_ptr)
1303
		return;
1304
1305
	b = d = 0;		/* gcc */
1306
3042
	lowa = MAXIMUM(1, cvp->a - diff_context);
1307
3132
	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1308
3042
	lowc = MAXIMUM(1, cvp->c - diff_context);
1309
3132
	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1310
1311
1044
	diff_output("***************");
1312
1044
	if ((flags & D_PROTOTYPE)) {
1313
		f = match_function(ixold, lowa-1, f1);
1314
		if (f != NULL)
1315
			diff_output(" %s", f);
1316
	}
1317
1044
	diff_output("\n*** ");
1318
1044
	range(lowa, upb, ",");
1319
1044
	diff_output(" ****\n");
1320
1321
	/*
1322
	 * Output changes to the "old" file.  The first loop suppresses
1323
	 * output if there were no changes to the "old" file (we'll see
1324
	 * the "old" lines as context in the "new" list).
1325
	 */
1326
	do_output = 0;
1327
3078
	for (; cvp <= context_vec_ptr; cvp++)
1328
1251
		if (cvp->a <= cvp->b) {
1329
756
			cvp = context_vec_start;
1330
			do_output++;
1331
756
			break;
1332
		}
1333
1044
	if (do_output) {
1334
10116
		while (cvp <= context_vec_ptr) {
1335
4680
			a = cvp->a;
1336
4680
			b = cvp->b;
1337
4680
			c = cvp->c;
1338
4680
			d = cvp->d;
1339
1340

8550
			if (a <= b && c <= d)
1341
3708
				ch = 'c';
1342
			else
1343
972
				ch = (a <= b) ? 'd' : 'a';
1344
1345
4680
			if (ch == 'a')
1346
810
				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1347
			else {
1348
3870
				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1349
7740
				fetch(ixold, a, b, f1,
1350
3870
				    ch == 'c' ? '!' : '-', 0, flags);
1351
			}
1352
4680
			lowa = b + 1;
1353
4680
			cvp++;
1354
		}
1355
756
		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1356
756
	}
1357
	/* output changes to the "new" file */
1358
1044
	diff_output("--- ");
1359
1044
	range(lowc, upd, ",");
1360
1044
	diff_output(" ----\n");
1361
1362
	do_output = 0;
1363
2196
	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1364
1071
		if (cvp->c <= cvp->d) {
1365
1017
			cvp = context_vec_start;
1366
			do_output++;
1367
1017
			break;
1368
		}
1369
1044
	if (do_output) {
1370
10935
		while (cvp <= context_vec_ptr) {
1371
4959
			a = cvp->a;
1372
4959
			b = cvp->b;
1373
4959
			c = cvp->c;
1374
4959
			d = cvp->d;
1375
1376

8802
			if (a <= b && c <= d)
1377
3708
				ch = 'c';
1378
			else
1379
1251
				ch = (a <= b) ? 'd' : 'a';
1380
1381
4959
			if (ch == 'd')
1382
135
				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1383
			else {
1384
4824
				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1385
9648
				fetch(ixnew, c, d, f2,
1386
4824
				    ch == 'c' ? '!' : '+', 0, flags);
1387
			}
1388
4959
			lowc = d + 1;
1389
4959
			cvp++;
1390
		}
1391
1017
		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1392
1017
	}
1393
1044
	context_vec_ptr = context_vec_start - 1;
1394
2088
}
1395
1396
/* dump accumulated "unified" diff changes */
1397
static void
1398
dump_unified_vec(FILE *f1, FILE *f2, int flags)
1399
{
1400
2286
	struct context_vec *cvp = context_vec_start;
1401
	int lowa, upb, lowc, upd;
1402
	int a, b, c, d;
1403
	char ch, *f;
1404
1405
1143
	if (context_vec_start > context_vec_ptr)
1406
		return;
1407
1408
	b = d = 0;		/* gcc */
1409
3268
	lowa = MAXIMUM(1, cvp->a - diff_context);
1410
3429
	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1411
3268
	lowc = MAXIMUM(1, cvp->c - diff_context);
1412
3429
	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1413
1414
1143
	diff_output("@@ -");
1415
1143
	uni_range(lowa, upb);
1416
1143
	diff_output(" +");
1417
1143
	uni_range(lowc, upd);
1418
1143
	diff_output(" @@");
1419
1143
	if ((flags & D_PROTOTYPE)) {
1420
		f = match_function(ixold, lowa-1, f1);
1421
		if (f != NULL)
1422
			diff_output(" %s", f);
1423
	}
1424
1143
	diff_output("\n");
1425
1426
	/*
1427
	 * Output changes in "unified" diff format--the old and new lines
1428
	 * are printed together.
1429
	 */
1430
12568
	for (; cvp <= context_vec_ptr; cvp++) {
1431
5141
		a = cvp->a;
1432
5141
		b = cvp->b;
1433
5141
		c = cvp->c;
1434
5141
		d = cvp->d;
1435
1436
		/*
1437
		 * c: both new and old changes
1438
		 * d: only changes in the old file
1439
		 * a: only changes in the new file
1440
		 */
1441

9097
		if (a <= b && c <= d)
1442
3794
			ch = 'c';
1443
		else
1444
1347
			ch = (a <= b) ? 'd' : 'a';
1445
1446

10282
		switch (ch) {
1447
		case 'c':
1448
3794
			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1449
3794
			fetch(ixold, a, b, f1, '-', 0, flags);
1450
3794
			fetch(ixnew, c, d, f2, '+', 0, flags);
1451
3794
			break;
1452
		case 'd':
1453
162
			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1454
162
			fetch(ixold, a, b, f1, '-', 0, flags);
1455
162
			break;
1456
		case 'a':
1457
1185
			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1458
1185
			fetch(ixnew, c, d, f2, '+', 0, flags);
1459
1185
			break;
1460
		}
1461
5141
		lowa = b + 1;
1462
5141
		lowc = d + 1;
1463
	}
1464
1143
	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1465
1466
1143
	context_vec_ptr = context_vec_start - 1;
1467
2286
}
1468
1469
static void
1470
print_header(const char *file1, const char *file2)
1471
{
1472
638
	if (label[0] != NULL)
1473
		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1474
		    label[0]);
1475
	else
1476
319
		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1477
319
		    file1, ctime(&stb1.st_mtime));
1478
319
	if (label[1] != NULL)
1479
		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1480
		    label[1]);
1481
	else
1482
319
		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1483
319
		    file2, ctime(&stb2.st_mtime));
1484
319
}