GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: bin/pax/pat_rep.c Lines: 21 353 5.9 %
Date: 2017-11-13 Branches: 16 316 5.1 %

Line Branch Exec Source
1
/*	$OpenBSD: pat_rep.c,v 1.43 2017/09/16 07:42:34 otto Exp $	*/
2
/*	$NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $	*/
3
4
/*-
5
 * Copyright (c) 1992 Keith Muller.
6
 * Copyright (c) 1992, 1993
7
 *	The Regents of the University of California.  All rights reserved.
8
 *
9
 * This code is derived from software contributed to Berkeley by
10
 * Keith Muller of the University of California, San Diego.
11
 *
12
 * Redistribution and use in source and binary forms, with or without
13
 * modification, are permitted provided that the following conditions
14
 * are met:
15
 * 1. Redistributions of source code must retain the above copyright
16
 *    notice, this list of conditions and the following disclaimer.
17
 * 2. Redistributions in binary form must reproduce the above copyright
18
 *    notice, this list of conditions and the following disclaimer in the
19
 *    documentation and/or other materials provided with the distribution.
20
 * 3. Neither the name of the University nor the names of its contributors
21
 *    may be used to endorse or promote products derived from this software
22
 *    without specific prior written permission.
23
 *
24
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34
 * SUCH DAMAGE.
35
 */
36
37
#include <sys/types.h>
38
#include <sys/stat.h>
39
#include <regex.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
#include <string.h>
43
44
#include "pax.h"
45
#include "extern.h"
46
47
/*
 * One node of the list of user supplied substitution expressions (-s).
 * Nodes are chained through "fow"; rep_add() appends them in the order
 * the expressions were supplied.
 */
typedef struct replace {
	char		*nstr;	/* replacement text that is substituted in */
	regex_t		rcmp;	/* compiled expression the name is matched with */
	int		flgs;	/* option bits, any of PRNT and GLOB */
#define	PRNT		0x1	/* report the substitution on stderr */
#define	GLOB		0x2	/* keep substituting along the whole name */
	struct replace	*fow;	/* next node in the list, NULL at the tail */
} REPLACE;
58
59
/*
60
 * routines to handle pattern matching, name modification (regular expression
61
 * substitution and interactive renames), and destination name modification for
62
 * copy (-rw). Both file name and link names are adjusted as required in these
63
 * routines.
64
 */
65
66
#define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
67
static PATTERN *pathead = NULL;		/* file pattern match list head */
68
static PATTERN *pattail = NULL;		/* file pattern match list tail */
69
static REPLACE *rephead = NULL;		/* replacement string list head */
70
static REPLACE *reptail = NULL;		/* replacement string list tail */
71
72
static int rep_name(char *, size_t, int *, int);
73
static int tty_rename(ARCHD *);
74
static int fix_path(char *, int *, char *, int);
75
static int fn_match(char *, char *, char **);
76
static char * range_match(char *, int);
77
static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
78
79
/*
80
 * rep_add()
81
 *	parses the -s replacement string; compiles the regular expression
82
 *	and stores the compiled value and it's replacement string together in
83
 *	replacement string list. Input to this function is of the form:
84
 *		/old/new/pg
85
 *	The first char in the string specifies the delimiter used by this
86
 *	replacement string. "Old" is a regular expression in "ed" format which
87
 *	is compiled by regcomp() and is applied to filenames. "new" is the
88
 *	substitution string; p and g are options flags for printing and global
89
 *	replacement (over the single filename)
90
 * Return:
91
 *	0 if a proper replacement string and regular expression was added to
92
 *	the list of replacement patterns; -1 otherwise.
93
 */
94
95
int
96
rep_add(char *str)
97
{
98
	char *pt1;
99
	char *pt2;
100
	REPLACE *rep;
101
	int res;
102
	char rebuf[BUFSIZ];
103
104
	/*
105
	 * throw out the bad parameters
106
	 */
107
	if ((str == NULL) || (*str == '\0')) {
108
		paxwarn(1, "Empty replacement string");
109
		return(-1);
110
	}
111
112
	/*
113
	 * first character in the string specifies what the delimiter is for
114
	 * this expression
115
	 */
116
	for (pt1 = str+1; *pt1; pt1++) {
117
		if (*pt1 == '\\') {
118
			pt1++;
119
			continue;
120
		}
121
		if (*pt1 == *str)
122
			break;
123
	}
124
	if (*pt1 == '\0') {
125
		paxwarn(1, "Invalid replacement string %s", str);
126
		return(-1);
127
	}
128
129
	/*
130
	 * allocate space for the node that handles this replacement pattern
131
	 * and split out the regular expression and try to compile it
132
	 */
133
	if ((rep = malloc(sizeof(REPLACE))) == NULL) {
134
		paxwarn(1, "Unable to allocate memory for replacement string");
135
		return(-1);
136
	}
137
138
	*pt1 = '\0';
139
	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
140
		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
141
		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
142
		free(rep);
143
		return(-1);
144
	}
145
146
	/*
147
	 * put the delimiter back in case we need an error message and
148
	 * locate the delimiter at the end of the replacement string
149
	 * we then point the node at the new substitution string
150
	 */
151
	*pt1++ = *str;
152
	for (pt2 = pt1; *pt2; pt2++) {
153
		if (*pt2 == '\\') {
154
			pt2++;
155
			continue;
156
		}
157
		if (*pt2 == *str)
158
			break;
159
	}
160
	if (*pt2 == '\0') {
161
		regfree(&(rep->rcmp));
162
		free(rep);
163
		paxwarn(1, "Invalid replacement string %s", str);
164
		return(-1);
165
	}
166
167
	*pt2 = '\0';
168
	rep->nstr = pt1;
169
	pt1 = pt2++;
170
	rep->flgs = 0;
171
172
	/*
173
	 * set the options if any
174
	 */
175
	while (*pt2 != '\0') {
176
		switch (*pt2) {
177
		case 'g':
178
		case 'G':
179
			rep->flgs  |= GLOB;
180
			break;
181
		case 'p':
182
		case 'P':
183
			rep->flgs  |= PRNT;
184
			break;
185
		default:
186
			regfree(&(rep->rcmp));
187
			free(rep);
188
			*pt1 = *str;
189
			paxwarn(1, "Invalid replacement string option %s", str);
190
			return(-1);
191
		}
192
		++pt2;
193
	}
194
195
	/*
196
	 * all done, link it in at the end
197
	 */
198
	rep->fow = NULL;
199
	if (rephead == NULL) {
200
		reptail = rephead = rep;
201
		return(0);
202
	}
203
	reptail->fow = rep;
204
	reptail = rep;
205
	return(0);
206
}
207
208
/*
209
 * pat_add()
210
 *	add a pattern match to the pattern match list. Pattern matches are used
211
 *	to select which archive members are extracted. (They appear as
212
 *	arguments to pax in the list and read modes). If no patterns are
213
 *	supplied to pax, all members in the archive will be selected (and the
214
 *	pattern match list is empty).
215
 * Return:
216
 *	0 if the pattern was added to the list, -1 otherwise
217
 */
218
219
int
220
pat_add(char *str, char *chdirname)
221
{
222
	PATTERN *pt;
223
224
	/*
225
	 * throw out the junk
226
	 */
227
	if ((str == NULL) || (*str == '\0')) {
228
		paxwarn(1, "Empty pattern string");
229
		return(-1);
230
	}
231
232
	/*
233
	 * allocate space for the pattern and store the pattern. the pattern is
234
	 * part of argv so do not bother to copy it, just point at it. Add the
235
	 * node to the end of the pattern list
236
	 */
237
	if ((pt = malloc(sizeof(PATTERN))) == NULL) {
238
		paxwarn(1, "Unable to allocate memory for pattern string");
239
		return(-1);
240
	}
241
242
	pt->pstr = str;
243
	pt->pend = NULL;
244
	pt->plen = strlen(str);
245
	pt->fow = NULL;
246
	pt->flgs = 0;
247
	pt->chdname = chdirname;
248
249
	if (pathead == NULL) {
250
		pattail = pathead = pt;
251
		return(0);
252
	}
253
	pattail->fow = pt;
254
	pattail = pt;
255
	return(0);
256
}
257
258
/*
259
 * pat_chk()
260
 *	complain if any the user supplied pattern did not result in a match to
261
 *	a selected archive member.
262
 */
263
264
void
265
pat_chk(void)
266
{
267
	PATTERN *pt;
268
	int wban = 0;
269
270
	/*
271
	 * walk down the list checking the flags to make sure MTCH was set,
272
	 * if not complain
273
	 */
274
180
	for (pt = pathead; pt != NULL; pt = pt->fow) {
275
		if (pt->flgs & MTCH)
276
			continue;
277
		if (!wban) {
278
			paxwarn(1, "WARNING! These patterns were not matched:");
279
			++wban;
280
		}
281
		(void)fprintf(stderr, "%s\n", pt->pstr);
282
	}
283
60
}
284
285
/*
286
 * pat_sel()
287
 *	the archive member which matches a pattern was selected. Mark the
288
 *	pattern as having selected an archive member. arcn->pat points at the
289
 *	pattern that was matched. arcn->pat is set in pat_match()
290
 *
291
 *	NOTE: When the -c option is used, we are called when there was no match
292
 *	by pat_match() (that means we did match before the inverted sense of
293
 *	the logic). Now this seems really strange at first, but with -c we
294
 *	need to keep track of those patterns that cause an archive member to NOT
295
 *	be selected (it found an archive member with a specified pattern)
296
 * Return:
297
 *	0 if the pattern pointed at by arcn->pat was tagged as creating a
298
 *	match, -1 otherwise.
299
 */
300
301
int
302
pat_sel(ARCHD *arcn)
303
{
304
	PATTERN *pt;
305
	PATTERN **ppt;
306
	size_t len;
307
308
	/*
309
	 * if no patterns just return
310
	 */
311

684
	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
312
342
		return(0);
313
314
	/*
315
	 * when we are NOT limited to a single match per pattern mark the
316
	 * pattern and return
317
	 */
318
	if (!nflag) {
319
		pt->flgs |= MTCH;
320
		return(0);
321
	}
322
323
	/*
324
	 * we reach this point only when we allow a single selected match per
325
	 * pattern, if the pattern matches a directory and we do not have -d
326
	 * (dflag) we are done with this pattern. We may also be handed a file
327
	 * in the subtree of a directory. in that case when we are operating
328
	 * with -d, this pattern was already selected and we are done
329
	 */
330
	if (pt->flgs & DIR_MTCH)
331
		return(0);
332
333
	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
334
		/*
335
		 * ok we matched a directory and we are allowing
336
		 * subtree matches but because of the -n only its children will
337
		 * match. This is tagged as a DIR_MTCH type.
338
		 * WATCH IT, the code assumes that pt->pend points
339
		 * into arcn->name and arcn->name has not been modified.
340
		 * If not we will have a big mess. Yup this is another kludge
341
		 */
342
343
		/*
344
		 * if this was a prefix match, remove trailing part of path
345
		 * so we can copy it. Future matches will be exact prefix match
346
		 */
347
		if (pt->pend != NULL)
348
			*pt->pend = '\0';
349
350
		if ((pt->pstr = strdup(arcn->name)) == NULL) {
351
			paxwarn(1, "Pattern select out of memory");
352
			if (pt->pend != NULL)
353
				*pt->pend = '/';
354
			pt->pend = NULL;
355
			return(-1);
356
		}
357
358
		/*
359
		 * put the trailing / back in the source string
360
		 */
361
		if (pt->pend != NULL) {
362
			*pt->pend = '/';
363
			pt->pend = NULL;
364
		}
365
		pt->plen = strlen(pt->pstr);
366
367
		/*
368
		 * strip off any trailing /, this should really never happen
369
		 */
370
		len = pt->plen - 1;
371
		if (*(pt->pstr + len) == '/') {
372
			*(pt->pstr + len) = '\0';
373
			pt->plen = len;
374
		}
375
		pt->flgs = DIR_MTCH | MTCH;
376
		arcn->pat = pt;
377
		return(0);
378
	}
379
380
	/*
381
	 * we are then done with this pattern, so we delete it from the list
382
	 * because it can never be used for another match.
383
	 * Seems kind of strange to do for a -c, but the pax spec is really
384
	 * vague on the interaction of -c, -n and -d. We assume that when -c
385
	 * and the pattern rejects a member (i.e. it matched it) it is done.
386
	 * In effect we place the order of the flags as having -c last.
387
	 */
388
	pt = pathead;
389
	ppt = &pathead;
390
	while ((pt != NULL) && (pt != arcn->pat)) {
391
		ppt = &(pt->fow);
392
		pt = pt->fow;
393
	}
394
395
	if (pt == NULL) {
396
		/*
397
		 * should never happen....
398
		 */
399
		paxwarn(1, "Pattern list inconsistent");
400
		return(-1);
401
	}
402
	*ppt = pt->fow;
403
	free(pt);
404
	arcn->pat = NULL;
405
	return(0);
406
342
}
407
408
/*
409
 * pat_match()
410
 *	see if this archive member matches any supplied pattern, if a match
411
 *	is found, arcn->pat is set to point at the potential pattern. Later if
412
 *	this archive member is "selected" we process and mark the pattern as
413
 *	one which matched a selected archive member (see pat_sel())
414
 * Return:
415
 *	0 if this archive member should be processed, 1 if it should be
416
 *	skipped and -1 if we are done with all patterns (and pax should quit
417
 *	looking for more members)
418
 */
419
420
int
421
pat_match(ARCHD *arcn)
422
{
423
	PATTERN *pt;
424
425
684
	arcn->pat = NULL;
426
427
	/*
428
	 * if there are no more patterns and we have -n (and not -c) we are
429
	 * done. otherwise with no patterns to match, matches all
430
	 */
431
342
	if (pathead == NULL) {
432
342
		if (nflag && !cflag)
433
			return(-1);
434
342
		return(0);
435
	}
436
437
	/*
438
	 * have to search down the list one at a time looking for a match.
439
	 */
440
	pt = pathead;
441
	while (pt != NULL) {
442
		/*
443
		 * check for a file name match unless we have DIR_MTCH set in
444
		 * this pattern then we want a prefix match
445
		 */
446
		if (pt->flgs & DIR_MTCH) {
447
			/*
448
			 * this pattern was matched before to a directory
449
			 * as we must have -n set for this (but not -d). We can
450
			 * only match CHILDREN of that directory so we must use
451
			 * an exact prefix match (no wildcards).
452
			 */
453
			if ((arcn->name[pt->plen] == '/') &&
454
			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
455
				break;
456
		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
457
			break;
458
		pt = pt->fow;
459
	}
460
461
	/*
462
	 * return the result, remember that cflag (-c) inverts the sense of a
463
	 * match
464
	 */
465
	if (pt == NULL)
466
		return(cflag ? 0 : 1);
467
468
	/*
469
	 * we had a match, now when we invert the sense (-c) we reject this
470
	 * member. However we have to tag the pattern a being successful, (in a
471
	 * match, not in selecting a archive member) so we call pat_sel() here.
472
	 */
473
	arcn->pat = pt;
474
	if (!cflag)
475
		return(0);
476
477
	if (pat_sel(arcn) < 0)
478
		return(-1);
479
	arcn->pat = NULL;
480
	return(1);
481
342
}
482
483
/*
484
 * fn_match()
485
 * Return:
486
 *	0 if this archive member should be processed, 1 if it should be
487
 *	skipped and -1 if we are done with all patterns (and pax should quit
488
 *	looking for more members)
489
 *	Note: *pend may be changed to show where the prefix ends.
490
 */
491
492
static int
493
fn_match(char *pattern, char *string, char **pend)
494
{
495
	char c;
496
	char test;
497
498
	*pend = NULL;
499
	for (;;) {
500
		switch (c = *pattern++) {
501
		case '\0':
502
			/*
503
			 * Ok we found an exact match
504
			 */
505
			if (*string == '\0')
506
				return(0);
507
508
			/*
509
			 * Check if it is a prefix match
510
			 */
511
			if ((dflag == 1) || (*string != '/'))
512
				return(-1);
513
514
			/*
515
			 * It is a prefix match, remember where the trailing
516
			 * / is located
517
			 */
518
			*pend = string;
519
			return(0);
520
		case '?':
521
			if ((test = *string++) == '\0')
522
				return (-1);
523
			break;
524
		case '*':
525
			c = *pattern;
526
			/*
527
			 * Collapse multiple *'s.
528
			 */
529
			while (c == '*')
530
				c = *++pattern;
531
532
			/*
533
			 * Optimized hack for pattern with a * at the end
534
			 */
535
			if (c == '\0')
536
				return (0);
537
538
			/*
539
			 * General case, use recursion.
540
			 */
541
			while ((test = *string) != '\0') {
542
				if (!fn_match(pattern, string, pend))
543
					return (0);
544
				++string;
545
			}
546
			return (-1);
547
		case '[':
548
			/*
549
			 * range match
550
			 */
551
			if (((test = *string++) == '\0') ||
552
			    ((pattern = range_match(pattern, test)) == NULL))
553
				return (-1);
554
			break;
555
		case '\\':
556
			if ((c = *pattern++) == '\0')
557
				return (-1);
558
			/* FALLTHROUGH */
559
		default:
560
			if (c != *string++)
561
				return (-1);
562
			break;
563
		}
564
	}
565
	/* NOTREACHED */
566
}
567
568
/*
 * range_match()
 *	match the character test against the body of a [...] expression.
 *	pattern points right after the opening [. A leading ! inverts the
 *	sense of the expression. "a-z" stands for an inclusive range; a -
 *	that is followed by ] or by the end of the pattern is an ordinary
 *	character.
 * Return:
 *	pointer to the pattern character after the closing ] if test is
 *	accepted, NULL if it is rejected or the closing ] is missing.
 */
static char *
range_match(char *pattern, int test)
{
	int negate = 0;
	int matched = 0;

	if (*pattern == '!') {
		negate = 1;
		++pattern;
	}

	for (;;) {
		char lo = *pattern++;

		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing ], illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			char hi = pattern[1];

			if ((lo <= test) && (test <= hi))
				matched = 1;
			pattern += 2;
			continue;
		}
		if (lo == test)
			matched = 1;
	}

	if (matched == negate)
		return (NULL);
	return (pattern);
}
596
597
/*
 * has_dotdot()
 *	Returns true iff the supplied path contains a ".." component, that
 *	is a ".." with the start of the path or a / before it and a / or the
 *	end of the path after it.
 */

int
has_dotdot(const char *path)
{
	const char *comp = path;

	/*
	 * look at the / separated components one at a time
	 */
	for (;;) {
		const char *slash = strchr(comp, '/');
		size_t clen;

		if (slash != NULL)
			clen = (size_t)(slash - comp);
		else
			clen = strlen(comp);

		if ((clen == 2) && (comp[0] == '.') && (comp[1] == '.'))
			return (1);
		if (slash == NULL)
			return (0);
		comp = slash + 1;
	}
}
615
616
/*
617
 * mod_name()
618
 *	modify a selected file name. first attempt to apply replacement string
619
 *	expressions, then apply interactive file rename. We apply replacement
620
 *	string expressions to both filenames and file links (if we didn't the
621
 *	links would point to the wrong place, and we could never be able to
622
 *	move an archive that has a file link in it). When we rename files
623
 *	interactively, we store that mapping (old name to user input name) so
624
 *	if we spot any file links to the old file name in the future, we will
625
 *	know exactly how to fix the file link.
626
 * Return:
627
 *	0 continue to  process file, 1 skip this file, -1 pax is finished
628
 */
629
630
int
631
mod_name(ARCHD *arcn)
632
{
633
	int res = 0;
634
635
	/*
636
	 * Strip off leading '/' if appropriate.
637
	 * Currently, this option is only set for the tar format.
638
	 */
639

1749
	while (rmleadslash && arcn->name[0] == '/') {
640
		if (arcn->name[1] == '\0') {
641
			arcn->name[0] = '.';
642
		} else {
643
			(void)memmove(arcn->name, &arcn->name[1],
644
			    strlen(arcn->name));
645
			arcn->nlen--;
646
		}
647
		if (rmleadslash < 2) {
648
			rmleadslash = 2;
649
			paxwarn(0, "Removing leading / from absolute path names in the archive");
650
		}
651
	}
652

1294
	while (rmleadslash && arcn->ln_name[0] == '/' &&
653
	    PAX_IS_HARDLINK(arcn->type)) {
654
		if (arcn->ln_name[1] == '\0') {
655
			arcn->ln_name[0] = '.';
656
		} else {
657
			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
658
			    strlen(arcn->ln_name));
659
			arcn->ln_nlen--;
660
		}
661
		if (rmleadslash < 2) {
662
			rmleadslash = 2;
663
			paxwarn(0, "Removing leading / from absolute path names in the archive");
664
		}
665
	}
666
455
	if (rmleadslash) {
667
		const char *last = NULL;
668
384
		const char *p = arcn->name;
669
670
768
		while ((p = strstr(p, "..")) != NULL) {
671
			if ((p == arcn->name || p[-1] == '/') &&
672
			    (p[2] == '/' || p[2] == '\0'))
673
				last = p + 2;
674
			p += 2;
675
		}
676
384
		if (last != NULL) {
677
			last++;
678
			paxwarn(1, "Removing leading \"%.*s\"",
679
			    (int)(last - arcn->name), arcn->name);
680
			arcn->nlen = strlen(last);
681
			if (arcn->nlen > 0)
682
				memmove(arcn->name, last, arcn->nlen + 1);
683
			else {
684
				arcn->name[0] = '.';
685
				arcn->name[1] = '\0';
686
				arcn->nlen = 1;
687
			}
688
		}
689
384
	}
690
691
	/*
692
	 * IMPORTANT: We have a problem. what do we do with symlinks?
693
	 * Modifying a hard link name makes sense, as we know the file it
694
	 * points at should have been seen already in the archive (and if it
695
	 * wasn't seen because of a read error or a bad archive, we lose
696
	 * anyway). But there are no such requirements for symlinks. On one
697
	 * hand the symlink that refers to a file in the archive will have to
698
	 * be modified to so it will still work at its new location in the
699
	 * file system. On the other hand a symlink that points elsewhere (and
700
	 * should continue to do so) should not be modified. There is clearly
701
	 * no perfect solution here. So we handle them like hardlinks. Clearly
702
	 * a replacement made by the interactive rename mapping is very likely
703
	 * to be correct since it applies to a single file and is an exact
704
	 * match. The regular expression replacements are a little harder to
705
	 * justify though. We claim that the symlink name is only likely
706
	 * to be replaced when it points within the file tree being moved and
707
	 * in that case it should be modified. what we really need to do is to
708
	 * call an oracle here. :)
709
	 */
710
455
	if (rephead != NULL) {
711
		/*
712
		 * we have replacement strings, modify the name and the link
713
		 * name if any.
714
		 */
715
		if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0)
716
			return(res);
717
718
		if (PAX_IS_LINK(arcn->type)) {
719
			if ((res = rep_name(arcn->ln_name,
720
			    sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0)
721
				return(res);
722
		}
723
	}
724
725
455
	if (iflag) {
726
		/*
727
		 * perform interactive file rename, then map the link if any
728
		 */
729
		if ((res = tty_rename(arcn)) != 0)
730
			return(res);
731
		if (PAX_IS_LINK(arcn->type))
732
			sub_name(arcn->ln_name, &(arcn->ln_nlen),
733
			    sizeof(arcn->ln_name));
734
	}
735
455
	return(res);
736
455
}
737
738
/*
739
 * tty_rename()
740
 *	Prompt the user for a replacement file name. A "." keeps the old name,
741
 *	a empty line skips the file, and an EOF on reading the tty, will cause
742
 *	pax to stop processing and exit. Otherwise the file name input, replaces
743
 *	the old one.
744
 * Return:
745
 *	0 process this file, 1 skip this file, -1 we need to exit pax
746
 */
747
748
static int
749
tty_rename(ARCHD *arcn)
750
{
751
	char tmpname[PAXPATHLEN+2];
752
	int res;
753
754
	/*
755
	 * prompt user for the replacement name for a file, keep trying until
756
	 * we get some reasonable input. Archives may have more than one file
757
	 * on them with the same name (from updates etc). We print verbose info
758
	 * on the file so the user knows what is up.
759
	 */
760
	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
761
762
	for (;;) {
763
		ls_tty(arcn);
764
		tty_prnt("Input new name, or a \".\" to keep the old name, ");
765
		tty_prnt("or a \"return\" to skip this file.\n");
766
		tty_prnt("Input > ");
767
		if (tty_read(tmpname, sizeof(tmpname)) < 0)
768
			return(-1);
769
		if (strcmp(tmpname, "..") == 0) {
770
			tty_prnt("Try again, illegal file name: ..\n");
771
			continue;
772
		}
773
		if (strlen(tmpname) > PAXPATHLEN) {
774
			tty_prnt("Try again, file name too long\n");
775
			continue;
776
		}
777
		break;
778
	}
779
780
	/*
781
	 * empty file name, skips this file. a "." leaves it alone
782
	 */
783
	if (tmpname[0] == '\0') {
784
		tty_prnt("Skipping file.\n");
785
		return(1);
786
	}
787
	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
788
		tty_prnt("Processing continues, name unchanged.\n");
789
		return(0);
790
	}
791
792
	/*
793
	 * ok the name changed. We may run into links that point at this
794
	 * file later. we have to remember where the user sent the file
795
	 * in order to repair any links.
796
	 */
797
	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
798
	res = add_name(arcn->name, arcn->nlen, tmpname);
799
	arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
800
	if ((size_t)arcn->nlen >= sizeof(arcn->name))
801
		arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */
802
	if (res < 0)
803
		return(-1);
804
	return(0);
805
}
806
807
/*
808
 * set_dest()
809
 *	fix up the file name and the link name (if any) so this file will land
810
 *	in the destination directory (used during copy() -rw).
811
 * Return:
812
 *	0 if ok, -1 if failure (name too long)
813
 */
814
815
int
816
set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
817
{
818
	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
819
		return(-1);
820
821
	/*
822
	 * It is really hard to deal with symlinks here, we cannot be sure
823
	 * if the name they point was moved (or will be moved). It is best to
824
	 * leave them alone.
825
	 */
826
	if (!PAX_IS_HARDLINK(arcn->type))
827
		return(0);
828
829
	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
830
		return(-1);
831
	return(0);
832
}
833
834
/*
835
 * fix_path
836
 *	concatenate dir_name and or_name and store the result in or_name (if
837
 *	it fits). This is one ugly function.
838
 * Return:
839
 *	0 if ok, -1 if the final name is too long
840
 */
841
842
static int
843
fix_path(char *or_name, int *or_len, char *dir_name, int dir_len)
844
{
845
	char *src;
846
	char *dest;
847
	char *start;
848
	int len;
849
850
	/*
851
	 * we shift the or_name to the right enough to tack in the dir_name
852
	 * at the front. We make sure we have enough space for it all before
853
	 * we start. since dest always ends in a slash, we skip of or_name
854
	 * if it also starts with one.
855
	 */
856
	start = or_name;
857
	src = start + *or_len;
858
	dest = src + dir_len;
859
	if (*start == '/') {
860
		++start;
861
		--dest;
862
	}
863
	if ((len = dest - or_name) > PAXPATHLEN) {
864
		paxwarn(1, "File name %s/%s, too long", dir_name, start);
865
		return(-1);
866
	}
867
	*or_len = len;
868
869
	/*
870
	 * enough space, shift
871
	 */
872
	while (src >= start)
873
		*dest-- = *src--;
874
	src = dir_name + dir_len - 1;
875
876
	/*
877
	 * splice in the destination directory name
878
	 */
879
	while (src >= dir_name)
880
		*dest-- = *src--;
881
882
	*(or_name + len) = '\0';
883
	return(0);
884
}
885
886
/*
887
 * rep_name()
888
 *	walk down the list of replacement strings applying each one in order.
889
 *	when we find one with a successful substitution, we modify the name
890
 *	as specified. if required, we print the results. if the resulting name
891
 *	is empty, we will skip this archive member. We use the regexp(3)
892
 *	routines (regexp() ought to win a prize as having the most cryptic
893
 *	library function manual page).
894
 *	--Parameters--
895
 *	name is the file name we are going to apply the regular expressions to
896
 *	(and may be modified)
897
 *	nsize is the size of the name buffer.
898
 *	nlen is the length of this name (and is modified to hold the length of
899
 *	the final string).
900
 *	prnt is a flag that says whether to print the final result.
901
 * Return:
902
 *	0 if substitution was successful, 1 if we are to skip the file (the name
903
 *	ended up empty)
904
 */
905
906
static int
907
rep_name(char *name, size_t nsize, int *nlen, int prnt)
908
{
909
	REPLACE *pt;
910
	char *inpt;
911
	char *outpt;
912
	char *endpt;
913
	char *rpt;
914
	int found = 0;
915
	int res;
916
	regmatch_t pm[MAXSUBEXP];
917
	char nname[PAXPATHLEN+1];	/* final result of all replacements */
918
	char buf1[PAXPATHLEN+1];	/* where we work on the name */
919
920
	/*
921
	 * copy the name into buf1, where we will work on it. We need to keep
922
	 * the orig string around so we can print out the result of the final
923
	 * replacement. We build up the final result in nname. inpt points at
924
	 * the string we apply the regular expression to. prnt is used to
925
	 * suppress printing when we handle replacements on the link field
926
	 * (the user already saw that substitution go by)
927
	 */
928
	pt = rephead;
929
	(void)strlcpy(buf1, name, sizeof(buf1));
930
	inpt = buf1;
931
	outpt = nname;
932
	endpt = outpt + PAXPATHLEN;
933
934
	/*
935
	 * try each replacement string in order
936
	 */
937
	while (pt != NULL) {
938
		do {
939
			char *oinpt = inpt;
940
			/*
941
			 * check for a successful substitution, if not go to
942
			 * the next pattern, or cleanup if we were global
943
			 */
944
			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
945
				break;
946
947
			/*
948
			 * ok we found one. We have three parts, the prefix
949
			 * which did not match, the section that did and the
950
			 * tail (that also did not match). Copy the prefix to
951
			 * the final output buffer (watching to make sure we
952
			 * do not create a string too long).
953
			 */
954
			found = 1;
955
			rpt = inpt + pm[0].rm_so;
956
957
			while ((inpt < rpt) && (outpt < endpt))
958
				*outpt++ = *inpt++;
959
			if (outpt == endpt)
960
				break;
961
962
			/*
963
			 * for the second part (which matched the regular
964
			 * expression) apply the substitution using the
965
			 * replacement string and place it the prefix in the
966
			 * final output. If we have problems, skip it.
967
			 */
968
			if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt))
969
			    < 0) {
970
				if (prnt)
971
					paxwarn(1, "Replacement name error %s",
972
					    name);
973
				return(1);
974
			}
975
			outpt += res;
976
977
			/*
978
			 * we set up to look again starting at the first
979
			 * character in the tail (of the input string right
980
			 * after the last character matched by the regular
981
			 * expression (inpt always points at the first char in
982
			 * the string to process). If we are not doing a global
983
			 * substitution, we will use inpt to copy the tail to
984
			 * the final result. Make sure we do not overrun the
985
			 * output buffer
986
			 */
987
			inpt += pm[0].rm_eo - pm[0].rm_so;
988
989
			if ((outpt == endpt) || (*inpt == '\0'))
990
				break;
991
992
			/*
993
			 * if the user wants global we keep trying to
994
			 * substitute until it fails, then we are done.
995
			 */
996
		} while (pt->flgs & GLOB);
997
998
		if (found)
999
			break;
1000
1001
		/*
1002
		 * a successful substitution did NOT occur, try the next one
1003
		 */
1004
		pt = pt->fow;
1005
	}
1006
1007
	if (found) {
1008
		/*
1009
		 * we had a substitution, copy the last tail piece (if there is
1010
		 * room) to the final result
1011
		 */
1012
		while ((outpt < endpt) && (*inpt != '\0'))
1013
			*outpt++ = *inpt++;
1014
1015
		*outpt = '\0';
1016
		if ((outpt == endpt) && (*inpt != '\0')) {
1017
			if (prnt)
1018
				paxwarn(1,"Replacement name too long %s >> %s",
1019
				    name, nname);
1020
			return(1);
1021
		}
1022
1023
		/*
1024
		 * inform the user of the result if wanted
1025
		 */
1026
		if (prnt && (pt->flgs & PRNT)) {
1027
			if (*nname == '\0')
1028
				(void)fprintf(stderr,"%s >> <empty string>\n",
1029
				    name);
1030
			else
1031
				(void)fprintf(stderr,"%s >> %s\n", name, nname);
1032
		}
1033
1034
		/*
1035
		 * if empty inform the caller this file is to be skipped
1036
		 * otherwise copy the new name over the orig name and return
1037
		 */
1038
		if (*nname == '\0')
1039
			return(1);
1040
		*nlen = strlcpy(name, nname, nsize);
1041
	}
1042
	return(0);
1043
}
1044
1045
/*
 * resub()
 *	apply the replacement to the matched expression. expand out the old
 *	style ed(1) subexpression expansion: & stands for the whole match,
 *	\0 to \9 for a subexpression, and a backslash in front of any other
 *	character makes that character an ordinary one.
 *	--Parameters--
 *	rp is the compiled expression, pm the match offsets it produced.
 *	src is the replacement string.
 *	inpt is the string the offsets in pm refer to.
 *	dest is where the expansion is stored (it is NOT NUL terminated),
 *	destend marks the end of the space available.
 * Return:
 *	-1 if error (reference to a subexpression the expression does not
 *	have, or a matched piece that does not fit), or the number of
 *	characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
	char *destend)
{
	char *out = dest;
	int nsub = rp->re_nsub;

	while ((out < destend) && (*src != '\0')) {
		char ch = *src++;
		regmatch_t *ref;
		int span;

		/*
		 * work out which match, if any, this character refers to
		 */
		if (ch == '&') {
			ref = pm;
		} else if ((ch == '\\') && (*src >= '0') && (*src <= '9')) {
			int idx = *src++ - '0';

			/*
			 * make sure there is a subexpression as specified
			 */
			if (idx > nsub)
				return(-1);
			ref = pm + idx;
		} else {
			/*
			 * Ordinary character (possibly escaped), just copy it
			 */
			if ((ch == '\\') && (*src != '\0'))
				ch = *src++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to copy if the subexpression is bogus or empty
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		span = ref->rm_eo - ref->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the matched text to the destination.
		 * fail if we run out of space
		 */
		if (span > (destend - out))
			return (-1);
		memcpy(out, inpt + ref->rm_so, (size_t)span);
		out += span;
	}
	return(out - dest);
}