GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: bin/pax/tables.c Lines: 103 511 20.2 %
Date: 2016-12-06 Branches: 51 380 13.4 %

Line Branch Exec Source
1
/*	$OpenBSD: tables.c,v 1.47 2015/03/19 05:14:24 guenther Exp $	*/
2
/*	$NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $	*/
3
4
/*-
5
 * Copyright (c) 1992 Keith Muller.
6
 * Copyright (c) 1992, 1993
7
 *	The Regents of the University of California.  All rights reserved.
8
 *
9
 * This code is derived from software contributed to Berkeley by
10
 * Keith Muller of the University of California, San Diego.
11
 *
12
 * Redistribution and use in source and binary forms, with or without
13
 * modification, are permitted provided that the following conditions
14
 * are met:
15
 * 1. Redistributions of source code must retain the above copyright
16
 *    notice, this list of conditions and the following disclaimer.
17
 * 2. Redistributions in binary form must reproduce the above copyright
18
 *    notice, this list of conditions and the following disclaimer in the
19
 *    documentation and/or other materials provided with the distribution.
20
 * 3. Neither the name of the University nor the names of its contributors
21
 *    may be used to endorse or promote products derived from this software
22
 *    without specific prior written permission.
23
 *
24
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34
 * SUCH DAMAGE.
35
 */
36
37
#include <sys/types.h>
38
#include <sys/time.h>
39
#include <sys/stat.h>
40
#include <fcntl.h>
41
#include <limits.h>
42
#include <signal.h>
43
#include <stdio.h>
44
#include <string.h>
45
#include <unistd.h>
46
#include <errno.h>
47
#include <stdlib.h>
48
#include "pax.h"
49
#include "tables.h"
50
#include "extern.h"
51
52
/*
53
 * Routines for controlling the contents of all the different databases pax
54
 * keeps. Tables are dynamically created only when they are needed. The
55
 * goal was speed and the ability to work with HUGE archives. The databases
56
 * were kept simple, but do have complex rules for when the contents change.
57
 * As of this writing, the posix library functions were more complex than
58
 * needed for this application (pax databases have very short lifetimes and
59
 * do not survive after pax is finished). Pax is required to handle very
60
 * large archives. These database routines carefully combine memory usage and
61
 * temporary file storage in ways which will not significantly impact runtime
62
 * performance while allowing the largest possible archives to be handled.
63
 * Trying to force the fit to the posix database routines was not considered
64
 * time well spent.
65
 */
66
67
static HRDLNK **ltab = NULL;	/* hard link table for detecting hard links */
68
static FTM **ftab = NULL;	/* file time table for updating arch */
69
static NAMT **ntab = NULL;	/* interactive rename storage table */
70
static DEVT **dtab = NULL;	/* device/inode mapping tables */
71
static ATDIR **atab = NULL;	/* file tree directory time reset table */
72
static DIRDATA *dirp = NULL;	/* storage for setting created dir time/mode */
73
static size_t dirsize;		/* size of dirp table */
74
static size_t dircnt = 0;	/* entries in dir time/mode storage */
75
static int ffd = -1;		/* tmp file for file time table name storage */
76
77
/*
78
 * hard link table routines
79
 *
80
 * The hard link table tries to detect hard links to files using the device and
81
 * inode values. We do this when writing an archive, so we can tell the format
82
 * write routine that this file is a hard link to another file. The format
83
 * write routine then can store this file in whatever way it wants (as a hard
84
 * link if the format supports that like tar, or ignore this info like cpio).
85
 * (Actually a field in the format driver table tells us if the format wants
86
 * hard link info. if not, we do not waste time looking for them). We also use
87
 * the same table when reading an archive. In that situation, this table is
88
 * used by the format read routine to detect hard links from stored dev and
89
 * inode numbers (like cpio). This will allow pax to create a link when one
90
 * can be detected by the archive format.
91
 */
92
93
/*
94
 * lnk_start
95
 *	Creates the hard link table.
96
 * Return:
97
 *	0 if created, -1 if failure
98
 */
99
100
int
101
lnk_start(void)
102
10
{
103
10
	if (ltab != NULL)
104
		return(0);
105
10
	if ((ltab = calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
106
		paxwarn(1, "Cannot allocate memory for hard link table");
107
		return(-1);
108
	}
109
10
	return(0);
110
}
111
112
/*
113
 * chk_lnk()
114
 *	Looks up entry in hard link hash table. If found, it copies the name
115
 *	of the file it is linked to (we already saw that file) into ln_name.
116
 *	lnkcnt is decremented and if goes to 1 the node is deleted from the
117
 *	database. (We have seen all the links to this file). If not found,
118
 *	we add the file to the database if it has the potential for having
119
 *	hard links to other files we may process (it has a link count > 1)
120
 * Return:
121
 *	if found returns 1; if not found returns 0; -1 on error
122
 */
123
124
int
125
chk_lnk(ARCHD *arcn)
126
18
{
127
	HRDLNK *pt;
128
	HRDLNK **ppt;
129
	u_int indx;
130
131
18
	if (ltab == NULL)
132
		return(-1);
133
	/*
134
	 * ignore those nodes that cannot have hard links
135
	 */
136

18
	if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
137
10
		return(0);
138
139
	/*
140
	 * hash inode number and look for this file
141
	 */
142
8
	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
143
8
	if ((pt = ltab[indx]) != NULL) {
144
		/*
145
		 * its hash chain in not empty, walk down looking for it
146
		 */
147
4
		ppt = &(ltab[indx]);
148
8
		while (pt != NULL) {
149

4
			if ((pt->ino == arcn->sb.st_ino) &&
150
			    (pt->dev == arcn->sb.st_dev))
151
4
				break;
152
			ppt = &(pt->fow);
153
			pt = pt->fow;
154
		}
155
156
4
		if (pt != NULL) {
157
			/*
158
			 * found a link. set the node type and copy in the
159
			 * name of the file it is to link to. we need to
160
			 * handle hardlinks to regular files differently than
161
			 * other links.
162
			 */
163
4
			arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name,
164
				sizeof(arcn->ln_name));
165
			/* XXX truncate? */
166
4
			if (arcn->nlen >= sizeof(arcn->name))
167
				arcn->nlen = sizeof(arcn->name) - 1;
168
4
			if (arcn->type == PAX_REG)
169
4
				arcn->type = PAX_HRG;
170
			else
171
				arcn->type = PAX_HLK;
172
173
			/*
174
			 * if we have found all the links to this file, remove
175
			 * it from the database
176
			 */
177
4
			if (--pt->nlink <= 1) {
178
4
				*ppt = pt->fow;
179
4
				free(pt->name);
180
4
				free(pt);
181
			}
182
4
			return(1);
183
		}
184
	}
185
186
	/*
187
	 * we never saw this file before. It has links so we add it to the
188
	 * front of this hash chain
189
	 */
190
4
	if ((pt = malloc(sizeof(HRDLNK))) != NULL) {
191
4
		if ((pt->name = strdup(arcn->name)) != NULL) {
192
4
			pt->dev = arcn->sb.st_dev;
193
4
			pt->ino = arcn->sb.st_ino;
194
4
			pt->nlink = arcn->sb.st_nlink;
195
4
			pt->fow = ltab[indx];
196
4
			ltab[indx] = pt;
197
4
			return(0);
198
		}
199
		free(pt);
200
	}
201
202
	paxwarn(1, "Hard link table out of memory");
203
	return(-1);
204
}
205
206
/*
207
 * purg_lnk
208
 *	remove reference for a file that we may have added to the data base as
209
 *	a potential source for hard links. We ended up not using the file, so
210
 *	we do not want to accidently point another file at it later on.
211
 */
212
213
void
214
purg_lnk(ARCHD *arcn)
215
{
216
	HRDLNK *pt;
217
	HRDLNK **ppt;
218
	u_int indx;
219
220
	if (ltab == NULL)
221
		return;
222
	/*
223
	 * do not bother to look if it could not be in the database
224
	 */
225
	if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
226
	    PAX_IS_HARDLINK(arcn->type))
227
		return;
228
229
	/*
230
	 * find the hash chain for this inode value, if empty return
231
	 */
232
	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
233
	if ((pt = ltab[indx]) == NULL)
234
		return;
235
236
	/*
237
	 * walk down the list looking for the inode/dev pair, unlink and
238
	 * free if found
239
	 */
240
	ppt = &(ltab[indx]);
241
	while (pt != NULL) {
242
		if ((pt->ino == arcn->sb.st_ino) &&
243
		    (pt->dev == arcn->sb.st_dev))
244
			break;
245
		ppt = &(pt->fow);
246
		pt = pt->fow;
247
	}
248
	if (pt == NULL)
249
		return;
250
251
	/*
252
	 * remove and free it
253
	 */
254
	*ppt = pt->fow;
255
	free(pt->name);
256
	free(pt);
257
}
258
259
/*
260
 * lnk_end()
261
 *	pull apart a existing link table so we can reuse it. We do this between
262
 *	read and write phases of append with update. (The format may have
263
 *	used the link table, and we need to start with a fresh table for the
264
 *	write phase
265
 */
266
267
void
268
lnk_end(void)
269
6
{
270
	int i;
271
	HRDLNK *pt;
272
	HRDLNK *ppt;
273
274
6
	if (ltab == NULL)
275
6
		return;
276
277
	for (i = 0; i < L_TAB_SZ; ++i) {
278
		if (ltab[i] == NULL)
279
			continue;
280
		pt = ltab[i];
281
		ltab[i] = NULL;
282
283
		/*
284
		 * free up each entry on this chain
285
		 */
286
		while (pt != NULL) {
287
			ppt = pt;
288
			pt = ppt->fow;
289
			free(ppt->name);
290
			free(ppt);
291
		}
292
	}
293
}
294
295
/*
296
 * modification time table routines
297
 *
298
 * The modification time table keeps track of last modification times for all
299
 * files stored in an archive during a write phase when -u is set. We only
300
 * add a file to the archive if it is newer than a file with the same name
301
 * already stored on the archive (if there is no other file with the same
302
 * name on the archive it is added). This applies to writes and appends.
303
 * An append with an -u must read the archive and store the modification time
304
 * for every file on that archive before starting the write phase. It is clear
305
 * that this is one HUGE database. To save memory space, the actual file names
306
 * are stored in a scratch file and indexed by an in-memory hash table. The
307
 * hash table is indexed by hashing the file path. The nodes in the table store
308
 * the length of the filename and the lseek offset within the scratch file
309
 * where the actual name is stored. Since there are never any deletions from
310
 * this table, fragmentation of the scratch file is never a issue. Lookups
311
 * seem to not exhibit any locality at all (files in the database are rarely
312
 * looked up more than once...), so caching is just a waste of memory. The
313
 * only limitation is the amount of scratch file space available to store the
314
 * path names.
315
 */
316
317
/*
318
 * ftime_start()
319
 *	create the file time hash table and open for read/write the scratch
320
 *	file. (after created it is unlinked, so when we exit we leave
321
 *	no witnesses).
322
 * Return:
323
 *	0 if the table and file was created ok, -1 otherwise
324
 */
325
326
int
327
ftime_start(void)
328
{
329
330
	if (ftab != NULL)
331
		return(0);
332
	if ((ftab = calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
333
		paxwarn(1, "Cannot allocate memory for file time table");
334
		return(-1);
335
	}
336
337
	/*
338
	 * get random name and create temporary scratch file, unlink name
339
	 * so it will get removed on exit
340
	 */
341
	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
342
	if ((ffd = mkstemp(tempfile)) < 0) {
343
		syswarn(1, errno, "Unable to create temporary file: %s",
344
		    tempfile);
345
		return(-1);
346
	}
347
	(void)unlink(tempfile);
348
349
	return(0);
350
}
351
352
/*
353
 * chk_ftime()
354
 *	looks up entry in file time hash table. If not found, the file is
355
 *	added to the hash table and the file named stored in the scratch file.
356
 *	If a file with the same name is found, the file times are compared and
357
 *	the most recent file time is retained. If the new file was younger (or
358
 *	was not in the database) the new file is selected for storage.
359
 * Return:
360
 *	0 if file should be added to the archive, 1 if it should be skipped,
361
 *	-1 on error
362
 */
363
364
int
365
chk_ftime(ARCHD *arcn)
366
{
367
	FTM *pt;
368
	int namelen;
369
	u_int indx;
370
	char ckname[PAXPATHLEN+1];
371
372
	/*
373
	 * no info, go ahead and add to archive
374
	 */
375
	if (ftab == NULL)
376
		return(0);
377
378
	/*
379
	 * hash the pathname and look up in table
380
	 */
381
	namelen = arcn->nlen;
382
	indx = st_hash(arcn->name, namelen, F_TAB_SZ);
383
	if ((pt = ftab[indx]) != NULL) {
384
		/*
385
		 * the hash chain is not empty, walk down looking for match
386
		 * only read up the path names if the lengths match, speeds
387
		 * up the search a lot
388
		 */
389
		while (pt != NULL) {
390
			if (pt->namelen == namelen) {
391
				/*
392
				 * potential match, have to read the name
393
				 * from the scratch file.
394
				 */
395
				if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
396
					syswarn(1, errno,
397
					    "Failed ftime table seek");
398
					return(-1);
399
				}
400
				if (read(ffd, ckname, namelen) != namelen) {
401
					syswarn(1, errno,
402
					    "Failed ftime table read");
403
					return(-1);
404
				}
405
406
				/*
407
				 * if the names match, we are done
408
				 */
409
				if (!strncmp(ckname, arcn->name, namelen))
410
					break;
411
			}
412
413
			/*
414
			 * try the next entry on the chain
415
			 */
416
			pt = pt->fow;
417
		}
418
419
		if (pt != NULL) {
420
			/*
421
			 * found the file, compare the times, save the newer
422
			 */
423
			if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)) {
424
				/*
425
				 * file is newer
426
				 */
427
				pt->mtim = arcn->sb.st_mtim;
428
				return(0);
429
			}
430
			/*
431
			 * file is older
432
			 */
433
			return(1);
434
		}
435
	}
436
437
	/*
438
	 * not in table, add it
439
	 */
440
	if ((pt = malloc(sizeof(FTM))) != NULL) {
441
		/*
442
		 * add the name at the end of the scratch file, saving the
443
		 * offset. add the file to the head of the hash chain
444
		 */
445
		if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
446
			if (write(ffd, arcn->name, namelen) == namelen) {
447
				pt->mtim = arcn->sb.st_mtim;
448
				pt->namelen = namelen;
449
				pt->fow = ftab[indx];
450
				ftab[indx] = pt;
451
				return(0);
452
			}
453
			syswarn(1, errno, "Failed write to file time table");
454
		} else
455
			syswarn(1, errno, "Failed seek on file time table");
456
	} else
457
		paxwarn(1, "File time table ran out of memory");
458
459
	if (pt != NULL)
460
		free(pt);
461
	return(-1);
462
}
463
464
/*
465
 * escaping (absolute or w/"..") symlink table routines
466
 *
467
 * By default, an archive shouldn't be able extract to outside of the
468
 * current directory.  What should we do if the archive contains a symlink
469
 * whose value is either absolute or contains ".." components?  What we'll
470
 * do is initially create the path as an empty file (to block attempts to
471
 * reference _through_ it) and instead record its path and desired
472
 * final value and mode.  Then once all the other archive
473
 * members are created (but before the pass to set timestamps on
474
 * directories) we'll process those records, replacing the placeholder with
475
 * the correct symlink and setting them to the correct mode, owner, group,
476
 * and timestamps.
477
 *
478
 * Note: we also need to handle hardlinks to symlinks (barf) as well as
479
 * hardlinks whose target is replaced by a later entry in the archive (barf^2).
480
 *
481
 * So we track things by dev+ino of the placeholder file, associating with
482
 * that the value and mode of the final symlink and a list of paths that
483
 * should all be hardlinks of that.  We'll 'store' the symlink's desired
484
 * timestamps, owner, and group by setting them on the placeholder file.
485
 *
486
 * The operations are:
487
 * a) create an escaping symlink: create the placeholder file and add an entry
488
 *    for the new link
489
 * b) create a hardlink: do the link.  If the target turns out to be a
490
 *    zero-length file whose dev+ino are in the symlink table, then add this
491
 *    path to the list of names for that link
492
 * c) perform deferred processing: for each entry, check each associated path:
493
 *    if it's a zero-length file with the correct dev+ino then recreate it as
494
 *    the specified symlink or hardlink to the first such
495
 */
496
497
struct slpath {
498
	char	*sp_path;
499
	struct	slpath *sp_next;
500
};
501
struct slinode {
502
	ino_t	sli_ino;
503
	char	*sli_value;
504
	struct	slpath sli_paths;
505
	struct	slinode *sli_fow;		/* hash table chain */
506
	dev_t	sli_dev;
507
	mode_t	sli_mode;
508
};
509
510
static struct slinode **slitab = NULL;
511
512
/*
513
 * sltab_start()
514
 *	create the hash table
515
 * Return:
516
 *	0 if the table and file was created ok, -1 otherwise
517
 */
518
519
int
520
sltab_start(void)
521
2
{
522
523
2
	if ((slitab = calloc(SL_TAB_SZ, sizeof *slitab)) == NULL) {
524
		syswarn(1, errno, "symlink table");
525
		return(-1);
526
	}
527
528
2
	return(0);
529
}
530
531
/*
532
 * sltab_add_sym()
533
 *	Create the placeholder and tracking info for an escaping symlink.
534
 * Return:
535
 *	0 on success, -1 otherwise
536
 */
537
538
int
539
sltab_add_sym(const char *path0, const char *value0, mode_t mode)
540
{
541
	struct stat sb;
542
	struct slinode *s;
543
	struct slpath *p;
544
	char *path, *value;
545
	u_int indx;
546
	int fd;
547
548
	/* create the placeholder */
549
	fd = open(path0, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0600);
550
	if (fd == -1)
551
		return (-1);
552
	if (fstat(fd, &sb) == -1) {
553
		unlink(path0);
554
		close(fd);
555
		return (-1);
556
	}
557
	close(fd);
558
559
	if (havechd && *path0 != '/') {
560
		if ((path = realpath(path0, NULL)) == NULL) {
561
			syswarn(1, errno, "Cannot canonicalize %s", path0);
562
			unlink(path0);
563
			return (-1);
564
		}
565
	} else if ((path = strdup(path0)) == NULL) {
566
		syswarn(1, errno, "defered symlink path");
567
		unlink(path0);
568
		return (-1);
569
	}
570
	if ((value = strdup(value0)) == NULL) {
571
		syswarn(1, errno, "defered symlink value");
572
		unlink(path);
573
		free(path);
574
		return (-1);
575
	}
576
577
	/* now check the hash table for conflicting entry */
578
	indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ;
579
	for (s = slitab[indx]; s != NULL; s = s->sli_fow) {
580
		if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev)
581
			continue;
582
583
		/*
584
		 * One of our placeholders got removed behind our back and
585
		 * we've reused the inode.  Weird, but clean up the mess.
586
		 */
587
		free(s->sli_value);
588
		free(s->sli_paths.sp_path);
589
		p = s->sli_paths.sp_next;
590
		while (p != NULL) {
591
			struct slpath *next_p = p->sp_next;
592
593
			free(p->sp_path);
594
			free(p);
595
			p = next_p;
596
		}
597
		goto set_value;
598
	}
599
600
	/* Normal case: create a new node */
601
	if ((s = malloc(sizeof *s)) == NULL) {
602
		syswarn(1, errno, "defered symlink");
603
		unlink(path);
604
		free(path);
605
		free(value);
606
		return (-1);
607
	}
608
	s->sli_ino = sb.st_ino;
609
	s->sli_dev = sb.st_dev;
610
	s->sli_fow = slitab[indx];
611
	slitab[indx] = s;
612
613
set_value:
614
	s->sli_paths.sp_path = path;
615
	s->sli_paths.sp_next = NULL;
616
	s->sli_value = value;
617
	s->sli_mode = mode;
618
	return (0);
619
}
620
621
/*
622
 * sltab_add_link()
623
 *	A hardlink was created; if it looks like a placeholder, handle the
624
 *	tracking.
625
 * Return:
626
 *	0 if things are ok, -1 if something went wrong
627
 */
628
629
int
630
sltab_add_link(const char *path, const struct stat *sb)
631
4
{
632
	struct slinode *s;
633
	struct slpath *p;
634
	u_int indx;
635
636

4
	if (!S_ISREG(sb->st_mode) || sb->st_size != 0)
637
		return (1);
638
639
	/* find the hash table entry for this hardlink */
640
4
	indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ;
641
4
	for (s = slitab[indx]; s != NULL; s = s->sli_fow) {
642
		if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev)
643
			continue;
644
645
		if ((p = malloc(sizeof *p)) == NULL) {
646
			syswarn(1, errno, "deferred symlink hardlink");
647
			return (-1);
648
		}
649
		if (havechd && *path != '/') {
650
			if ((p->sp_path = realpath(path, NULL)) == NULL) {
651
				syswarn(1, errno, "Cannot canonicalize %s",
652
				    path);
653
				free(p);
654
				return (-1);
655
			}
656
		} else if ((p->sp_path = strdup(path)) == NULL) {
657
			syswarn(1, errno, "defered symlink hardlink path");
658
			free(p);
659
			return (-1);
660
		}
661
662
		/* link it in */
663
		p->sp_next = s->sli_paths.sp_next;
664
		s->sli_paths.sp_next = p;
665
		return (0);
666
	}
667
668
	/* not found */
669
4
	return (1);
670
}
671
672
673
static int
674
sltab_process_one(struct slinode *s, struct slpath *p, const char *first,
675
    int in_sig)
676
{
677
	struct stat sb;
678
	char *path = p->sp_path;
679
	mode_t mode;
680
	int err;
681
682
	/*
683
	 * is it the expected placeholder?  This can fail legimately
684
	 * if the archive overwrote the link with another, later entry,
685
	 * so don't warn.
686
	 */
687
	if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode) || sb.st_size != 0 ||
688
	    sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev)
689
		return (0);
690
691
	if (unlink(path) && errno != ENOENT) {
692
		if (!in_sig)
693
			syswarn(1, errno, "deferred symlink removal");
694
		return (0);
695
	}
696
697
	err = 0;
698
	if (first != NULL) {
699
		/* add another hardlink to the existing symlink */
700
		if (linkat(AT_FDCWD, first, AT_FDCWD, path, 0) == 0)
701
			return (0);
702
703
		/*
704
		 * Couldn't hardlink the symlink for some reason, so we'll
705
		 * try creating it as its own symlink, but save the error
706
		 * for reporting if that fails.
707
		 */
708
		err = errno;
709
	}
710
711
	if (symlink(s->sli_value, path)) {
712
		if (!in_sig) {
713
			const char *qualifier = "";
714
			if (err)
715
				qualifier = " hardlink";
716
			else
717
				err = errno;
718
719
			syswarn(1, err, "deferred symlink%s: %s",
720
			    qualifier, path);
721
		}
722
		return (0);
723
	}
724
725
	/* success, so set the id, mode, and times */
726
	mode = s->sli_mode;
727
	if (pids) {
728
		/* if can't set the ids, force the set[ug]id bits off */
729
		if (set_ids(path, sb.st_uid, sb.st_gid))
730
			mode &= ~(SETBITS);
731
	}
732
733
	if (pmode)
734
		set_pmode(path, mode);
735
736
	if (patime || pmtime)
737
		set_ftime(path, &sb.st_mtim, &sb.st_atim, 0);
738
739
	/*
740
	 * If we tried to link to first but failed, then this new symlink
741
	 * might be a better one to try in the future.  Guess from the errno.
742
	 */
743
	if (err == 0 || err == ENOENT || err == EMLINK || err == EOPNOTSUPP)
744
		return (1);
745
	return (0);
746
}
747
748
/*
749
 * sltab_process()
750
 *	Do all the delayed process for escape symlinks
751
 */
752
753
void
754
sltab_process(int in_sig)
755
2
{
756
	struct slinode *s;
757
	struct slpath *p;
758
	char *first;
759
	u_int indx;
760
761
2
	if (slitab == NULL)
762
		return;
763
764
	/* walk across the entire hash table */
765
636
	for (indx = 0; indx < SL_TAB_SZ; indx++) {
766
634
		while ((s = slitab[indx]) != NULL) {
767
			/* pop this entry */
768
			slitab[indx] = s->sli_fow;
769
770
			first = NULL;
771
			p = &s->sli_paths;
772
			while (1) {
773
				struct slpath *next_p;
774
775
				if (sltab_process_one(s, p, first, in_sig)) {
776
					if (!in_sig)
777
						free(first);
778
					first = p->sp_path;
779
				} else if (!in_sig)
780
					free(p->sp_path);
781
782
				if ((next_p = p->sp_next) == NULL)
783
					break;
784
				*p = *next_p;
785
				if (!in_sig)
786
					free(next_p);
787
			}
788
			if (!in_sig) {
789
				free(first);
790
				free(s->sli_value);
791
				free(s);
792
			}
793
		}
794
	}
795
2
	if (!in_sig)
796
2
		free(slitab);
797
2
	slitab = NULL;
798
}
799
800
801
/*
802
 * Interactive rename table routines
803
 *
804
 * The interactive rename table keeps track of the new names that the user
805
 * assigns to files from tty input. Since this map is unique for each file
806
 * we must store it in case there is a reference to the file later in archive
807
 * (a link). Otherwise we will be unable to find the file we know was
808
 * extracted. The remapping of these files is stored in a memory based hash
809
 * table (it is assumed since input must come from /dev/tty, it is unlikely to
810
 * be a very large table).
811
 */
812
813
/*
814
 * name_start()
815
 *	create the interactive rename table
816
 * Return:
817
 *	0 if successful, -1 otherwise
818
 */
819
820
int
821
name_start(void)
822
{
823
	if (ntab != NULL)
824
		return(0);
825
	if ((ntab = calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {
826
		paxwarn(1, "Cannot allocate memory for interactive rename table");
827
		return(-1);
828
	}
829
	return(0);
830
}
831
832
/*
833
 * add_name()
834
 *	add the new name to old name mapping just created by the user.
835
 *	If an old name mapping is found (there may be duplicate names on an
836
 *	archive) only the most recent is kept.
837
 * Return:
838
 *	0 if added, -1 otherwise
839
 */
840
841
int
842
add_name(char *oname, int onamelen, char *nname)
843
{
844
	NAMT *pt;
845
	u_int indx;
846
847
	if (ntab == NULL) {
848
		/*
849
		 * should never happen
850
		 */
851
		paxwarn(0, "No interactive rename table, links may fail");
852
		return(0);
853
	}
854
855
	/*
856
	 * look to see if we have already mapped this file, if so we
857
	 * will update it
858
	 */
859
	indx = st_hash(oname, onamelen, N_TAB_SZ);
860
	if ((pt = ntab[indx]) != NULL) {
861
		/*
862
		 * look down the has chain for the file
863
		 */
864
		while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
865
			pt = pt->fow;
866
867
		if (pt != NULL) {
868
			/*
869
			 * found an old mapping, replace it with the new one
870
			 * the user just input (if it is different)
871
			 */
872
			if (strcmp(nname, pt->nname) == 0)
873
				return(0);
874
875
			free(pt->nname);
876
			if ((pt->nname = strdup(nname)) == NULL) {
877
				paxwarn(1, "Cannot update rename table");
878
				return(-1);
879
			}
880
			return(0);
881
		}
882
	}
883
884
	/*
885
	 * this is a new mapping, add it to the table
886
	 */
887
	if ((pt = malloc(sizeof(NAMT))) != NULL) {
888
		if ((pt->oname = strdup(oname)) != NULL) {
889
			if ((pt->nname = strdup(nname)) != NULL) {
890
				pt->fow = ntab[indx];
891
				ntab[indx] = pt;
892
				return(0);
893
			}
894
			free(pt->oname);
895
		}
896
		free(pt);
897
	}
898
	paxwarn(1, "Interactive rename table out of memory");
899
	return(-1);
900
}
901
902
/*
903
 * sub_name()
904
 *	look up a link name to see if it points at a file that has been
905
 *	remapped by the user. If found, the link is adjusted to contain the
906
 *	new name (oname is the link to name)
907
 */
908
909
void
910
sub_name(char *oname, int *onamelen, size_t onamesize)
911
{
912
	NAMT *pt;
913
	u_int indx;
914
915
	if (ntab == NULL)
916
		return;
917
	/*
918
	 * look the name up in the hash table
919
	 */
920
	indx = st_hash(oname, *onamelen, N_TAB_SZ);
921
	if ((pt = ntab[indx]) == NULL)
922
		return;
923
924
	while (pt != NULL) {
925
		/*
926
		 * walk down the hash chain looking for a match
927
		 */
928
		if (strcmp(oname, pt->oname) == 0) {
929
			/*
930
			 * found it, replace it with the new name
931
			 * and return (we know that oname has enough space)
932
			 */
933
			*onamelen = strlcpy(oname, pt->nname, onamesize);
934
			if (*onamelen >= onamesize)
935
				*onamelen = onamesize - 1; /* XXX truncate? */
936
			return;
937
		}
938
		pt = pt->fow;
939
	}
940
941
	/*
942
	 * no match, just return
943
	 */
944
}
945
946
#ifndef NOCPIO
947
/*
948
 * device/inode mapping table routines
949
 * (used with formats that store device and inodes fields)
950
 *
951
 * device/inode mapping tables remap the device field in a archive header. The
952
 * device/inode fields are used to determine when files are hard links to each
953
 * other. However these values have very little meaning outside of that. This
954
 * database is used to solve one of two different problems.
955
 *
956
 * 1) when files are appended to an archive, while the new files may have hard
957
 * links to each other, you cannot determine if they have hard links to any
958
 * file already stored on the archive from a prior run of pax. We must assume
959
 * that these inode/device pairs are unique only within a SINGLE run of pax
960
 * (which adds a set of files to an archive). So we have to make sure the
961
 * inode/dev pairs we add each time are always unique. We do this by observing
962
 * while the inode field is very dense, the use of the dev field is fairly
963
 * sparse. Within each run of pax, we remap any device number of a new archive
964
 * member that has a device number used in a prior run and already stored in a
965
 * file on the archive. During the read phase of the append, we store the
966
 * device numbers used and mark them to not be used by any file during the
967
 * write phase. If during write we go to use one of those old device numbers,
968
 * we remap it to a new value.
969
 *
970
 * 2) Often the fields in the archive header used to store these values are
971
 * too small to store the entire value. The result is an inode or device value
972
 * which can be truncated. This really can foul up an archive. With truncation
973
 * we end up creating links between files that are really not links (after
974
 * truncation the inodes are the same value). We address that by detecting
975
 * truncation and forcing a remap of the device field to split truncated
976
 * inodes away from each other. Each truncation creates a pattern of bits that
977
 * are removed. We use this pattern of truncated bits to partition the inodes
978
 * on a single device to many different devices (each one represented by the
979
 * truncated bit pattern). All inodes on the same device that have the same
980
 * truncation pattern are mapped to the same new device. Two inodes that
981
 * truncate to the same value clearly will always have different truncation
982
 * bit patterns, so they will be split from away each other. When we spot
983
 * device truncation we remap the device number to a non truncated value.
984
 * (for more info see table.h for the data structures involved).
985
 */
986
987
static DEVT *chk_dev(dev_t, int);
988
989
/*
990
 * dev_start()
991
 *	create the device mapping table
992
 * Return:
993
 *	0 if successful, -1 otherwise
994
 */
995
996
int
997
dev_start(void)
998
{
999
	if (dtab != NULL)
1000
		return(0);
1001
	if ((dtab = calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {
1002
		paxwarn(1, "Cannot allocate memory for device mapping table");
1003
		return(-1);
1004
	}
1005
	return(0);
1006
}
1007
1008
/*
1009
 * add_dev()
1010
 *	add a device number to the table. this will force the device to be
1011
 *	remapped to a new value if it be used during a write phase. This
1012
 *	function is called during the read phase of an append to prohibit the
1013
 *	use of any device number already in the archive.
1014
 * Return:
1015
 *	0 if added ok, -1 otherwise
1016
 */
1017
1018
int
1019
add_dev(ARCHD *arcn)
1020
{
1021
	if (chk_dev(arcn->sb.st_dev, 1) == NULL)
1022
		return(-1);
1023
	return(0);
1024
}
1025
1026
/*
1027
 * chk_dev()
1028
 *	check for a device value in the device table. If not found and the add
1029
 *	flag is set, it is added. This does NOT assign any mapping values, just
1030
 *	adds the device number as one that need to be remapped. If this device
1031
 *	is already mapped, just return with a pointer to that entry.
1032
 * Return:
1033
 *	pointer to the entry for this device in the device map table. Null
1034
 *	if the add flag is not set and the device is not in the table (it is
1035
 *	not been seen yet). If add is set and the device cannot be added, null
1036
 *	is returned (indicates an error).
1037
 */
1038
1039
static DEVT *
1040
chk_dev(dev_t dev, int add)
1041
{
1042
	DEVT *pt;
1043
	u_int indx;
1044
1045
	if (dtab == NULL)
1046
		return(NULL);
1047
	/*
1048
	 * look to see if this device is already in the table
1049
	 */
1050
	indx = ((unsigned)dev) % D_TAB_SZ;
1051
	if ((pt = dtab[indx]) != NULL) {
1052
		while ((pt != NULL) && (pt->dev != dev))
1053
			pt = pt->fow;
1054
1055
		/*
1056
		 * found it, return a pointer to it
1057
		 */
1058
		if (pt != NULL)
1059
			return(pt);
1060
	}
1061
1062
	/*
1063
	 * not in table, we add it only if told to as this may just be a check
1064
	 * to see if a device number is being used.
1065
	 */
1066
	if (add == 0)
1067
		return(NULL);
1068
1069
	/*
1070
	 * allocate a node for this device and add it to the front of the hash
1071
	 * chain. Note we do not assign remaps values here, so the pt->list
1072
	 * list must be NULL.
1073
	 */
1074
	if ((pt = malloc(sizeof(DEVT))) == NULL) {
1075
		paxwarn(1, "Device map table out of memory");
1076
		return(NULL);
1077
	}
1078
	pt->dev = dev;
1079
	pt->list = NULL;
1080
	pt->fow = dtab[indx];
1081
	dtab[indx] = pt;
1082
	return(pt);
1083
}
1084
/*
1085
 * map_dev()
1086
 *	given an inode and device storage mask (the mask has a 1 for each bit
1087
 *	the archive format is able to store in a header), we check for inode
1088
 *	and device truncation and remap the device as required. Device mapping
1089
 *	can also occur when during the read phase of append a device number was
1090
 *	seen (and was marked as do not use during the write phase). WE ASSUME
1091
 *	that unsigned longs are the same size or bigger than the fields used
1092
 *	for ino_t and dev_t. If not the types will have to be changed.
1093
 * Return:
1094
 *	0 if all ok, -1 otherwise.
1095
 */
1096
1097
int
1098
map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
1099
{
1100
	DEVT *pt;
1101
	DLIST *dpt;
1102
	static dev_t lastdev = 0;	/* next device number to try */
1103
	int trc_ino = 0;
1104
	int trc_dev = 0;
1105
	ino_t trunc_bits = 0;
1106
	ino_t nino;
1107
1108
	if (dtab == NULL)
1109
		return(0);
1110
	/*
1111
	 * check for device and inode truncation, and extract the truncated
1112
	 * bit pattern.
1113
	 */
1114
	if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
1115
		++trc_dev;
1116
	if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
1117
		++trc_ino;
1118
		trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
1119
	}
1120
1121
	/*
1122
	 * see if this device is already being mapped, look up the device
1123
	 * then find the truncation bit pattern which applies
1124
	 */
1125
	if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
1126
		/*
1127
		 * this device is already marked to be remapped
1128
		 */
1129
		for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
1130
			if (dpt->trunc_bits == trunc_bits)
1131
				break;
1132
1133
		if (dpt != NULL) {
1134
			/*
1135
			 * we are being remapped for this device and pattern
1136
			 * change the device number to be stored and return
1137
			 */
1138
			arcn->sb.st_dev = dpt->dev;
1139
			arcn->sb.st_ino = nino;
1140
			return(0);
1141
		}
1142
	} else {
1143
		/*
1144
		 * this device is not being remapped YET. if we do not have any
1145
		 * form of truncation, we do not need a remap
1146
		 */
1147
		if (!trc_ino && !trc_dev)
1148
			return(0);
1149
1150
		/*
1151
		 * we have truncation, have to add this as a device to remap
1152
		 */
1153
		if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
1154
			goto bad;
1155
1156
		/*
1157
		 * if we just have a truncated inode, we have to make sure that
1158
		 * all future inodes that do not truncate (they have the
1159
		 * truncation pattern of all 0's) continue to map to the same
1160
		 * device number. We probably have already written inodes with
1161
		 * this device number to the archive with the truncation
1162
		 * pattern of all 0's. So we add the mapping for all 0's to the
1163
		 * same device number.
1164
		 */
1165
		if (!trc_dev && (trunc_bits != 0)) {
1166
			if ((dpt = malloc(sizeof(DLIST))) == NULL)
1167
				goto bad;
1168
			dpt->trunc_bits = 0;
1169
			dpt->dev = arcn->sb.st_dev;
1170
			dpt->fow = pt->list;
1171
			pt->list = dpt;
1172
		}
1173
	}
1174
1175
	/*
1176
	 * look for a device number not being used. We must watch for wrap
1177
	 * around on lastdev (so we do not get stuck looking forever!)
1178
	 */
1179
	while (++lastdev > 0) {
1180
		if (chk_dev(lastdev, 0) != NULL)
1181
			continue;
1182
		/*
1183
		 * found an unused value. If we have reached truncation point
1184
		 * for this format we are hosed, so we give up. Otherwise we
1185
		 * mark it as being used.
1186
		 */
1187
		if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
1188
		    (chk_dev(lastdev, 1) == NULL))
1189
			goto bad;
1190
		break;
1191
	}
1192
1193
	if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL))
1194
		goto bad;
1195
1196
	/*
1197
	 * got a new device number, store it under this truncation pattern.
1198
	 * change the device number this file is being stored with.
1199
	 */
1200
	dpt->trunc_bits = trunc_bits;
1201
	dpt->dev = lastdev;
1202
	dpt->fow = pt->list;
1203
	pt->list = dpt;
1204
	arcn->sb.st_dev = lastdev;
1205
	arcn->sb.st_ino = nino;
1206
	return(0);
1207
1208
    bad:
1209
	paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
1210
	    arcn->name);
1211
	paxwarn(0, "Archive may create improper hard links when extracted");
1212
	return(0);
1213
}
1214
#endif /* NOCPIO */
1215
1216
/*
1217
 * directory access/mod time reset table routines (for directories READ by pax)
1218
 *
1219
 * The pax -t flag requires that access times of archive files be the same
1220
 * before being read by pax. For regular files, access time is restored after
1221
 * the file has been copied. This database provides the same functionality for
1222
 * directories read during file tree traversal. Restoring directory access time
1223
 * is more complex than files since directories may be read several times until
1224
 * all the descendants in their subtree are visited by fts. Directory access
1225
 * and modification times are stored during the fts pre-order visit (done
1226
 * before any descendants in the subtree are visited) and restored after the
1227
 * fts post-order visit (after all the descendants have been visited). In the
1228
 * case of premature exit from a subtree (like from the effects of -n), any
1229
 * directory entries left in this database are reset during final cleanup
1230
 * operations of pax. Entries are hashed by inode number for fast lookup.
1231
 */
1232
1233
/*
1234
 * atdir_start()
1235
 *	create the directory access time database for directories READ by pax.
1236
 * Return:
1237
 *	0 is created ok, -1 otherwise.
1238
 */
1239
1240
int
1241
atdir_start(void)
1242
{
1243
	if (atab != NULL)
1244
		return(0);
1245
	if ((atab = calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {
1246
		paxwarn(1,"Cannot allocate space for directory access time table");
1247
		return(-1);
1248
	}
1249
	return(0);
1250
}
1251
1252
1253
/*
1254
 * atdir_end()
1255
 *	walk through the directory access time table and reset the access time
1256
 *	of any directory who still has an entry left in the database. These
1257
 *	entries are for directories READ by pax
1258
 */
1259
1260
void
1261
atdir_end(void)
1262
{
1263
	ATDIR *pt;
1264
	int i;
1265
1266
	if (atab == NULL)
1267
		return;
1268
	/*
1269
	 * for each non-empty hash table entry reset all the directories
1270
	 * chained there.
1271
	 */
1272
	for (i = 0; i < A_TAB_SZ; ++i) {
1273
		if ((pt = atab[i]) == NULL)
1274
			continue;
1275
		/*
1276
		 * remember to force the times, set_ftime() looks at pmtime
1277
		 * and patime, which only applies to things CREATED by pax,
1278
		 * not read by pax. Read time reset is controlled by -t.
1279
		 */
1280
		for (; pt != NULL; pt = pt->fow)
1281
			set_attr(&pt->ft, 1, 0, 0, 0);
1282
	}
1283
}
1284
1285
/*
1286
 * add_atdir()
1287
 *	add a directory to the directory access time table. Table is hashed
1288
 *	and chained by inode number. This is for directories READ by pax
1289
 */
1290
1291
void
1292
add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp,
1293
    const struct timespec *atimp)
1294
{
1295
	ATDIR *pt;
1296
	sigset_t allsigs, savedsigs;
1297
	u_int indx;
1298
1299
	if (atab == NULL)
1300
		return;
1301
1302
	/*
1303
	 * make sure this directory is not already in the table, if so just
1304
	 * return (the older entry always has the correct time). The only
1305
	 * way this will happen is when the same subtree can be traversed by
1306
	 * different args to pax and the -n option is aborting fts out of a
1307
	 * subtree before all the post-order visits have been made.
1308
	 */
1309
	indx = ((unsigned)ino) % A_TAB_SZ;
1310
	if ((pt = atab[indx]) != NULL) {
1311
		while (pt != NULL) {
1312
			if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
1313
				break;
1314
			pt = pt->fow;
1315
		}
1316
1317
		/*
1318
		 * oops, already there. Leave it alone.
1319
		 */
1320
		if (pt != NULL)
1321
			return;
1322
	}
1323
1324
	/*
1325
	 * add it to the front of the hash chain
1326
	 */
1327
	sigfillset(&allsigs);
1328
	sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
1329
	if ((pt = malloc(sizeof *pt)) != NULL) {
1330
		if ((pt->ft.ft_name = strdup(fname)) != NULL) {
1331
			pt->ft.ft_dev = dev;
1332
			pt->ft.ft_ino = ino;
1333
			pt->ft.ft_mtim = *mtimp;
1334
			pt->ft.ft_atim = *atimp;
1335
			pt->fow = atab[indx];
1336
			atab[indx] = pt;
1337
			sigprocmask(SIG_SETMASK, &savedsigs, NULL);
1338
			return;
1339
		}
1340
		free(pt);
1341
	}
1342
1343
	sigprocmask(SIG_SETMASK, &savedsigs, NULL);
1344
	paxwarn(1, "Directory access time reset table ran out of memory");
1345
}
1346
1347
/*
1348
 * get_atdir()
1349
 *	look up a directory by inode and device number to obtain the access
1350
 *	and modification time you want to set to. If found, the modification
1351
 *	and access time parameters are set and the entry is removed from the
1352
 *	table (as it is no longer needed). These are for directories READ by
1353
 *	pax
1354
 * Return:
1355
 *	0 if found, -1 if not found.
1356
 */
1357
1358
int
1359
do_atdir(const char *name, dev_t dev, ino_t ino)
1360
{
1361
	ATDIR *pt;
1362
	ATDIR **ppt;
1363
	sigset_t allsigs, savedsigs;
1364
	u_int indx;
1365
1366
	if (atab == NULL)
1367
		return(-1);
1368
	/*
1369
	 * hash by inode and search the chain for an inode and device match
1370
	 */
1371
	indx = ((unsigned)ino) % A_TAB_SZ;
1372
	if ((pt = atab[indx]) == NULL)
1373
		return(-1);
1374
1375
	ppt = &(atab[indx]);
1376
	while (pt != NULL) {
1377
		if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
1378
			break;
1379
		/*
1380
		 * no match, go to next one
1381
		 */
1382
		ppt = &(pt->fow);
1383
		pt = pt->fow;
1384
	}
1385
1386
	/*
1387
	 * return if we did not find it.
1388
	 */
1389
	if (pt == NULL || pt->ft.ft_name == NULL ||
1390
	    strcmp(name, pt->ft.ft_name) == 0)
1391
		return(-1);
1392
1393
	/*
1394
	 * found it. set the times and remove the entry from the table.
1395
	 */
1396
	set_attr(&pt->ft, 1, 0, 0, 0);
1397
	sigfillset(&allsigs);
1398
	sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
1399
	*ppt = pt->fow;
1400
	sigprocmask(SIG_SETMASK, &savedsigs, NULL);
1401
	free(pt->ft.ft_name);
1402
	free(pt);
1403
	return(0);
1404
}
1405
1406
/*
1407
 * directory access mode and time storage routines (for directories CREATED
1408
 * by pax).
1409
 *
1410
 * Pax requires that extracted directories, by default, have their access/mod
1411
 * times and permissions set to the values specified in the archive. During the
1412
 * actions of extracting (and creating the destination subtree during -rw copy)
1413
 * directories extracted may be modified after being created. Even worse is
1414
 * that these directories may have been created with file permissions which
1415
 * prohibits any descendants of these directories from being extracted. When
1416
 * directories are created by pax, access rights may be added to permit the
1417
 * creation of files in their subtree. Every time pax creates a directory, the
1418
 * times and file permissions specified by the archive are stored. After all
1419
 * files have been extracted (or copied), these directories have their times
1420
 * and file modes reset to the stored values. The directory info is restored in
1421
 * reverse order as entries were added from root to leaf: to restore atime
1422
 * properly, we must go backwards.
1423
 */
1424
1425
/*
1426
 * dir_start()
1427
 *	set up the directory time and file mode storage for directories CREATED
1428
 *	by pax.
1429
 * Return:
1430
 *	0 if ok, -1 otherwise
1431
 */
1432
1433
int
1434
dir_start(void)
1435
2
{
1436
2
	if (dirp != NULL)
1437
		return(0);
1438
1439
2
	dirsize = DIRP_SIZE;
1440
2
	if ((dirp = reallocarray(NULL, dirsize, sizeof(DIRDATA))) == NULL) {
1441
		paxwarn(1, "Unable to allocate memory for directory times");
1442
		return(-1);
1443
	}
1444
2
	return(0);
1445
}
1446
1447
/*
1448
 * add_dir()
1449
 *	add the mode and times for a newly CREATED directory
1450
 *	name is name of the directory, psb the stat buffer with the data in it,
1451
 *	frc_mode is a flag that says whether to force the setting of the mode
1452
 *	(ignoring the user set values for preserving file mode). Frc_mode is
1453
 *	for the case where we created a file and found that the resulting
1454
 *	directory was not writeable and the user asked for file modes to NOT
1455
 *	be preserved. (we have to preserve what was created by default, so we
1456
 *	have to force the setting at the end. this is stated explicitly in the
1457
 *	pax spec)
1458
 */
1459
1460
void
1461
add_dir(char *name, struct stat *psb, int frc_mode)
1462
4
{
1463
	DIRDATA *dblk;
1464
	sigset_t allsigs, savedsigs;
1465
	char realname[PATH_MAX], *rp;
1466
1467
4
	if (dirp == NULL)
1468
		return;
1469
1470

4
	if (havechd && *name != '/') {
1471
		if ((rp = realpath(name, realname)) == NULL) {
1472
			paxwarn(1, "Cannot canonicalize %s", name);
1473
			return;
1474
		}
1475
		name = rp;
1476
	}
1477
4
	if (dircnt == dirsize) {
1478
		dblk = reallocarray(dirp, dirsize, 2 * sizeof(DIRDATA));
1479
		if (dblk == NULL) {
1480
			paxwarn(1, "Unable to store mode and times for created"
1481
			    " directory: %s", name);
1482
			return;
1483
		}
1484
		sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
1485
		dirp = dblk;
1486
		dirsize *= 2;
1487
		sigprocmask(SIG_SETMASK, &savedsigs, NULL);
1488
	}
1489
4
	dblk = &dirp[dircnt];
1490
4
	if ((dblk->ft.ft_name = strdup(name)) == NULL) {
1491
		paxwarn(1, "Unable to store mode and times for created"
1492
		    " directory: %s", name);
1493
		return;
1494
	}
1495
4
	dblk->ft.ft_mtim = psb->st_mtim;
1496
4
	dblk->ft.ft_atim = psb->st_atim;
1497
4
	dblk->ft.ft_ino = psb->st_ino;
1498
4
	dblk->ft.ft_dev = psb->st_dev;
1499
4
	dblk->mode = psb->st_mode & ABITS;
1500
4
	dblk->frc_mode = frc_mode;
1501
4
	sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
1502
4
	++dircnt;
1503
4
	sigprocmask(SIG_SETMASK, &savedsigs, NULL);
1504
}
1505
1506
/*
1507
 * delete_dir()
1508
 *	When we rmdir a directory, we may want to make sure we don't
1509
 *	later warn about being unable to set its mode and times.
1510
 */
1511
1512
void
1513
delete_dir(dev_t dev, ino_t ino)
1514
{
1515
	DIRDATA *dblk;
1516
	char *name;
1517
	size_t i;
1518
1519
	if (dirp == NULL)
1520
		return;
1521
	for (i = 0; i < dircnt; i++) {
1522
		dblk = &dirp[i];
1523
1524
		if (dblk->ft.ft_name == NULL)
1525
			continue;
1526
		if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) {
1527
			name = dblk->ft.ft_name;
1528
			dblk->ft.ft_name = NULL;
1529
			free(name);
1530
			break;
1531
		}
1532
	}
1533
}
1534
1535
/*
1536
 * proc_dir(int in_sig)
1537
 *	process all file modes and times stored for directories CREATED
1538
 *	by pax.  If in_sig is set, we're in a signal handler and can't
1539
 *	free stuff.
1540
 */
1541
1542
void
1543
proc_dir(int in_sig)
1544
2
{
1545
	DIRDATA *dblk;
1546
	size_t cnt;
1547
1548
2
	if (dirp == NULL)
1549
		return;
1550
	/*
1551
	 * read backwards through the file and process each directory
1552
	 */
1553
2
	cnt = dircnt;
1554
8
	while (cnt-- > 0) {
1555
4
		dblk = &dirp[cnt];
1556
		/*
1557
		 * If we remove a directory we created, we replace the
1558
		 * ft_name with NULL.  Ignore those.
1559
		 */
1560
4
		if (dblk->ft.ft_name == NULL)
1561
			continue;
1562
1563
		/*
1564
		 * frc_mode set, make sure we set the file modes even if
1565
		 * the user didn't ask for it (see file_subs.c for more info)
1566
		 */
1567

4
		set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode,
1568
		    in_sig);
1569
4
		if (!in_sig)
1570
4
			free(dblk->ft.ft_name);
1571
	}
1572
1573
2
	if (!in_sig)
1574
2
		free(dirp);
1575
2
	dirp = NULL;
1576
2
	dircnt = 0;
1577
}
1578
1579
/*
1580
 * database independent routines
1581
 */
1582
1583
/*
1584
 * st_hash()
1585
 *	hashes filenames to a u_int for hashing into a table. Looks at the tail
1586
 *	end of file, as this provides far better distribution than any other
1587
 *	part of the name. For performance reasons we only care about the last
1588
 *	MAXKEYLEN chars (should be at LEAST large enough to pick off the file
1589
 *	name). Was tested on 500,000 name file tree traversal from the root
1590
 *	and gave almost a perfectly uniform distribution of keys when used with
1591
 *	prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
1592
 *	chars at a time and pads with 0 for last addition.
1593
 * Return:
1594
 *	the hash value of the string MOD (%) the table size.
1595
 */
1596
1597
u_int
1598
st_hash(const char *name, int len, int tabsz)
1599
216
{
1600
	const char *pt;
1601
	char *dest;
1602
	const char *end;
1603
	int i;
1604
216
	u_int key = 0;
1605
	int steps;
1606
	int res;
1607
	u_int val;
1608
1609
	/*
1610
	 * only look at the tail up to MAXKEYLEN, we do not need to waste
1611
	 * time here (remember these are pathnames, the tail is what will
1612
	 * spread out the keys)
1613
	 */
1614
216
	if (len > MAXKEYLEN) {
1615
		pt = &(name[len - MAXKEYLEN]);
1616
		len = MAXKEYLEN;
1617
	} else
1618
216
		pt = name;
1619
1620
	/*
1621
	 * calculate the number of u_int size steps in the string and if
1622
	 * there is a runt to deal with
1623
	 */
1624
216
	steps = len/sizeof(u_int);
1625
216
	res = len % sizeof(u_int);
1626
1627
	/*
1628
	 * add up the value of the string in unsigned integer sized pieces
1629
	 * too bad we cannot have unsigned int aligned strings, then we
1630
	 * could avoid the expensive copy.
1631
	 */
1632
432
	for (i = 0; i < steps; ++i) {
1633
216
		end = pt + sizeof(u_int);
1634
216
		dest = (char *)&val;
1635
1296
		while (pt < end)
1636
864
			*dest++ = *pt++;
1637
216
		key += val;
1638
	}
1639
1640
	/*
1641
	 * add in the runt padded with zero to the right
1642
	 */
1643
216
	if (res) {
1644
8
		val = 0;
1645
8
		end = pt + res;
1646
8
		dest = (char *)&val;
1647
32
		while (pt < end)
1648
16
			*dest++ = *pt++;
1649
8
		key += val;
1650
	}
1651
1652
	/*
1653
	 * return the result mod the table size
1654
	 */
1655
216
	return(key % tabsz);
1656
}