GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mg/re_search.c Lines: 0 248 0.0 %
Date: 2017-11-07 Branches: 0 146 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: re_search.c,v 1.33 2017/08/06 04:39:45 bcallah Exp $	*/
2
3
/* This file is in the public domain. */
4
5
/*
6
 *	regular expression search commands for Mg
7
 *
8
 * This file contains functions to implement several of gnuemacs's regular
9
 * expression functions for Mg.  Several of the routines below are just minor
10
 * re-arrangements of Mg's non-regular expression search functions.  Some of
11
 * them are similar in structure to the original MicroEMACS, others are
12
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
13
 * them from scratch.
14
 */
15
16
#ifdef REGEX
17
#include <sys/queue.h>
18
#include <sys/types.h>
19
#include <regex.h>
20
#include <signal.h>
21
#include <stdio.h>
22
#include <string.h>
23
24
#include "def.h"
25
#include "macro.h"
26
27
#define SRCH_BEGIN	(0)		/* search sub-codes		    */
28
#define SRCH_FORW	(-1)
29
#define SRCH_BACK	(-2)
30
#define SRCH_NOPR	(-3)
31
#define SRCH_ACCM	(-4)
32
#define SRCH_MARK	(-5)
33
34
#define RE_NMATCH	10		/* max number of matches	    */
35
#define REPLEN		256		/* max length of replacement string */
36
37
char	re_pat[NPAT];			/* regex pattern		    */
38
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags		    */
39
int	casefoldsearch = TRUE;		/* does search ignore case?	    */
40
41
static int	 re_doreplace(RSIZE, char *);
42
static int	 re_forwsrch(void);
43
static int	 re_backsrch(void);
44
static int	 re_readpattern(char *);
45
static int	 killmatches(int);
46
static int	 countmatches(int);
47
48
/*
49
 * Search forward.
50
 * Get a search string from the user and search for it starting at ".".  If
51
 * found, move "." to just after the matched characters.  display does all
52
 * the hard stuff.  If not found, it just prints a message.
53
 */
54
/* ARGSUSED */
55
int
56
re_forwsearch(int f, int n)
57
{
58
	int	s;
59
60
	if ((s = re_readpattern("RE Search")) != TRUE)
61
		return (s);
62
	if (re_forwsrch() == FALSE) {
63
		dobeep();
64
		ewprintf("Search failed: \"%s\"", re_pat);
65
		return (FALSE);
66
	}
67
	re_srch_lastdir = SRCH_FORW;
68
	return (TRUE);
69
}
70
71
/*
72
 * Reverse search.
73
 * Get a search string from the user, and search, starting at "."
74
 * and proceeding toward the front of the buffer. If found "." is left
75
 * pointing at the first character of the pattern [the last character that
76
 * was matched].
77
 */
78
/* ARGSUSED */
79
int
80
re_backsearch(int f, int n)
81
{
82
	int	s;
83
84
	if ((s = re_readpattern("RE Search backward")) != TRUE)
85
		return (s);
86
	if (re_backsrch() == FALSE) {
87
		dobeep();
88
		ewprintf("Search failed: \"%s\"", re_pat);
89
		return (FALSE);
90
	}
91
	re_srch_lastdir = SRCH_BACK;
92
	return (TRUE);
93
}
94
95
/*
96
 * Search again, using the same search string and direction as the last search
97
 * command.  The direction has been saved in "srch_lastdir", so you know which
98
 * way to go.
99
 *
100
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
101
 * match to fail when it should not.
102
 */
103
/* ARGSUSED */
104
int
105
re_searchagain(int f, int n)
106
{
107
	if (re_srch_lastdir == SRCH_NOPR) {
108
		dobeep();
109
		ewprintf("No last search");
110
		return (FALSE);
111
	}
112
	if (re_srch_lastdir == SRCH_FORW) {
113
		if (re_forwsrch() == FALSE) {
114
			dobeep();
115
			ewprintf("Search failed: \"%s\"", re_pat);
116
			return (FALSE);
117
		}
118
		return (TRUE);
119
	}
120
	if (re_srch_lastdir == SRCH_BACK)
121
		if (re_backsrch() == FALSE) {
122
			dobeep();
123
			ewprintf("Search failed: \"%s\"", re_pat);
124
			return (FALSE);
125
		}
126
127
	return (TRUE);
128
}
129
130
/* Compiled regex goes here-- changed only when new pattern read */
131
static regex_t		regex_buff;
132
static regmatch_t	regex_match[RE_NMATCH];
133
134
/*
135
 * Re-Query Replace.
136
 *	Replace strings selectively.  Does a search and replace operation.
137
 */
138
/* ARGSUSED */
139
int
140
re_queryrepl(int f, int n)
141
{
142
	int	rcnt = 0;		/* replacements made so far	*/
143
	int	plen, s;		/* length of found string	*/
144
	char	news[NPAT];		/* replacement string		*/
145
146
	if ((s = re_readpattern("RE Query replace")) != TRUE)
147
		return (s);
148
	if (eread("Query replace %s with: ", news, NPAT,
149
	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
150
		return (ABORT);
151
	ewprintf("Query replacing %s with %s:", re_pat, news);
152
153
	/*
154
	 * Search forward repeatedly, checking each time whether to insert
155
	 * or not.  The "!" case makes the check always true, so it gets put
156
	 * into a tighter loop for efficiency.
157
	 */
158
	while (re_forwsrch() == TRUE) {
159
retry:
160
		update(CMODE);
161
		switch (getkey(FALSE)) {
162
		case ' ':
163
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
164
			if (re_doreplace((RSIZE)plen, news) == FALSE)
165
				return (FALSE);
166
			rcnt++;
167
			break;
168
169
		case '.':
170
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
171
			if (re_doreplace((RSIZE)plen, news) == FALSE)
172
				return (FALSE);
173
			rcnt++;
174
			goto stopsearch;
175
176
		case CCHR('G'):				/* ^G */
177
			(void)ctrlg(FFRAND, 0);
178
			goto stopsearch;
179
		case CCHR('['):				/* ESC */
180
		case '`':
181
			goto stopsearch;
182
		case '!':
183
			do {
184
				plen = regex_match[0].rm_eo - regex_match[0].rm_so;
185
				if (re_doreplace((RSIZE)plen, news) == FALSE)
186
					return (FALSE);
187
				rcnt++;
188
			} while (re_forwsrch() == TRUE);
189
			goto stopsearch;
190
191
		case CCHR('?'):				/* To not replace */
192
			break;
193
194
		default:
195
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
196
			goto retry;
197
		}
198
	}
199
200
stopsearch:
201
	curwp->w_rflag |= WFFULL;
202
	update(CMODE);
203
	if (!inmacro) {
204
		if (rcnt == 0)
205
			ewprintf("(No replacements done)");
206
		else if (rcnt == 1)
207
			ewprintf("(1 replacement done)");
208
		else
209
			ewprintf("(%d replacements done)", rcnt);
210
	}
211
	return (TRUE);
212
}
213
214
/*
215
 * Routine re_doreplace calls lreplace to make replacements needed by
216
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
217
 *  plen: length to remove
218
 *  st:   replacement string
219
 */
220
static int
221
re_doreplace(RSIZE plen, char *st)
222
{
223
	int	 j, k, s, more, num, state;
224
	struct line	*clp;
225
	char	 repstr[REPLEN];
226
227
	clp = curwp->w_dotp;
228
	more = TRUE;
229
	j = 0;
230
	state = 0;
231
	num = 0;
232
233
	/* The following FSA parses the replacement string */
234
	while (more) {
235
		switch (state) {
236
		case 0:
237
			if (*st == '\\') {
238
				st++;
239
				state = 1;
240
			} else if (*st == '\0')
241
				more = FALSE;
242
			else {
243
				repstr[j] = *st;
244
				j++;
245
				if (j >= REPLEN)
246
					return (FALSE);
247
				st++;
248
			}
249
			break;
250
		case 1:
251
			if (*st >= '0' && *st <= '9') {
252
				num = *st - '0';
253
				st++;
254
				state = 2;
255
			} else if (*st == '\0')
256
				more = FALSE;
257
			else {
258
				repstr[j] = *st;
259
				j++;
260
				if (j >= REPLEN)
261
					return (FALSE);
262
				st++;
263
				state = 0;
264
			}
265
			break;
266
		case 2:
267
			if (*st >= '0' && *st <= '9') {
268
				num = 10 * num + *st - '0';
269
				st++;
270
			} else {
271
				if (num >= RE_NMATCH)
272
					return (FALSE);
273
				k = regex_match[num].rm_eo - regex_match[num].rm_so;
274
				if (j + k >= REPLEN)
275
					return (FALSE);
276
				bcopy(&(clp->l_text[regex_match[num].rm_so]),
277
				    &repstr[j], k);
278
				j += k;
279
				if (*st == '\0')
280
					more = FALSE;
281
				if (*st == '\\') {
282
					st++;
283
					state = 1;
284
				} else {
285
					repstr[j] = *st;
286
					j++;
287
					if (j >= REPLEN)
288
						return (FALSE);
289
					st++;
290
					state = 0;
291
				}
292
			}
293
			break;
294
		}		/* switch (state) */
295
	}			/* while (more)   */
296
297
	repstr[j] = '\0';
298
	s = lreplace(plen, repstr);
299
	return (s);
300
}
301
302
/*
303
 * This routine does the real work of a forward search.  The pattern is
304
 * sitting in the external variable "pat".  If found, dot is updated, the
305
 * window system is notified of the change, and TRUE is returned.  If the
306
 * string isn't found, FALSE is returned.
307
 */
308
static int
309
re_forwsrch(void)
310
{
311
	int	 tbo, tdotline, error;
312
	struct line	*clp;
313
314
	clp = curwp->w_dotp;
315
	tbo = curwp->w_doto;
316
	tdotline = curwp->w_dotline;
317
318
	if (tbo == clp->l_used)
319
		/*
320
		 * Don't start matching past end of line -- must move to
321
		 * beginning of next line, unless at end of file.
322
		 */
323
		if (clp != curbp->b_headp) {
324
			clp = lforw(clp);
325
			tdotline++;
326
			tbo = 0;
327
		}
328
	/*
329
	 * Note this loop does not process the last line, but this editor
330
	 * always makes the last line empty so this is good.
331
	 */
332
	while (clp != (curbp->b_headp)) {
333
		regex_match[0].rm_so = tbo;
334
		regex_match[0].rm_eo = llength(clp);
335
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
336
		    REG_STARTEND);
337
		if (error != 0) {
338
			clp = lforw(clp);
339
			tdotline++;
340
			tbo = 0;
341
		} else {
342
			curwp->w_doto = regex_match[0].rm_eo;
343
			curwp->w_dotp = clp;
344
			curwp->w_dotline = tdotline;
345
			curwp->w_rflag |= WFMOVE;
346
			return (TRUE);
347
		}
348
	}
349
	return (FALSE);
350
}
351
352
/*
353
 * This routine does the real work of a backward search.  The pattern is sitting
354
 * in the external variable "re_pat".  If found, dot is updated, the window
355
 * system is notified of the change, and TRUE is returned.  If the string isn't
356
 * found, FALSE is returned.
357
 */
358
static int
359
re_backsrch(void)
360
{
361
	struct line		*clp;
362
	int		 tbo, tdotline;
363
	regmatch_t	 lastmatch;
364
365
	clp = curwp->w_dotp;
366
	tbo = curwp->w_doto;
367
	tdotline = curwp->w_dotline;
368
369
	/* Start search one position to the left of dot */
370
	tbo = tbo - 1;
371
	if (tbo < 0) {
372
		/* must move up one line */
373
		clp = lback(clp);
374
		tdotline--;
375
		tbo = llength(clp);
376
	}
377
378
	/*
379
	 * Note this loop does not process the last line, but this editor
380
	 * always makes the last line empty so this is good.
381
	 */
382
	while (clp != (curbp->b_headp)) {
383
		regex_match[0].rm_so = 0;
384
		regex_match[0].rm_eo = llength(clp);
385
		lastmatch.rm_so = -1;
386
		/*
387
		 * Keep searching until we don't match any longer.  Assumes a
388
		 * non-match does not modify the regex_match array.  We have to
389
		 * do this character-by-character after the first match since
390
		 * POSIX regexps don't give you a way to do reverse matches.
391
		 */
392
		while (!regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
393
		    REG_STARTEND) && regex_match[0].rm_so < tbo) {
394
			memcpy(&lastmatch, &regex_match[0], sizeof(regmatch_t));
395
			regex_match[0].rm_so++;
396
			regex_match[0].rm_eo = llength(clp);
397
		}
398
		if (lastmatch.rm_so == -1) {
399
			clp = lback(clp);
400
			tdotline--;
401
			tbo = llength(clp);
402
		} else {
403
			memcpy(&regex_match[0], &lastmatch, sizeof(regmatch_t));
404
			curwp->w_doto = regex_match[0].rm_so;
405
			curwp->w_dotp = clp;
406
			curwp->w_dotline = tdotline;
407
			curwp->w_rflag |= WFMOVE;
408
			return (TRUE);
409
		}
410
	}
411
	return (FALSE);
412
}
413
414
/*
415
 * Read a pattern.
416
 * Stash it in the external variable "re_pat". The "pat" is
417
 * not updated if the user types in an empty line. If the user typed
418
 * an empty line, and there is no old pattern, it is an error.
419
 * Display the old pattern, in the style of Jeff Lomicka. There is
420
 * some do-it-yourself control expansion.
421
 */
422
static int
423
re_readpattern(char *re_prompt)
424
{
425
	static int	dofree = 0;
426
	int		flags, error, s;
427
	char		tpat[NPAT], *rep;
428
429
	if (re_pat[0] == '\0')
430
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
431
	else
432
		rep = eread("%s (default %s): ", tpat, NPAT,
433
		    EFNUL | EFNEW | EFCR, re_prompt, re_pat);
434
	if (rep == NULL)
435
		return (ABORT);
436
	if (rep[0] != '\0') {
437
		/* New pattern given */
438
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
439
		if (casefoldsearch)
440
			flags = REG_EXTENDED | REG_ICASE;
441
		else
442
			flags = REG_EXTENDED;
443
		if (dofree)
444
			regfree(&regex_buff);
445
		error = regcomp(&regex_buff, re_pat, flags);
446
		if (error != 0) {
447
			char	message[256];
448
			regerror(error, &regex_buff, message, sizeof(message));
449
			dobeep();
450
			ewprintf("Regex Error: %s", message);
451
			re_pat[0] = '\0';
452
			return (FALSE);
453
		}
454
		dofree = 1;
455
		s = TRUE;
456
	} else if (rep[0] == '\0' && re_pat[0] != '\0')
457
		/* Just using old pattern */
458
		s = TRUE;
459
	else
460
		s = FALSE;
461
	return (s);
462
}
463
464
/*
465
 * Cause case to not matter in searches.  This is the default.	If called
466
 * with argument cause case to matter.
467
 */
468
/* ARGSUSED*/
469
int
470
setcasefold(int f, int n)
471
{
472
	if (f & FFARG) {
473
		casefoldsearch = FALSE;
474
		ewprintf("Case-fold-search unset");
475
	} else {
476
		casefoldsearch = TRUE;
477
		ewprintf("Case-fold-search set");
478
	}
479
480
	/*
481
	 * Invalidate the regular expression pattern since I'm too lazy to
482
	 * recompile it.
483
	 */
484
	re_pat[0] = '\0';
485
	return (TRUE);
486
}
487
488
/*
489
 * Delete all lines after dot that contain a string matching regex.
490
 */
491
/* ARGSUSED */
492
int
493
delmatchlines(int f, int n)
494
{
495
	int	s;
496
497
	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
498
	    != TRUE)
499
		return (s);
500
501
	s = killmatches(TRUE);
502
	return (s);
503
}
504
505
/*
506
 * Delete all lines after dot that don't contain a string matching regex.
507
 */
508
/* ARGSUSED */
509
int
510
delnonmatchlines(int f, int n)
511
{
512
	int	s;
513
514
	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
515
	    != TRUE)
516
		return (s);
517
518
	s = killmatches(FALSE);
519
	return (s);
520
}
521
522
/*
523
 * This function does the work of deleting matching lines.
524
 */
525
static int
526
killmatches(int cond)
527
{
528
	int	 s, error;
529
	int	 count = 0;
530
	struct line	*clp;
531
532
	clp = curwp->w_dotp;
533
	if (curwp->w_doto == llength(clp))
534
		/* Consider dot on next line */
535
		clp = lforw(clp);
536
537
	while (clp != (curbp->b_headp)) {
538
		/* see if line matches */
539
		regex_match[0].rm_so = 0;
540
		regex_match[0].rm_eo = llength(clp);
541
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
542
		    REG_STARTEND);
543
544
		/* Delete line when appropriate */
545
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
546
			curwp->w_doto = 0;
547
			curwp->w_dotp = clp;
548
			count++;
549
			s = ldelete(llength(clp) + 1, KNONE);
550
			clp = curwp->w_dotp;
551
			curwp->w_rflag |= WFMOVE;
552
			if (s == FALSE)
553
				return (FALSE);
554
		} else
555
			clp = lforw(clp);
556
	}
557
558
	ewprintf("%d line(s) deleted", count);
559
	if (count > 0)
560
		curwp->w_rflag |= WFMOVE;
561
562
	return (TRUE);
563
}
564
565
/*
566
 * Count lines matching regex.
567
 */
568
/* ARGSUSED */
569
int
570
cntmatchlines(int f, int n)
571
{
572
	int	s;
573
574
	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
575
		return (s);
576
	s = countmatches(TRUE);
577
578
	return (s);
579
}
580
581
/*
582
 * Count lines that fail to match regex.
583
 */
584
/* ARGSUSED */
585
int
586
cntnonmatchlines(int f, int n)
587
{
588
	int	s;
589
590
	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
591
		return (s);
592
	s = countmatches(FALSE);
593
594
	return (s);
595
}
596
597
/*
598
 * This function does the work of counting matching lines.
599
 */
600
int
601
countmatches(int cond)
602
{
603
	int	 error;
604
	int	 count = 0;
605
	struct line	*clp;
606
607
	clp = curwp->w_dotp;
608
	if (curwp->w_doto == llength(clp))
609
		/* Consider dot on next line */
610
		clp = lforw(clp);
611
612
	while (clp != (curbp->b_headp)) {
613
		/* see if line matches */
614
		regex_match[0].rm_so = 0;
615
		regex_match[0].rm_eo = llength(clp);
616
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
617
		    REG_STARTEND);
618
619
		/* Count line when appropriate */
620
		if ((cond == FALSE && error) || (cond == TRUE && !error))
621
			count++;
622
		clp = lforw(clp);
623
	}
624
625
	if (cond)
626
		ewprintf("Number of lines matching: %d", count);
627
	else
628
		ewprintf("Number of lines not matching: %d", count);
629
630
	return (TRUE);
631
}
632
#endif	/* REGEX */