GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/vi/build/../vi/v_word.c Lines: 0 167 0.0 %
Date: 2017-11-07 Branches: 0 288 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: v_word.c,v 1.7 2014/11/12 04:28:41 bentley Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992, 1993, 1994
5
 *	The Regents of the University of California.  All rights reserved.
6
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7
 *	Keith Bostic.  All rights reserved.
8
 *
9
 * See the LICENSE file for redistribution information.
10
 */
11
12
#include "config.h"
13
14
#include <sys/types.h>
15
#include <sys/queue.h>
16
#include <sys/time.h>
17
18
#include <bitstring.h>
19
#include <ctype.h>
20
#include <limits.h>
21
#include <stdio.h>
22
23
#include "../common/common.h"
24
#include "vi.h"
25
26
/*
27
 * There are two types of "words".  Bigwords are easy -- groups of anything
28
 * delimited by whitespace.  Normal words are trickier.  They are either a
29
 * group of characters, numbers and underscores, or a group of anything but,
30
 * delimited by whitespace.  When moving by a word, if you're in whitespace, it's
31
 * easy, just remove the whitespace and go to the beginning or end of the
32
 * word.  Otherwise, figure out if the next character is in a different group.
33
 * If it is, go to the beginning or end of that group, otherwise, go to the
34
 * beginning or end of the current group.  The historic version of vi didn't
35
 * get this right, so, for example, there were cases where "4e" was not the
36
 * same as "eeee" -- in particular, single character words, and commands that
37
 * began in whitespace were almost always handled incorrectly.  To get it right
38
 * you have to resolve the cursor after each search so that the look-ahead to
39
 * figure out what type of "word" the cursor is in will be correct.
40
 *
41
 * Empty lines, and lines that consist of only white-space characters count
42
 * as a single word, and the beginning and end of the file counts as an
43
 * infinite number of words.
44
 *
45
 * Movements associated with commands are different than movement commands.
46
 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
47
 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
48
 * space is discarded from the change movement.  Another example is that,
49
 * in the same string, a "cw" on any white space character replaces that
50
 * single character, and nothing else.  Ain't nothin' in here that's easy.
51
 *
52
 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
53
 * would treat groups of empty lines as individual words, i.e. the command
54
 * would move the cursor to each new empty line.  The 'e' and 'E' commands
55
 * would treat groups of empty lines as a single word, i.e. the first use
56
 * would move past the group of lines.  The 'b' command would just beep at
57
 * you, or, if you did it from the start of the line as part of a motion
58
 * command, go absolutely nuts.  If the lines contained only white-space
59
 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
60
 * 'b', 'E' and 'e' commands would treat the group as a single word, and
61
 * the 'B' and 'b' commands will treat the lines as individual words.  This
62
 * implementation treats all of these cases as a single white-space word.
63
 */
64
65
enum which {BIGWORD, LITTLEWORD};
66
67
static int bword(SCR *, VICMD *, enum which);
68
static int eword(SCR *, VICMD *, enum which);
69
static int fword(SCR *, VICMD *, enum which);
70
71
/*
72
 * v_wordW -- [count]W
73
 *	Move forward a bigword at a time.
74
 *
75
 * PUBLIC: int v_wordW(SCR *, VICMD *);
76
 */
77
int
78
v_wordW(SCR *sp, VICMD *vp)
79
{
80
	return (fword(sp, vp, BIGWORD));
81
}
82
83
/*
84
 * v_wordw -- [count]w
85
 *	Move forward a word at a time.
86
 *
87
 * PUBLIC: int v_wordw(SCR *, VICMD *);
88
 */
89
int
90
v_wordw(SCR *sp, VICMD *vp)
91
{
92
	return (fword(sp, vp, LITTLEWORD));
93
}
94
95
/*
96
 * fword --
97
 *	Move forward by words.
98
 */
99
static int
100
fword(SCR *sp, VICMD *vp, enum which type)
101
{
102
	enum { INWORD, NOTWORD } state;
103
	VCS cs;
104
	u_long cnt;
105
106
	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
107
	cs.cs_lno = vp->m_start.lno;
108
	cs.cs_cno = vp->m_start.cno;
109
	if (cs_init(sp, &cs))
110
		return (1);
111
112
	/*
113
	 * If in white-space:
114
	 *	If the count is 1, and it's a change command, we're done.
115
	 *	Else, move to the first non-white-space character, which
116
	 *	counts as a single word move.  If it's a motion command,
117
	 *	don't move off the end of the line.
118
	 */
119
	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
120
		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
121
			if (ISCMD(vp->rkp, 'c'))
122
				return (0);
123
			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
124
				if (cs_fspace(sp, &cs))
125
					return (1);
126
				goto ret;
127
			}
128
		}
129
		if (cs_fblank(sp, &cs))
130
			return (1);
131
		--cnt;
132
	}
133
134
	/*
135
	 * Cyclically move to the next word -- this involves skipping
136
	 * over word characters and then any trailing non-word characters.
137
	 * Note, for the 'w' command, the definition of a word keeps
138
	 * switching.
139
	 */
140
	if (type == BIGWORD)
141
		while (cnt--) {
142
			for (;;) {
143
				if (cs_next(sp, &cs))
144
					return (1);
145
				if (cs.cs_flags == CS_EOF)
146
					goto ret;
147
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
148
					break;
149
			}
150
			/*
151
			 * If a motion command and we're at the end of the
152
			 * last word, we're done.  Delete and yank eat any
153
			 * trailing blanks, but we don't move off the end
154
			 * of the line regardless.
155
			 */
156
			if (cnt == 0 && ISMOTION(vp)) {
157
				if ((ISCMD(vp->rkp, 'd') ||
158
				    ISCMD(vp->rkp, 'y')) &&
159
				    cs_fspace(sp, &cs))
160
					return (1);
161
				break;
162
			}
163
164
			/* Eat whitespace characters. */
165
			if (cs_fblank(sp, &cs))
166
				return (1);
167
			if (cs.cs_flags == CS_EOF)
168
				goto ret;
169
		}
170
	else
171
		while (cnt--) {
172
			state = cs.cs_flags == 0 &&
173
			    inword(cs.cs_ch) ? INWORD : NOTWORD;
174
			for (;;) {
175
				if (cs_next(sp, &cs))
176
					return (1);
177
				if (cs.cs_flags == CS_EOF)
178
					goto ret;
179
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
180
					break;
181
				if (state == INWORD) {
182
					if (!inword(cs.cs_ch))
183
						break;
184
				} else
185
					if (inword(cs.cs_ch))
186
						break;
187
			}
188
			/* See comment above. */
189
			if (cnt == 0 && ISMOTION(vp)) {
190
				if ((ISCMD(vp->rkp, 'd') ||
191
				    ISCMD(vp->rkp, 'y')) &&
192
				    cs_fspace(sp, &cs))
193
					return (1);
194
				break;
195
			}
196
197
			/* Eat whitespace characters. */
198
			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
199
				if (cs_fblank(sp, &cs))
200
					return (1);
201
			if (cs.cs_flags == CS_EOF)
202
				goto ret;
203
		}
204
205
	/*
206
	 * If we didn't move, we must be at EOF.
207
	 *
208
	 * !!!
209
	 * That's okay for motion commands, however.
210
	 */
211
ret:	if (!ISMOTION(vp) &&
212
	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
213
		v_eof(sp, &vp->m_start);
214
		return (1);
215
	}
216
217
	/* Adjust the end of the range for motion commands. */
218
	vp->m_stop.lno = cs.cs_lno;
219
	vp->m_stop.cno = cs.cs_cno;
220
	if (ISMOTION(vp) && cs.cs_flags == 0)
221
		--vp->m_stop.cno;
222
223
	/*
224
	 * Non-motion commands move to the end of the range.  Delete
225
	 * and yank stay at the start, ignore others.
226
	 */
227
	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
228
	return (0);
229
}
230
231
/*
232
 * v_wordE -- [count]E
233
 *	Move forward to the end of the bigword.
234
 *
235
 * PUBLIC: int v_wordE(SCR *, VICMD *);
236
 */
237
int
238
v_wordE(SCR *sp, VICMD *vp)
239
{
240
	return (eword(sp, vp, BIGWORD));
241
}
242
243
/*
244
 * v_worde -- [count]e
245
 *	Move forward to the end of the word.
246
 *
247
 * PUBLIC: int v_worde(SCR *, VICMD *);
248
 */
249
int
250
v_worde(SCR *sp, VICMD *vp)
251
{
252
	return (eword(sp, vp, LITTLEWORD));
253
}
254
255
/*
256
 * eword --
257
 *	Move forward to the end of the word.
258
 */
259
static int
260
eword(SCR *sp, VICMD *vp, enum which type)
261
{
262
	enum { INWORD, NOTWORD } state;
263
	VCS cs;
264
	u_long cnt;
265
266
	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
267
	cs.cs_lno = vp->m_start.lno;
268
	cs.cs_cno = vp->m_start.cno;
269
	if (cs_init(sp, &cs))
270
		return (1);
271
272
	/*
273
	 * !!!
274
	 * If in whitespace, or the next character is whitespace, move past
275
	 * it.  (This doesn't count as a word move.)  Stay at the character
276
	 * past the current one, it sets word "state" for the 'e' command.
277
	 */
278
	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
279
		if (cs_next(sp, &cs))
280
			return (1);
281
		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
282
			goto start;
283
	}
284
	if (cs_fblank(sp, &cs))
285
		return (1);
286
287
	/*
288
	 * Cyclically move to the next word -- this involves skipping
289
	 * over word characters and then any trailing non-word characters.
290
	 * Note, for the 'e' command, the definition of a word keeps
291
	 * switching.
292
	 */
293
start:	if (type == BIGWORD)
294
		while (cnt--) {
295
			for (;;) {
296
				if (cs_next(sp, &cs))
297
					return (1);
298
				if (cs.cs_flags == CS_EOF)
299
					goto ret;
300
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
301
					break;
302
			}
303
			/*
304
			 * When we reach the start of the word after the last
305
			 * word, we're done.  If we changed state, back up one
306
			 * to the end of the previous word.
307
			 */
308
			if (cnt == 0) {
309
				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
310
					return (1);
311
				break;
312
			}
313
314
			/* Eat whitespace characters. */
315
			if (cs_fblank(sp, &cs))
316
				return (1);
317
			if (cs.cs_flags == CS_EOF)
318
				goto ret;
319
		}
320
	else
321
		while (cnt--) {
322
			state = cs.cs_flags == 0 &&
323
			    inword(cs.cs_ch) ? INWORD : NOTWORD;
324
			for (;;) {
325
				if (cs_next(sp, &cs))
326
					return (1);
327
				if (cs.cs_flags == CS_EOF)
328
					goto ret;
329
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
330
					break;
331
				if (state == INWORD) {
332
					if (!inword(cs.cs_ch))
333
						break;
334
				} else
335
					if (inword(cs.cs_ch))
336
						break;
337
			}
338
			/* See comment above. */
339
			if (cnt == 0) {
340
				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
341
					return (1);
342
				break;
343
			}
344
345
			/* Eat whitespace characters. */
346
			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
347
				if (cs_fblank(sp, &cs))
348
					return (1);
349
			if (cs.cs_flags == CS_EOF)
350
				goto ret;
351
		}
352
353
	/*
354
	 * If we didn't move, we must be at EOF.
355
	 *
356
	 * !!!
357
	 * That's okay for motion commands, however.
358
	 */
359
ret:	if (!ISMOTION(vp) &&
360
	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
361
		v_eof(sp, &vp->m_start);
362
		return (1);
363
	}
364
365
	/* Set the end of the range for motion commands. */
366
	vp->m_stop.lno = cs.cs_lno;
367
	vp->m_stop.cno = cs.cs_cno;
368
369
	/*
370
	 * Non-motion commands move to the end of the range.
371
	 * Delete and yank stay at the start, ignore others.
372
	 */
373
	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
374
	return (0);
375
}
376
377
/*
378
 * v_WordB -- [count]B
379
 *	Move backward a bigword at a time.
380
 *
381
 * PUBLIC: int v_wordB(SCR *, VICMD *);
382
 */
383
int
384
v_wordB(SCR *sp, VICMD *vp)
385
{
386
	return (bword(sp, vp, BIGWORD));
387
}
388
389
/*
390
 * v_wordb -- [count]b
391
 *	Move backward a word at a time.
392
 *
393
 * PUBLIC: int v_wordb(SCR *, VICMD *);
394
 */
395
int
396
v_wordb(SCR *sp, VICMD *vp)
397
{
398
	return (bword(sp, vp, LITTLEWORD));
399
}
400
401
/*
402
 * bword --
403
 *	Move backward by words.
404
 */
405
static int
406
bword(SCR *sp, VICMD *vp, enum which type)
407
{
408
	enum { INWORD, NOTWORD } state;
409
	VCS cs;
410
	u_long cnt;
411
412
	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
413
	cs.cs_lno = vp->m_start.lno;
414
	cs.cs_cno = vp->m_start.cno;
415
	if (cs_init(sp, &cs))
416
		return (1);
417
418
	/*
419
	 * !!!
420
	 * If in whitespace, or the previous character is whitespace, move
421
	 * past it.  (This doesn't count as a word move.)  Stay at the
422
	 * character before the current one, it sets word "state" for the
423
	 * 'b' command.
424
	 */
425
	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
426
		if (cs_prev(sp, &cs))
427
			return (1);
428
		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
429
			goto start;
430
	}
431
	if (cs_bblank(sp, &cs))
432
		return (1);
433
434
	/*
435
	 * Cyclically move to the beginning of the previous word -- this
436
	 * involves skipping over word characters and then any trailing
437
	 * non-word characters.  Note, for the 'b' command, the definition
438
	 * of a word keeps switching.
439
	 */
440
start:	if (type == BIGWORD)
441
		while (cnt--) {
442
			for (;;) {
443
				if (cs_prev(sp, &cs))
444
					return (1);
445
				if (cs.cs_flags == CS_SOF)
446
					goto ret;
447
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
448
					break;
449
			}
450
			/*
451
			 * When we reach the end of the word before the last
452
			 * word, we're done.  If we changed state, move forward
453
			 * one to the end of the next word.
454
			 */
455
			if (cnt == 0) {
456
				if (cs.cs_flags == 0 && cs_next(sp, &cs))
457
					return (1);
458
				break;
459
			}
460
461
			/* Eat whitespace characters. */
462
			if (cs_bblank(sp, &cs))
463
				return (1);
464
			if (cs.cs_flags == CS_SOF)
465
				goto ret;
466
		}
467
	else
468
		while (cnt--) {
469
			state = cs.cs_flags == 0 &&
470
			    inword(cs.cs_ch) ? INWORD : NOTWORD;
471
			for (;;) {
472
				if (cs_prev(sp, &cs))
473
					return (1);
474
				if (cs.cs_flags == CS_SOF)
475
					goto ret;
476
				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
477
					break;
478
				if (state == INWORD) {
479
					if (!inword(cs.cs_ch))
480
						break;
481
				} else
482
					if (inword(cs.cs_ch))
483
						break;
484
			}
485
			/* See comment above. */
486
			if (cnt == 0) {
487
				if (cs.cs_flags == 0 && cs_next(sp, &cs))
488
					return (1);
489
				break;
490
			}
491
492
			/* Eat whitespace characters. */
493
			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
494
				if (cs_bblank(sp, &cs))
495
					return (1);
496
			if (cs.cs_flags == CS_SOF)
497
				goto ret;
498
		}
499
500
	/* If we didn't move, we must be at SOF. */
501
ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
502
		v_sof(sp, &vp->m_start);
503
		return (1);
504
	}
505
506
	/* Set the end of the range for motion commands. */
507
	vp->m_stop.lno = cs.cs_lno;
508
	vp->m_stop.cno = cs.cs_cno;
509
510
	/*
511
	 * All commands move to the end of the range.  Motion commands
512
	 * adjust the starting point to the character before the current
513
	 * one.
514
	 *
515
	 * !!!
516
	 * The historic vi didn't get this right -- the `yb' command yanked
517
	 * the right stuff and even updated the cursor value, but the cursor
518
	 * was not actually updated on the screen.
519
	 */
520
	vp->m_final = vp->m_stop;
521
	if (ISMOTION(vp))
522
		--vp->m_start.cno;
523
	return (0);
524
}