GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libexpat/lib/xmltok_impl.c Lines: 679 796 85.3 %
Date: 2017-11-07 Branches: 1421 4097 34.7 %

Line Branch Exec Source
1
/* This file is included!
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
   Copyright (c) 2000-2017 Expat development team
11
   Licensed under the MIT license:
12
13
   Permission is  hereby granted,  free of charge,  to any  person obtaining
14
   a  copy  of  this  software   and  associated  documentation  files  (the
15
   "Software"),  to  deal in  the  Software  without restriction,  including
16
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17
   distribute, sublicense, and/or sell copies of the Software, and to permit
18
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19
   following conditions:
20
21
   The above copyright  notice and this permission notice  shall be included
22
   in all copies or substantial portions of the Software.
23
24
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30
   USE OR OTHER DEALINGS IN THE SOFTWARE.
31
*/
32
33
#ifdef XML_TOK_IMPL_C
34
35
#ifndef IS_INVALID_CHAR
36
#define IS_INVALID_CHAR(enc, ptr, n) (0)
37
#endif
38
39
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
40
    case BT_LEAD ## n: \
41
      if (end - ptr < n) \
42
        return XML_TOK_PARTIAL_CHAR; \
43
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
44
        *(nextTokPtr) = (ptr); \
45
        return XML_TOK_INVALID; \
46
      } \
47
      ptr += n; \
48
      break;
49
50
#define INVALID_CASES(ptr, nextTokPtr) \
51
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
52
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
53
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
54
  case BT_NONXML: \
55
  case BT_MALFORM: \
56
  case BT_TRAIL: \
57
    *(nextTokPtr) = (ptr); \
58
    return XML_TOK_INVALID;
59
60
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
61
   case BT_LEAD ## n: \
62
     if (end - ptr < n) \
63
       return XML_TOK_PARTIAL_CHAR; \
64
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
65
       *nextTokPtr = ptr; \
66
       return XML_TOK_INVALID; \
67
     } \
68
     ptr += n; \
69
     break;
70
71
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
72
  case BT_NONASCII: \
73
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
74
      *nextTokPtr = ptr; \
75
      return XML_TOK_INVALID; \
76
    } \
77
  case BT_NMSTRT: \
78
  case BT_HEX: \
79
  case BT_DIGIT: \
80
  case BT_NAME: \
81
  case BT_MINUS: \
82
    ptr += MINBPC(enc); \
83
    break; \
84
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
85
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
86
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
88
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
89
   case BT_LEAD ## n: \
90
     if (end - ptr < n) \
91
       return XML_TOK_PARTIAL_CHAR; \
92
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
93
       *nextTokPtr = ptr; \
94
       return XML_TOK_INVALID; \
95
     } \
96
     ptr += n; \
97
     break;
98
99
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
100
  case BT_NONASCII: \
101
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
102
      *nextTokPtr = ptr; \
103
      return XML_TOK_INVALID; \
104
    } \
105
  case BT_NMSTRT: \
106
  case BT_HEX: \
107
    ptr += MINBPC(enc); \
108
    break; \
109
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
110
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
111
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
113
#ifndef PREFIX
114
#define PREFIX(ident) ident
115
#endif
116
117
118
#define HAS_CHARS(enc, ptr, end, count) \
119
    (end - ptr >= count * MINBPC(enc))
120
121
#define HAS_CHAR(enc, ptr, end) \
122
    HAS_CHARS(enc, ptr, end, 1)
123
124
#define REQUIRE_CHARS(enc, ptr, end, count) \
125
    { \
126
      if (! HAS_CHARS(enc, ptr, end, count)) { \
127
        return XML_TOK_PARTIAL; \
128
      } \
129
    }
130
131
#define REQUIRE_CHAR(enc, ptr, end) \
132
    REQUIRE_CHARS(enc, ptr, end, 1)
133
134
135
/* ptr points to character following "<!-" */
136
137
static int PTRCALL
138
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
139
                    const char *end, const char **nextTokPtr)
140
{
141

511320
  if (HAS_CHAR(enc, ptr, end)) {
142


256010
    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
143
      *nextTokPtr = ptr;
144
      return XML_TOK_INVALID;
145
    }
146
255430
    ptr += MINBPC(enc);
147

82925530
    while (HAS_CHAR(enc, ptr, end)) {
148







82673020
      switch (BYTE_TYPE(enc, ptr)) {
149
      INVALID_CASES(ptr, nextTokPtr)
150
      case BT_MINUS:
151
1700670
        ptr += MINBPC(enc);
152

1700900
        REQUIRE_CHAR(enc, ptr, end);
153


1700500
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
154
250440
          ptr += MINBPC(enc);
155

250670
          REQUIRE_CHAR(enc, ptr, end);
156


250230
          if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
157
            *nextTokPtr = ptr;
158
            return XML_TOK_INVALID;
159
          }
160
250210
          *nextTokPtr = ptr + MINBPC(enc);
161
250210
          return XML_TOK_COMMENT;
162
        }
163
        break;
164
      default:
165
80964670
        ptr += MINBPC(enc);
166
80964670
        break;
167
      }
168
    }
169
  }
170
4990
  return XML_TOK_PARTIAL;
171
255660
}
172
173
/* ptr points to character following "<!" */
174
175
static int PTRCALL
176
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
177
                 const char *end, const char **nextTokPtr)
178
{
179

878212
  REQUIRE_CHAR(enc, ptr, end);
180




415398
  switch (BYTE_TYPE(enc, ptr)) {
181
  case BT_MINUS:
182
204790
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
183
  case BT_LSQB:
184
70
    *nextTokPtr = ptr + MINBPC(enc);
185
70
    return XML_TOK_COND_SECT_OPEN;
186
  case BT_NMSTRT:
187
  case BT_HEX:
188
204438
    ptr += MINBPC(enc);
189
    break;
190
  default:
191
    *nextTokPtr = ptr;
192
    return XML_TOK_INVALID;
193
  }
194

2030752
  while (HAS_CHAR(enc, ptr, end)) {
195






898300
    switch (BYTE_TYPE(enc, ptr)) {
196
    case BT_PERCNT:
197
      REQUIRE_CHARS(enc, ptr, end, 2);
198
      /* don't allow <!ENTITY% foo "whatever"> */
199
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
200
      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
201
        *nextTokPtr = ptr;
202
        return XML_TOK_INVALID;
203
      }
204
      /* fall through */
205
    case BT_S: case BT_CR: case BT_LF:
206
72522
      *nextTokPtr = ptr;
207
72522
      return XML_TOK_DECL_OPEN;
208
    case BT_NMSTRT:
209
    case BT_HEX:
210
810938
      ptr += MINBPC(enc);
211
      break;
212
    default:
213
      *nextTokPtr = ptr;
214
      return XML_TOK_INVALID;
215
    }
216
  }
217
131916
  return XML_TOK_PARTIAL;
218
429170
}
219
220
static int PTRCALL
221
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
222
                      const char *end, int *tokPtr)
223
{
224
  int upper = 0;
225
708980
  *tokPtr = XML_TOK_PI;
226

354490
  if (end - ptr != MINBPC(enc)*3)
227
113190
    return 1;
228



280800
  switch (BYTE_TO_ASCII(enc, ptr)) {
229
  case ASCII_x:
230
    break;
231
  case ASCII_X:
232
    upper = 1;
233
    break;
234
  default:
235
220
    return 1;
236
  }
237
241080
  ptr += MINBPC(enc);
238



280580
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
  case ASCII_m:
240
    break;
241
  case ASCII_M:
242
    upper = 1;
243
    break;
244
  default:
245
200
    return 1;
246
  }
247
240880
  ptr += MINBPC(enc);
248



280380
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
  case ASCII_l:
250
    break;
251
  case ASCII_L:
252
    upper = 1;
253
    break;
254
  default:
255
230
    return 1;
256
  }
257

240650
  if (upper)
258
    return 0;
259
240650
  *tokPtr = XML_TOK_XML_DECL;
260
240650
  return 1;
261
354490
}
262
263
/* ptr points to character following "<?" */
264
265
static int PTRCALL
266
PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
267
               const char *end, const char **nextTokPtr)
268
{
269
735700
  int tok;
270
  const char *target = ptr;
271

371220
  REQUIRE_CHAR(enc, ptr, end);
272






408000
  switch (BYTE_TYPE(enc, ptr)) {
273







364620
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
274
  default:
275
    *nextTokPtr = ptr;
276
    return XML_TOK_INVALID;
277
  }
278

1756560
  while (HAS_CHAR(enc, ptr, end)) {
279











1806000
    switch (BYTE_TYPE(enc, ptr)) {
280







1329090
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
281
    case BT_S: case BT_CR: case BT_LF:
282

354390
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
283
        *nextTokPtr = ptr;
284
        return XML_TOK_INVALID;
285
      }
286
354390
      ptr += MINBPC(enc);
287

88001620
      while (HAS_CHAR(enc, ptr, end)) {
288







88035480
        switch (BYTE_TYPE(enc, ptr)) {
289






110
        INVALID_CASES(ptr, nextTokPtr)
290
        case BT_QUEST:
291
106470
          ptr += MINBPC(enc);
292

109780
          REQUIRE_CHAR(enc, ptr, end);
293


103470
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
294
103160
            *nextTokPtr = ptr + MINBPC(enc);
295
103160
            return tok;
296
          }
297
          break;
298
        default:
299
87292810
          ptr += MINBPC(enc);
300
87292810
          break;
301
        }
302
      }
303
247910
      return XML_TOK_PARTIAL;
304
    case BT_QUEST:
305

100
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
306
        *nextTokPtr = ptr;
307
        return XML_TOK_INVALID;
308
      }
309
100
      ptr += MINBPC(enc);
310

160
      REQUIRE_CHAR(enc, ptr, end);
311


60
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
312
40
        *nextTokPtr = ptr + MINBPC(enc);
313
40
        return tok;
314
      }
315
      /* fall through */
316
    default:
317
      *nextTokPtr = ptr;
318
      return XML_TOK_INVALID;
319
    }
320
  }
321
9990
  return XML_TOK_PARTIAL;
322
367850
}
323
324
static int PTRCALL
325
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
326
                         const char *end, const char **nextTokPtr)
327
{
328
  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
329
                                     ASCII_T, ASCII_A, ASCII_LSQB };
330
  int i;
331
  /* CDATA[ */
332

11300
  REQUIRE_CHARS(enc, ptr, end, 6);
333

4140
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
334


2520
    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
335
40
      *nextTokPtr = ptr;
336
40
      return XML_TOK_INVALID;
337
    }
338
  }
339
270
  *nextTokPtr = ptr;
340
270
  return XML_TOK_CDATA_SECT_OPEN;
341
3870
}
342
343
static int PTRCALL
344
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
345
                        const char *end, const char **nextTokPtr)
346
{
347

9000
  if (ptr >= end)
348
1940
    return XML_TOK_NONE;
349
  if (MINBPC(enc) > 1) {
350
710
    size_t n = end - ptr;
351

710
    if (n & (MINBPC(enc) - 1)) {
352
360
      n &= ~(MINBPC(enc) - 1);
353

360
      if (n == 0)
354
260
        return XML_TOK_PARTIAL;
355
100
      end = ptr + n;
356
100
    }
357

450
  }
358








3200
  switch (BYTE_TYPE(enc, ptr)) {
359
  case BT_RSQB:
360
480
    ptr += MINBPC(enc);
361

660
    REQUIRE_CHAR(enc, ptr, end);
362


400
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
363
      break;
364
270
    ptr += MINBPC(enc);
365

420
    REQUIRE_CHAR(enc, ptr, end);
366


160
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
367
10
      ptr -= MINBPC(enc);
368
10
      break;
369
    }
370
110
    *nextTokPtr = ptr + MINBPC(enc);
371
110
    return XML_TOK_CDATA_SECT_CLOSE;
372
  case BT_CR:
373
    ptr += MINBPC(enc);
374
    REQUIRE_CHAR(enc, ptr, end);
375
    if (BYTE_TYPE(enc, ptr) == BT_LF)
376
      ptr += MINBPC(enc);
377
    *nextTokPtr = ptr;
378
    return XML_TOK_DATA_NEWLINE;
379
  case BT_LF:
380
10
    *nextTokPtr = ptr + MINBPC(enc);
381
10
    return XML_TOK_DATA_NEWLINE;
382






400
  INVALID_CASES(ptr, nextTokPtr)
383
  default:
384
1620
    ptr += MINBPC(enc);
385
1620
    break;
386
  }
387

12130
  while (HAS_CHAR(enc, ptr, end)) {
388








31240
    switch (BYTE_TYPE(enc, ptr)) {
389
#define LEAD_CASE(n) \
390
    case BT_LEAD ## n: \
391
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
392
        *nextTokPtr = ptr; \
393
        return XML_TOK_DATA_CHARS; \
394
      } \
395
      ptr += n; \
396
      break;
397






20
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
398
#undef LEAD_CASE
399
    case BT_NONXML:
400
    case BT_MALFORM:
401
    case BT_TRAIL:
402
    case BT_CR:
403
    case BT_LF:
404
    case BT_RSQB:
405
20
      *nextTokPtr = ptr;
406
20
      return XML_TOK_DATA_CHARS;
407
    default:
408
10410
      ptr += MINBPC(enc);
409
10410
      break;
410
    }
411
  }
412
1690
  *nextTokPtr = ptr;
413
1690
  return XML_TOK_DATA_CHARS;
414
4500
}
415
416
/* ptr points to character following "</" */
417
418
static int PTRCALL
419
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
420
                   const char *end, const char **nextTokPtr)
421
{
422

32639310
  REQUIRE_CHAR(enc, ptr, end);
423






16317550
  switch (BYTE_TYPE(enc, ptr)) {
424







16316420
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
425
  default:
426
    *nextTokPtr = ptr;
427
    return XML_TOK_INVALID;
428
  }
429

138912050
  while (HAS_CHAR(enc, ptr, end)) {
430












138848540
    switch (BYTE_TYPE(enc, ptr)) {
431







122593980
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
432
    case BT_S: case BT_CR: case BT_LF:
433

40
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
434




10
        switch (BYTE_TYPE(enc, ptr)) {
435
        case BT_S: case BT_CR: case BT_LF:
436
          break;
437
        case BT_GT:
438
10
          *nextTokPtr = ptr + MINBPC(enc);
439
10
          return XML_TOK_END_TAG;
440
        default:
441
          *nextTokPtr = ptr;
442
          return XML_TOK_INVALID;
443
        }
444
      }
445
10
      return XML_TOK_PARTIAL;
446
#ifdef XML_NS
447
    case BT_COLON:
448
      /* no need to check qname syntax here,
449
         since end-tag must match exactly */
450
1740
      ptr += MINBPC(enc);
451
1740
      break;
452
#endif
453
    case BT_GT:
454
16252190
      *nextTokPtr = ptr + MINBPC(enc);
455
16252190
      return XML_TOK_END_TAG;
456
    default:
457
      *nextTokPtr = ptr;
458
      return XML_TOK_INVALID;
459
    }
460
  }
461
64040
  return XML_TOK_PARTIAL;
462
16318540
}
463
464
/* ptr points to character following "&#X" */
465
466
static int PTRCALL
467
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
468
                       const char *end, const char **nextTokPtr)
469
{
470

7360
  if (HAS_CHAR(enc, ptr, end)) {
471


3900
    switch (BYTE_TYPE(enc, ptr)) {
472
    case BT_DIGIT:
473
    case BT_HEX:
474
      break;
475
    default:
476
10
      *nextTokPtr = ptr;
477
10
      return XML_TOK_INVALID;
478
    }
479

19440
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
480




10160
      switch (BYTE_TYPE(enc, ptr)) {
481
      case BT_DIGIT:
482
      case BT_HEX:
483
        break;
484
      case BT_SEMI:
485
2330
        *nextTokPtr = ptr + MINBPC(enc);
486
2330
        return XML_TOK_CHAR_REF;
487
      default:
488
        *nextTokPtr = ptr;
489
        return XML_TOK_INVALID;
490
      }
491
    }
492
  }
493
1340
  return XML_TOK_PARTIAL;
494
3680
}
495
496
/* ptr points to character following "&#" */
497
498
static int PTRCALL
499
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
500
                    const char *end, const char **nextTokPtr)
501
{
502

21880
  if (HAS_CHAR(enc, ptr, end)) {
503


11170
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
504
3680
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505


7770
    switch (BYTE_TYPE(enc, ptr)) {
506
    case BT_DIGIT:
507
      break;
508
    default:
509
      *nextTokPtr = ptr;
510
      return XML_TOK_INVALID;
511
    }
512

37260
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
513



20720
      switch (BYTE_TYPE(enc, ptr)) {
514
      case BT_DIGIT:
515
        break;
516
      case BT_SEMI:
517
5540
        *nextTokPtr = ptr + MINBPC(enc);
518
5540
        return XML_TOK_CHAR_REF;
519
      default:
520
        *nextTokPtr = ptr;
521
        return XML_TOK_INVALID;
522
      }
523
    }
524
  }
525
1720
  return XML_TOK_PARTIAL;
526
10940
}
527
528
/* ptr points to character following "&" */
529
530
static int PTRCALL
531
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
532
                const char **nextTokPtr)
533
{
534

10158966
  REQUIRE_CHAR(enc, ptr, end);
535







5076422
  switch (BYTE_TYPE(enc, ptr)) {
536







5062762
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
537
  case BT_NUM:
538
10940
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
539
  default:
540
    *nextTokPtr = ptr;
541
    return XML_TOK_INVALID;
542
  }
543

36567770
  while (HAS_CHAR(enc, ptr, end)) {
544









36509302
    switch (BYTE_TYPE(enc, ptr)) {
545







31504198
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
546
    case BT_SEMI:
547
5003984
      *nextTokPtr = ptr + MINBPC(enc);
548
5003984
      return XML_TOK_ENTITY_REF;
549
    default:
550
      *nextTokPtr = ptr;
551
      return XML_TOK_INVALID;
552
    }
553
  }
554
58748
  return XML_TOK_PARTIAL;
555
5077546
}
556
557
/* ptr points to character following first character of attribute name */
558
559
static int PTRCALL
560
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
561
                 const char **nextTokPtr)
562
{
563
#ifdef XML_NS
564
  int hadColon = 0;
565
#endif
566

1108606810
  while (HAS_CHAR(enc, ptr, end)) {
567












1100963760
    switch (BYTE_TYPE(enc, ptr)) {
568







1091064550
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
569
#ifdef XML_NS
570
    case BT_COLON:
571

2904170
      if (hadColon) {
572
10
        *nextTokPtr = ptr;
573
10
        return XML_TOK_INVALID;
574
      }
575
      hadColon = 1;
576
2904160
      ptr += MINBPC(enc);
577

2908240
      REQUIRE_CHAR(enc, ptr, end);
578






2901950
      switch (BYTE_TYPE(enc, ptr)) {
579







2900220
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
580
      default:
581
10
        *nextTokPtr = ptr;
582
10
        return XML_TOK_INVALID;
583
      }
584
860
      break;
585
#endif
586
    case BT_S: case BT_CR: case BT_LF:
587
      for (;;) {
588
        int t;
589
590
2300000
        ptr += MINBPC(enc);
591

2300000
        REQUIRE_CHAR(enc, ptr, end);
592

2300000
        t = BYTE_TYPE(enc, ptr);
593

2300000
        if (t == BT_EQUALS)
594
2250000
          break;
595



50000
        switch (t) {
596
        case BT_S:
597
        case BT_LF:
598
        case BT_CR:
599
          break;
600
        default:
601
          *nextTokPtr = ptr;
602
          return XML_TOK_INVALID;
603
        }
604


50000
      }
605
    /* fall through */
606
    case BT_EQUALS:
607
      {
608
        int open;
609
#ifdef XML_NS
610
        hadColon = 0;
611
#endif
612
6980480
        for (;;) {
613
9680480
          ptr += MINBPC(enc);
614

9687280
          REQUIRE_CHAR(enc, ptr, end);
615

9677440
          open = BYTE_TYPE(enc, ptr);
616

9673680
          if (open == BT_QUOT || open == BT_APOS)
617
            break;
618



2700000
          switch (open) {
619
          case BT_S:
620
          case BT_LF:
621
          case BT_CR:
622
            break;
623
          default:
624
            *nextTokPtr = ptr;
625
            return XML_TOK_INVALID;
626
          }
627
        }
628
6973680
        ptr += MINBPC(enc);
629
        /* in attribute value */
630
6973680
        for (;;) {
631
          int t;
632

729106390
          REQUIRE_CHAR(enc, ptr, end);
633

727248510
          t = BYTE_TYPE(enc, ptr);
634

727240190
          if (t == open)
635
6037720
            break;
636






721202470
          switch (t) {
637






50
          INVALID_CASES(ptr, nextTokPtr)
638
          case BT_AMP:
639
            {
640
10850
              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
641

10850
              if (tok <= 0) {
642

2840
                if (tok == XML_TOK_INVALID)
643
                  *nextTokPtr = ptr;
644
2840
                return tok;
645
              }
646

8010
              break;
647
            }
648
          case BT_LT:
649
            *nextTokPtr = ptr;
650
            return XML_TOK_INVALID;
651
          default:
652
721191600
            ptr += MINBPC(enc);
653
721191600
            break;
654
          }
655


721199610
        }
656
6037720
        ptr += MINBPC(enc);
657

6044500
        REQUIRE_CHAR(enc, ptr, end);
658





6032060
        switch (BYTE_TYPE(enc, ptr)) {
659
        case BT_S:
660
        case BT_CR:
661
        case BT_LF:
662
          break;
663
        case BT_SOL:
664
          goto sol;
665
        case BT_GT:
666
          goto gt;
667
        default:
668
          *nextTokPtr = ptr;
669
          return XML_TOK_INVALID;
670
        }
671
        /* ptr points to closing quote */
672
        for (;;) {
673
2780830
          ptr += MINBPC(enc);
674

2785860
          REQUIRE_CHAR(enc, ptr, end);
675










2776690
          switch (BYTE_TYPE(enc, ptr)) {
676







2471320
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
677
          case BT_S: case BT_CR: case BT_LF:
678
            continue;
679
          case BT_GT:
680
          gt:
681
3553280
            *nextTokPtr = ptr + MINBPC(enc);
682
3553280
            return XML_TOK_START_TAG_WITH_ATTS;
683
          case BT_SOL:
684
          sol:
685
1460
            ptr += MINBPC(enc);
686

2210
            REQUIRE_CHAR(enc, ptr, end);
687


750
            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
688
10
              *nextTokPtr = ptr;
689
10
              return XML_TOK_INVALID;
690
            }
691
700
            *nextTokPtr = ptr + MINBPC(enc);
692
700
            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
693
          default:
694
            *nextTokPtr = ptr;
695
            return XML_TOK_INVALID;
696
          }
697
          break;
698
        }
699
2471170
        break;
700
      }
701
    default:
702
      *nextTokPtr = ptr;
703
      return XML_TOK_INVALID;
704
    }
705
  }
706
1572230
  return XML_TOK_PARTIAL;
707
6085640
}
708
709
/* ptr points to character following "<" */
710
711
static int PTRCALL
712
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
713
               const char **nextTokPtr)
714
{
715
#ifdef XML_NS
716
  int hadColon;
717
#endif
718

72465732
  REQUIRE_CHAR(enc, ptr, end);
719








36243386
  switch (BYTE_TYPE(enc, ptr)) {
720







19849116
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
721
  case BT_EXCL:
722
55560
    ptr += MINBPC(enc);
723

56370
    REQUIRE_CHAR(enc, ptr, end);
724



59190
    switch (BYTE_TYPE(enc, ptr)) {
725
    case BT_MINUS:
726
50870
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
727
    case BT_LSQB:
728
3870
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
729
                                      end, nextTokPtr);
730
    }
731
10
    *nextTokPtr = ptr;
732
10
    return XML_TOK_INVALID;
733
  case BT_QUEST:
734
3630
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
735
  case BT_SOL:
736
16318540
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
737
  default:
738
    *nextTokPtr = ptr;
739
    return XML_TOK_INVALID;
740
  }
741
#ifdef XML_NS
742
  hadColon = 0;
743
#endif
744
  /* we have a start-tag */
745

1523724402
  while (HAS_CHAR(enc, ptr, end)) {
746













1502840404
    switch (BYTE_TYPE(enc, ptr)) {
747







1482043950
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
748
#ifdef XML_NS
749
    case BT_COLON:
750

1985720
      if (hadColon) {
751
10
        *nextTokPtr = ptr;
752
10
        return XML_TOK_INVALID;
753
      }
754
      hadColon = 1;
755
1985710
      ptr += MINBPC(enc);
756

1988070
      REQUIRE_CHAR(enc, ptr, end);
757






1986410
      switch (BYTE_TYPE(enc, ptr)) {
758







1984020
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
759
      default:
760
10
        *nextTokPtr = ptr;
761
10
        return XML_TOK_INVALID;
762
      }
763
1190
      break;
764
#endif
765
    case BT_S: case BT_CR: case BT_LF:
766
      {
767
6090080
        ptr += MINBPC(enc);
768

12893590
        while (HAS_CHAR(enc, ptr, end)) {
769










6804110
          switch (BYTE_TYPE(enc, ptr)) {
770







6085790
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
771
          case BT_GT:
772
            goto gt;
773
          case BT_SOL:
774
            goto sol;
775
          case BT_S: case BT_CR: case BT_LF:
776
713430
            ptr += MINBPC(enc);
777
713430
            continue;
778
          default:
779
            *nextTokPtr = ptr;
780
            return XML_TOK_INVALID;
781
          }
782
6085640
          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
783
        }
784
4390
        return XML_TOK_PARTIAL;
785
      }
786
    case BT_GT:
787
    gt:
788
12706404
      *nextTokPtr = ptr + MINBPC(enc);
789
12706404
      return XML_TOK_START_TAG_NO_ATTS;
790
    case BT_SOL:
791
    sol:
792
2470
      ptr += MINBPC(enc);
793

3580
      REQUIRE_CHAR(enc, ptr, end);
794


1440
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
795
        *nextTokPtr = ptr;
796
        return XML_TOK_INVALID;
797
      }
798
1360
      *nextTokPtr = ptr + MINBPC(enc);
799
1360
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
800
    default:
801
      *nextTokPtr = ptr;
802
      return XML_TOK_INVALID;
803
    }
804
  }
805
1047512
  return XML_TOK_PARTIAL;
806
36230806
}
807
808
static int PTRCALL
809
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
810
                   const char **nextTokPtr)
811
{
812

255392412
  if (ptr >= end)
813
27134
    return XML_TOK_NONE;
814
  if (MINBPC(enc) > 1) {
815
10690
    size_t n = end - ptr;
816

10690
    if (n & (MINBPC(enc) - 1)) {
817
5080
      n &= ~(MINBPC(enc) - 1);
818

5080
      if (n == 0)
819
720
        return XML_TOK_PARTIAL;
820
4360
      end = ptr + n;
821
4360
    }
822

9970
  }
823










127688292
  switch (BYTE_TYPE(enc, ptr)) {
824
  case BT_LT:
825
36230806
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
826
  case BT_AMP:
827
5066126
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
828
  case BT_CR:
829
100
    ptr += MINBPC(enc);
830

100
    if (! HAS_CHAR(enc, ptr, end))
831
80
      return XML_TOK_TRAILING_CR;
832


20
    if (BYTE_TYPE(enc, ptr) == BT_LF)
833
10
      ptr += MINBPC(enc);
834
20
    *nextTokPtr = ptr;
835
20
    return XML_TOK_DATA_NEWLINE;
836
  case BT_LF:
837
35651790
    *nextTokPtr = ptr + MINBPC(enc);
838
35651790
    return XML_TOK_DATA_NEWLINE;
839
  case BT_RSQB:
840
80
    ptr += MINBPC(enc);
841

80
    if (! HAS_CHAR(enc, ptr, end))
842
60
      return XML_TOK_TRAILING_RSQB;
843


20
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
844
      break;
845
    ptr += MINBPC(enc);
846
    if (! HAS_CHAR(enc, ptr, end))
847
      return XML_TOK_TRAILING_RSQB;
848
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
849
      ptr -= MINBPC(enc);
850
      break;
851
    }
852
    *nextTokPtr = ptr;
853
    return XML_TOK_INVALID;
854






4450
  INVALID_CASES(ptr, nextTokPtr)
855
  default:
856
50717140
    ptr += MINBPC(enc);
857
50717140
    break;
858
  }
859

810978730
  while (HAS_CHAR(enc, ptr, end)) {
860










810961650
    switch (BYTE_TYPE(enc, ptr)) {
861
#define LEAD_CASE(n) \
862
    case BT_LEAD ## n: \
863
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
864
        *nextTokPtr = ptr; \
865
        return XML_TOK_DATA_CHARS; \
866
      } \
867
      ptr += n; \
868
      break;
869
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
870
#undef LEAD_CASE
871
    case BT_RSQB:
872

300000
      if (HAS_CHARS(enc, ptr, end, 2)) {
873


300000
         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
874
           ptr += MINBPC(enc);
875
300000
           break;
876
         }
877
         if (HAS_CHARS(enc, ptr, end, 3)) {
878
           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
879
             ptr += MINBPC(enc);
880
             break;
881
           }
882
           *nextTokPtr = ptr + 2*MINBPC(enc);
883
           return XML_TOK_INVALID;
884
         }
885
      }
886
      /* fall through */
887
    case BT_AMP:
888
    case BT_LT:
889
    case BT_NONXML:
890
    case BT_MALFORM:
891
    case BT_TRAIL:
892
    case BT_CR:
893
    case BT_LF:
894
50700160
      *nextTokPtr = ptr;
895
50700160
      return XML_TOK_DATA_CHARS;
896
    default:
897
759961490
      ptr += MINBPC(enc);
898
759961490
      break;
899
    }
900
  }
901
17080
  *nextTokPtr = ptr;
902
17080
  return XML_TOK_DATA_CHARS;
903
127696206
}
904
905
/* ptr points to character following "%" */
906
907
static int PTRCALL
908
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
909
                    const char **nextTokPtr)
910
{
911

50750
  REQUIRE_CHAR(enc, ptr, end);
912









23530
  switch (BYTE_TYPE(enc, ptr)) {
913







22290
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
914
  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
915
960
    *nextTokPtr = ptr;
916
960
    return XML_TOK_PERCENT;
917
  default:
918
    *nextTokPtr = ptr;
919
    return XML_TOK_INVALID;
920
  }
921

10519920
  while (HAS_CHAR(enc, ptr, end)) {
922









10498500
    switch (BYTE_TYPE(enc, ptr)) {
923







10497570
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
924
    case BT_SEMI:
925
810
      *nextTokPtr = ptr + MINBPC(enc);
926
810
      return XML_TOK_PARAM_ENTITY_REF;
927
    default:
928
      *nextTokPtr = ptr;
929
      return XML_TOK_INVALID;
930
    }
931
  }
932
21430
  return XML_TOK_PARTIAL;
933
24650
}
934
935
static int PTRCALL
936
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
937
                      const char **nextTokPtr)
938
{
939

34050
  REQUIRE_CHAR(enc, ptr, end);
940






14520
  switch (BYTE_TYPE(enc, ptr)) {
941







14410
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
942
  default:
943
10
    *nextTokPtr = ptr;
944
10
    return XML_TOK_INVALID;
945
  }
946

64950
  while (HAS_CHAR(enc, ptr, end)) {
947













52390
    switch (BYTE_TYPE(enc, ptr)) {
948







50540
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
949
    case BT_CR: case BT_LF: case BT_S:
950
    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
951
1770
      *nextTokPtr = ptr;
952
1770
      return XML_TOK_POUND_NAME;
953
    default:
954
      *nextTokPtr = ptr;
955
      return XML_TOK_INVALID;
956
    }
957
  }
958
12590
  return -XML_TOK_POUND_NAME;
959
16140
}
960
961
static int PTRCALL
962
PREFIX(scanLit)(int open, const ENCODING *enc,
963
                const char *ptr, const char *end,
964
                const char **nextTokPtr)
965
{
966

919949086
  while (HAS_CHAR(enc, ptr, end)) {
967

914142516
    int t = BYTE_TYPE(enc, ptr);
968






914122396
    switch (t) {
969






110
    INVALID_CASES(ptr, nextTokPtr)
970
    case BT_QUOT:
971
    case BT_APOS:
972
161806
      ptr += MINBPC(enc);
973

161806
      if (t != open)
974
        break;
975

121016
      if (! HAS_CHAR(enc, ptr, end))
976
10518
        return -XML_TOK_LITERAL;
977
110498
      *nextTokPtr = ptr;
978






110718
      switch (BYTE_TYPE(enc, ptr)) {
979
      case BT_S: case BT_CR: case BT_LF:
980
      case BT_GT: case BT_PERCNT: case BT_LSQB:
981
110498
        return XML_TOK_LITERAL;
982
      default:
983
        return XML_TOK_INVALID;
984
      }
985
    default:
986
913960550
      ptr += MINBPC(enc);
987
913960550
      break;
988
    }
989

914001370
  }
990
1861546
  return XML_TOK_PARTIAL;
991
1982572
}
992
993
static int PTRCALL
994
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
995
                  const char **nextTokPtr)
996
{
997
  int tok;
998

10170272
  if (ptr >= end)
999
185806
    return XML_TOK_NONE;
1000
  if (MINBPC(enc) > 1) {
1001
35380
    size_t n = end - ptr;
1002

35380
    if (n & (MINBPC(enc) - 1)) {
1003
17100
      n &= ~(MINBPC(enc) - 1);
1004

17100
      if (n == 0)
1005
1650
        return XML_TOK_PARTIAL;
1006
15450
      end = ptr + n;
1007
15450
    }
1008

33730
  }
1009




















4965140
  switch (BYTE_TYPE(enc, ptr)) {
1010
  case BT_QUOT:
1011
100040
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1012
  case BT_APOS:
1013
1882532
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1014
  case BT_LT:
1015
    {
1016
872186
      ptr += MINBPC(enc);
1017

890818
      REQUIRE_CHAR(enc, ptr, end);
1018







906234
      switch (BYTE_TYPE(enc, ptr)) {
1019
      case BT_EXCL:
1020
429170
        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1021
      case BT_QUEST:
1022
364220
        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1023
      case BT_NMSTRT:
1024
      case BT_HEX:
1025
      case BT_NONASCII:
1026
      case BT_LEAD2:
1027
      case BT_LEAD3:
1028
      case BT_LEAD4:
1029
60144
        *nextTokPtr = ptr - MINBPC(enc);
1030
60144
        return XML_TOK_INSTANCE_START;
1031
      }
1032
20
      *nextTokPtr = ptr;
1033
20
      return XML_TOK_INVALID;
1034
    }
1035
  case BT_CR:
1036

30
    if (ptr + MINBPC(enc) == end) {
1037
30
      *nextTokPtr = end;
1038
      /* indicate that this might be part of a CR/LF pair */
1039
30
      return -XML_TOK_PROLOG_S;
1040
    }
1041
    /* fall through */
1042
  case BT_S: case BT_LF:
1043
    for (;;) {
1044
2448010
      ptr += MINBPC(enc);
1045

2448010
      if (! HAS_CHAR(enc, ptr, end))
1046
        break;
1047




2309160
      switch (BYTE_TYPE(enc, ptr)) {
1048
      case BT_S: case BT_LF:
1049
        break;
1050
      case BT_CR:
1051
        /* don't split CR/LF pair */
1052
        if (ptr + MINBPC(enc) != end)
1053
          break;
1054
        /* fall through */
1055
      default:
1056
559100
        *nextTokPtr = ptr;
1057
559100
        return XML_TOK_PROLOG_S;
1058
      }
1059
    }
1060
138870
    *nextTokPtr = ptr;
1061
138870
    return XML_TOK_PROLOG_S;
1062
  case BT_PERCNT:
1063
24300
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1064
  case BT_COMMA:
1065
3260
    *nextTokPtr = ptr + MINBPC(enc);
1066
3260
    return XML_TOK_COMMA;
1067
  case BT_LSQB:
1068
5694
    *nextTokPtr = ptr + MINBPC(enc);
1069
5694
    return XML_TOK_OPEN_BRACKET;
1070
  case BT_RSQB:
1071
7968
    ptr += MINBPC(enc);
1072

7968
    if (! HAS_CHAR(enc, ptr, end))
1073
3984
      return -XML_TOK_CLOSE_BRACKET;
1074


4034
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1075
      REQUIRE_CHARS(enc, ptr, end, 2);
1076
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1077
        *nextTokPtr = ptr + 2*MINBPC(enc);
1078
        return XML_TOK_COND_SECT_CLOSE;
1079
      }
1080
    }
1081
3984
    *nextTokPtr = ptr;
1082
3984
    return XML_TOK_CLOSE_BRACKET;
1083
  case BT_LPAR:
1084
4890
    *nextTokPtr = ptr + MINBPC(enc);
1085
4890
    return XML_TOK_OPEN_PAREN;
1086
  case BT_RPAR:
1087
7140
    ptr += MINBPC(enc);
1088

7140
    if (! HAS_CHAR(enc, ptr, end))
1089
3560
      return -XML_TOK_CLOSE_PAREN;
1090









3580
    switch (BYTE_TYPE(enc, ptr)) {
1091
    case BT_AST:
1092
350
      *nextTokPtr = ptr + MINBPC(enc);
1093
350
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1094
    case BT_QUEST:
1095
      *nextTokPtr = ptr + MINBPC(enc);
1096
      return XML_TOK_CLOSE_PAREN_QUESTION;
1097
    case BT_PLUS:
1098
70
      *nextTokPtr = ptr + MINBPC(enc);
1099
70
      return XML_TOK_CLOSE_PAREN_PLUS;
1100
    case BT_CR: case BT_LF: case BT_S:
1101
    case BT_GT: case BT_COMMA: case BT_VERBAR:
1102
    case BT_RPAR:
1103
3160
      *nextTokPtr = ptr;
1104
3160
      return XML_TOK_CLOSE_PAREN;
1105
    }
1106
    *nextTokPtr = ptr;
1107
    return XML_TOK_INVALID;
1108
  case BT_VERBAR:
1109
9050
    *nextTokPtr = ptr + MINBPC(enc);
1110
9050
    return XML_TOK_OR;
1111
  case BT_GT:
1112
65642
    *nextTokPtr = ptr + MINBPC(enc);
1113
65642
    return XML_TOK_DECL_CLOSE;
1114
  case BT_NUM:
1115
16140
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1116
#define LEAD_CASE(n) \
1117
  case BT_LEAD ## n: \
1118
    if (end - ptr < n) \
1119
      return XML_TOK_PARTIAL_CHAR; \
1120
    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1121
      ptr += n; \
1122
      tok = XML_TOK_NAME; \
1123
      break; \
1124
    } \
1125
    if (IS_NAME_CHAR(enc, ptr, n)) { \
1126
      ptr += n; \
1127
      tok = XML_TOK_NMTOKEN; \
1128
      break; \
1129
    } \
1130
    *nextTokPtr = ptr; \
1131
    return XML_TOK_INVALID;
1132







300
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1133
#undef LEAD_CASE
1134
  case BT_NMSTRT:
1135
  case BT_HEX:
1136
    tok = XML_TOK_NAME;
1137
1200508
    ptr += MINBPC(enc);
1138
1200508
    break;
1139
  case BT_DIGIT:
1140
  case BT_NAME:
1141
  case BT_MINUS:
1142
#ifdef XML_NS
1143
  case BT_COLON:
1144
#endif
1145
    tok = XML_TOK_NMTOKEN;
1146
60
    ptr += MINBPC(enc);
1147
60
    break;
1148
  case BT_NONASCII:
1149

80
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1150
50
      ptr += MINBPC(enc);
1151
      tok = XML_TOK_NAME;
1152
50
      break;
1153
    }
1154

30
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1155
30
      ptr += MINBPC(enc);
1156
      tok = XML_TOK_NMTOKEN;
1157
30
      break;
1158
    }
1159
    /* fall through */
1160
  default:
1161
50
    *nextTokPtr = ptr;
1162
50
    return XML_TOK_INVALID;
1163
  }
1164

429803914
  while (HAS_CHAR(enc, ptr, end)) {
1165


















428765252
    switch (BYTE_TYPE(enc, ptr)) {
1166







428595076
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1167
    case BT_GT: case BT_RPAR: case BT_COMMA:
1168
    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1169
    case BT_S: case BT_CR: case BT_LF:
1170
150256
      *nextTokPtr = ptr;
1171
150256
      return tok;
1172
#ifdef XML_NS
1173
    case BT_COLON:
1174
9400
      ptr += MINBPC(enc);
1175


9400
      switch (tok) {
1176
      case XML_TOK_NAME:
1177

10450
        REQUIRE_CHAR(enc, ptr, end);
1178
        tok = XML_TOK_PREFIXED_NAME;
1179








8240
        switch (BYTE_TYPE(enc, ptr)) {
1180







8170
        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1181
        default:
1182
          tok = XML_TOK_NMTOKEN;
1183
10
          break;
1184
        }
1185
        break;
1186
      case XML_TOK_PREFIXED_NAME:
1187
        tok = XML_TOK_NMTOKEN;
1188
50
        break;
1189
      }
1190
      break;
1191
#endif
1192
    case BT_PLUS:
1193

290
      if (tok == XML_TOK_NMTOKEN)  {
1194
10
        *nextTokPtr = ptr;
1195
10
        return XML_TOK_INVALID;
1196
      }
1197
280
      *nextTokPtr = ptr + MINBPC(enc);
1198
280
      return XML_TOK_NAME_PLUS;
1199
    case BT_AST:
1200

10
      if (tok == XML_TOK_NMTOKEN)  {
1201
10
        *nextTokPtr = ptr;
1202
10
        return XML_TOK_INVALID;
1203
      }
1204
      *nextTokPtr = ptr + MINBPC(enc);
1205
      return XML_TOK_NAME_ASTERISK;
1206
    case BT_QUEST:
1207

3240
      if (tok == XML_TOK_NMTOKEN)  {
1208
10
        *nextTokPtr = ptr;
1209
10
        return XML_TOK_INVALID;
1210
      }
1211
3230
      *nextTokPtr = ptr + MINBPC(enc);
1212
3230
      return XML_TOK_NAME_QUESTION;
1213
    default:
1214
10
      *nextTokPtr = ptr;
1215
10
      return XML_TOK_INVALID;
1216
    }
1217
  }
1218
1045712
  return -tok;
1219
5085136
}
1220
1221
static int PTRCALL
1222
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1223
                          const char *end, const char **nextTokPtr)
1224
{
1225
  const char *start;
1226

12320
  if (ptr >= end)
1227
890
    return XML_TOK_NONE;
1228

5270
  else if (! HAS_CHAR(enc, ptr, end)) {
1229
    /* This line cannot be executed.  The incoming data has already
1230
     * been tokenized once, so incomplete characters like this have
1231
     * already been eliminated from the input.  Retaining the paranoia
1232
     * check is still valuable, however.
1233
     */
1234
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1235
  }
1236
  start = ptr;
1237

401200
  while (HAS_CHAR(enc, ptr, end)) {
1238







395320
    switch (BYTE_TYPE(enc, ptr)) {
1239
#define LEAD_CASE(n) \
1240
    case BT_LEAD ## n: ptr += n; break;
1241
10
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1242
#undef LEAD_CASE
1243
    case BT_AMP:
1244

470
      if (ptr == start)
1245
430
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1246
40
      *nextTokPtr = ptr;
1247
40
      return XML_TOK_DATA_CHARS;
1248
    case BT_LT:
1249
      /* this is for inside entity references */
1250
10
      *nextTokPtr = ptr;
1251
10
      return XML_TOK_INVALID;
1252
    case BT_LF:
1253

20
      if (ptr == start) {
1254
20
        *nextTokPtr = ptr + MINBPC(enc);
1255
20
        return XML_TOK_DATA_NEWLINE;
1256
      }
1257
      *nextTokPtr = ptr;
1258
      return XML_TOK_DATA_CHARS;
1259
    case BT_CR:
1260

30
      if (ptr == start) {
1261
20
        ptr += MINBPC(enc);
1262

20
        if (! HAS_CHAR(enc, ptr, end))
1263
10
          return XML_TOK_TRAILING_CR;
1264


10
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1265
10
          ptr += MINBPC(enc);
1266
10
        *nextTokPtr = ptr;
1267
10
        return XML_TOK_DATA_NEWLINE;
1268
      }
1269
10
      *nextTokPtr = ptr;
1270
10
      return XML_TOK_DATA_CHARS;
1271
    case BT_S:
1272

4010
      if (ptr == start) {
1273
2400
        *nextTokPtr = ptr + MINBPC(enc);
1274
2400
        return XML_TOK_ATTRIBUTE_VALUE_S;
1275
      }
1276
1610
      *nextTokPtr = ptr;
1277
1610
      return XML_TOK_DATA_CHARS;
1278
    default:
1279
390650
      ptr += MINBPC(enc);
1280
390650
      break;
1281
    }
1282
  }
1283
730
  *nextTokPtr = ptr;
1284
730
  return XML_TOK_DATA_CHARS;
1285
6160
}
1286
1287
static int PTRCALL
1288
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1289
                       const char *end, const char **nextTokPtr)
1290
{
1291
  const char *start;
1292

4936
  if (ptr >= end)
1293
1054
    return XML_TOK_NONE;
1294

1414
  else if (! HAS_CHAR(enc, ptr, end)) {
1295
    /* This line cannot be executed.  The incoming data has already
1296
     * been tokenized once, so incomplete characters like this have
1297
     * already been eliminated from the input.  Retaining the paranoia
1298
     * check is still valuable, however.
1299
     */
1300
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1301
  }
1302
  start = ptr;
1303

382654
  while (HAS_CHAR(enc, ptr, end)) {
1304







381206
    switch (BYTE_TYPE(enc, ptr)) {
1305
#define LEAD_CASE(n) \
1306
    case BT_LEAD ## n: ptr += n; break;
1307
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1308
#undef LEAD_CASE
1309
    case BT_AMP:
1310

180
      if (ptr == start)
1311
140
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1312
40
      *nextTokPtr = ptr;
1313
40
      return XML_TOK_DATA_CHARS;
1314
    case BT_PERCNT:
1315

350
      if (ptr == start) {
1316
350
        int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1317
                                       end, nextTokPtr);
1318
350
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1319
      }
1320
      *nextTokPtr = ptr;
1321
      return XML_TOK_DATA_CHARS;
1322
    case BT_LF:
1323

150
      if (ptr == start) {
1324
120
        *nextTokPtr = ptr + MINBPC(enc);
1325
120
        return XML_TOK_DATA_NEWLINE;
1326
      }
1327
30
      *nextTokPtr = ptr;
1328
30
      return XML_TOK_DATA_CHARS;
1329
    case BT_CR:
1330

20
      if (ptr == start) {
1331
10
        ptr += MINBPC(enc);
1332

10
        if (! HAS_CHAR(enc, ptr, end))
1333
10
          return XML_TOK_TRAILING_CR;
1334
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1335
          ptr += MINBPC(enc);
1336
        *nextTokPtr = ptr;
1337
        return XML_TOK_DATA_NEWLINE;
1338
      }
1339
10
      *nextTokPtr = ptr;
1340
10
      return XML_TOK_DATA_CHARS;
1341
    default:
1342
379826
      ptr += MINBPC(enc);
1343
379826
      break;
1344
    }
1345
  }
1346
714
  *nextTokPtr = ptr;
1347
714
  return XML_TOK_DATA_CHARS;
1348
2468
}
1349
1350
#ifdef XML_DTD
1351
1352
static int PTRCALL
1353
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1354
                         const char *end, const char **nextTokPtr)
1355
{
1356
  int level = 0;
1357
  if (MINBPC(enc) > 1) {
1358
2120
    size_t n = end - ptr;
1359

1060
    if (n & (MINBPC(enc) - 1)) {
1360
520
      n &= ~(MINBPC(enc) - 1);
1361
520
      end = ptr + n;
1362
520
    }
1363
  }
1364

19900
  while (HAS_CHAR(enc, ptr, end)) {
1365







41460
    switch (BYTE_TYPE(enc, ptr)) {
1366






100
    INVALID_CASES(ptr, nextTokPtr)
1367
    case BT_LT:
1368
1460
      ptr += MINBPC(enc);
1369

1530
      REQUIRE_CHAR(enc, ptr, end);
1370


2370
      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1371
1390
        ptr += MINBPC(enc);
1372

1460
        REQUIRE_CHAR(enc, ptr, end);
1373


2260
        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1374
          ++level;
1375
          ptr += MINBPC(enc);
1376
        }
1377
      }
1378
      break;
1379
    case BT_RSQB:
1380
130
      ptr += MINBPC(enc);
1381

180
      REQUIRE_CHAR(enc, ptr, end);
1382


140
      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1383
80
        ptr += MINBPC(enc);
1384

130
        REQUIRE_CHAR(enc, ptr, end);
1385


50
        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1386
30
          ptr += MINBPC(enc);
1387

30
          if (level == 0) {
1388
30
            *nextTokPtr = ptr;
1389
30
            return XML_TOK_IGNORE_SECT;
1390
          }
1391
          --level;
1392
        }
1393
      }
1394
      break;
1395
    default:
1396
14900
      ptr += MINBPC(enc);
1397
14900
      break;
1398
    }
1399
  }
1400
1260
  return XML_TOK_PARTIAL;
1401
1580
}
1402
1403
#endif /* XML_DTD */
1404
1405
static int PTRCALL
1406
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1407
                   const char **badPtr)
1408
{
1409
101360
  ptr += MINBPC(enc);
1410
50680
  end -= MINBPC(enc);
1411

4182280
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1412


















3148110
    switch (BYTE_TYPE(enc, ptr)) {
1413
    case BT_DIGIT:
1414
    case BT_HEX:
1415
    case BT_MINUS:
1416
    case BT_APOS:
1417
    case BT_LPAR:
1418
    case BT_RPAR:
1419
    case BT_PLUS:
1420
    case BT_COMMA:
1421
    case BT_SOL:
1422
    case BT_EQUALS:
1423
    case BT_QUEST:
1424
    case BT_CR:
1425
    case BT_LF:
1426
    case BT_SEMI:
1427
    case BT_EXCL:
1428
    case BT_AST:
1429
    case BT_PERCNT:
1430
    case BT_NUM:
1431
#ifdef XML_NS
1432
    case BT_COLON:
1433
#endif
1434
      break;
1435
    case BT_S:
1436


150000
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1437
        *badPtr = ptr;
1438
        return 0;
1439
      }
1440
      break;
1441
    case BT_NAME:
1442
    case BT_NMSTRT:
1443


957600
      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1444
        break;
1445
    default:
1446



10
      switch (BYTE_TO_ASCII(enc, ptr)) {
1447
      case 0x24: /* $ */
1448
      case 0x40: /* @ */
1449
        break;
1450
      default:
1451
10
        *badPtr = ptr;
1452
10
        return 0;
1453
      }
1454
      break;
1455
    }
1456
  }
1457
50670
  return 1;
1458
50680
}
1459
1460
/* This must only be called for a well-formed start-tag or empty
1461
   element tag.  Returns the number of attributes.  Pointers to the
1462
   first attsMax attributes are stored in atts.
1463
*/
1464
1465
static int PTRCALL
1466
PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1467
                int attsMax, ATTRIBUTE *atts)
1468
{
1469
  enum { other, inName, inValue } state = inName;
1470
  int nAtts = 0;
1471
  int open = 0; /* defined when state == inValue;
1472
                   initialization just to shut up compilers */
1473
1474
248242940
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1475












435841048
    switch (BYTE_TYPE(enc, ptr)) {
1476
#define START_NAME \
1477
      if (state == other) { \
1478
        if (nAtts < attsMax) { \
1479
          atts[nAtts].name = ptr; \
1480
          atts[nAtts].normalized = 1; \
1481
        } \
1482
        state = inName; \
1483
      }
1484
#define LEAD_CASE(n) \
1485
    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1486









120
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1487
#undef LEAD_CASE
1488
    case BT_NONASCII:
1489
    case BT_NMSTRT:
1490
    case BT_HEX:
1491



196708062
      START_NAME
1492
      break;
1493
#undef START_NAME
1494
    case BT_QUOT:
1495

9500020
      if (state != inValue) {
1496

4750010
        if (nAtts < attsMax)
1497
4750010
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1498
        state = inValue;
1499
        open = BT_QUOT;
1500
4750010
      }
1501

4750010
      else if (open == BT_QUOT) {
1502
        state = other;
1503

4750010
        if (nAtts < attsMax)
1504
4750010
          atts[nAtts].valueEnd = ptr;
1505
4750010
        nAtts++;
1506
4750010
      }
1507
      break;
1508
    case BT_APOS:
1509

10680
      if (state != inValue) {
1510

5340
        if (nAtts < attsMax)
1511
5300
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1512
        state = inValue;
1513
        open = BT_APOS;
1514
5340
      }
1515

5340
      else if (open == BT_APOS) {
1516
        state = other;
1517

5340
        if (nAtts < attsMax)
1518
5300
          atts[nAtts].valueEnd = ptr;
1519
5340
        nAtts++;
1520
5340
      }
1521
      break;
1522
    case BT_AMP:
1523

430
      if (nAtts < attsMax)
1524
430
        atts[nAtts].normalized = 0;
1525
      break;
1526
    case BT_S:
1527

9958520
      if (state == inName)
1528
5352900
        state = other;
1529

4605740
      else if (state == inValue
1530

4606430
               && nAtts < attsMax
1531

1620
               && atts[nAtts].normalized
1532

1000
               && (ptr == atts[nAtts].valuePtr
1533


310
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1534


240
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1535


240
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1536
70
        atts[nAtts].normalized = 0;
1537
      break;
1538
    case BT_CR: case BT_LF:
1539
      /* This case ensures that the first attribute name is counted
1540
         Apart from that we could just change state on the quote. */
1541

650790
      if (state == inName)
1542
450120
        state = other;
1543



200700
      else if (state == inValue && nAtts < attsMax)
1544
30
        atts[nAtts].normalized = 0;
1545
      break;
1546
    case BT_GT:
1547
    case BT_SOL:
1548

17548324
      if (state != inValue)
1549
16258934
        return nAtts;
1550
      break;
1551
    default:
1552
      break;
1553
    }
1554
  }
1555
  /* not reached */
1556
}
1557
1558
static int PTRFASTCALL
1559
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
1560
{
1561
  int result = 0;
1562
  /* skip &# */
1563
1580
  ptr += 2*MINBPC(enc);
1564


830
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1565

2960
    for (ptr += MINBPC(enc);
1566

1680
         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1567
1090
         ptr += MINBPC(enc)) {
1568

320
      int c = BYTE_TO_ASCII(enc, ptr);
1569

















2200
      switch (c) {
1570
      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1571
      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1572
690
        result <<= 4;
1573
690
        result |= (c - ASCII_0);
1574
690
        break;
1575
      case ASCII_A: case ASCII_B: case ASCII_C:
1576
      case ASCII_D: case ASCII_E: case ASCII_F:
1577
410
        result <<= 4;
1578
410
        result += 10 + (c - ASCII_A);
1579
410
        break;
1580
      case ASCII_a: case ASCII_b: case ASCII_c:
1581
      case ASCII_d: case ASCII_e: case ASCII_f:
1582
        result <<= 4;
1583
        result += 10 + (c - ASCII_a);
1584
        break;
1585
      }
1586

1100
      if (result >= 0x110000)
1587
10
        return -1;
1588

1090
    }
1589
  }
1590
  else {
1591


2860
    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1592

180
      int c = BYTE_TO_ASCII(enc, ptr);
1593
1160
      result *= 10;
1594
1160
      result += (c - ASCII_0);
1595

1160
      if (result >= 0x110000)
1596
10
        return -1;
1597

1150
    }
1598
  }
1599
770
  return checkCharRefNumber(result);
1600
790
}
1601
1602
static int PTRCALL
1603
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
1604
                             const char *end)
1605
{
1606



10021096
  switch ((end - ptr)/MINBPC(enc)) {
1607
  case 2:
1608


4754992
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1609



4750050
      switch (BYTE_TO_ASCII(enc, ptr)) {
1610
      case ASCII_l:
1611
2650020
        return ASCII_LT;
1612
      case ASCII_g:
1613
2100020
        return ASCII_GT;
1614
      }
1615
    }
1616
    break;
1617
  case 3:
1618


250490
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1619
250050
      ptr += MINBPC(enc);
1620


250050
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1621
250050
        ptr += MINBPC(enc);
1622


250050
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1623
250040
          return ASCII_AMP;
1624
      }
1625
    }
1626
    break;
1627
  case 4:
1628



100
    switch (BYTE_TO_ASCII(enc, ptr)) {
1629
    case ASCII_q:
1630
30
      ptr += MINBPC(enc);
1631


30
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1632
30
        ptr += MINBPC(enc);
1633


30
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1634
20
          ptr += MINBPC(enc);
1635


20
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1636
20
            return ASCII_QUOT;
1637
        }
1638
      }
1639
      break;
1640
    case ASCII_a:
1641
50
      ptr += MINBPC(enc);
1642


50
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1643
50
        ptr += MINBPC(enc);
1644


50
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1645
50
          ptr += MINBPC(enc);
1646


50
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1647
40
            return ASCII_APOS;
1648
        }
1649
      }
1650
      break;
1651
    }
1652
  }
1653
7732
  return 0;
1654
5007872
}
1655
1656
/* This function does not appear to be called from anywhere within the
1657
 * library code.  It is used via the macro XmlSameName(), which is
1658
 * defined but never used.  Since it appears in the encoding function
1659
 * table, removing it is not a thing to be undertaken lightly.  For
1660
 * the moment, we simply exclude it from coverage tests.
1661
 *
1662
 * LCOV_EXCL_START
1663
 */
1664
static int PTRCALL
1665
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1666
{
1667
  for (;;) {
1668
    switch (BYTE_TYPE(enc, ptr1)) {
1669
#define LEAD_CASE(n) \
1670
    case BT_LEAD ## n: \
1671
      if (*ptr1++ != *ptr2++) \
1672
        return 0;
1673
    LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1674
#undef LEAD_CASE
1675
      /* fall through */
1676
      if (*ptr1++ != *ptr2++)
1677
        return 0;
1678
      break;
1679
    case BT_NONASCII:
1680
    case BT_NMSTRT:
1681
#ifdef XML_NS
1682
    case BT_COLON:
1683
#endif
1684
    case BT_HEX:
1685
    case BT_DIGIT:
1686
    case BT_NAME:
1687
    case BT_MINUS:
1688
      if (*ptr2++ != *ptr1++)
1689
        return 0;
1690
      if (MINBPC(enc) > 1) {
1691
        if (*ptr2++ != *ptr1++)
1692
          return 0;
1693
        if (MINBPC(enc) > 2) {
1694
          if (*ptr2++ != *ptr1++)
1695
            return 0;
1696
          if (MINBPC(enc) > 3) {
1697
            if (*ptr2++ != *ptr1++)
1698
              return 0;
1699
          }
1700
        }
1701
      }
1702
      break;
1703
    default:
1704
      if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1705
        return 1;
1706
      switch (BYTE_TYPE(enc, ptr2)) {
1707
      case BT_LEAD2:
1708
      case BT_LEAD3:
1709
      case BT_LEAD4:
1710
      case BT_NONASCII:
1711
      case BT_NMSTRT:
1712
#ifdef XML_NS
1713
      case BT_COLON:
1714
#endif
1715
      case BT_HEX:
1716
      case BT_DIGIT:
1717
      case BT_NAME:
1718
      case BT_MINUS:
1719
        return 0;
1720
      default:
1721
        return 1;
1722
      }
1723
    }
1724
  }
1725
  /* not reached */
1726
}
1727
/* LCOV_EXCL_STOP */
1728
1729
static int PTRCALL
1730
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
1731
                         const char *end1, const char *ptr2)
1732
{
1733

4224628
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1734

1720940
    if (end1 - ptr1 < MINBPC(enc)) {
1735
      /* This line cannot be executed.  THe incoming data has already
1736
       * been tokenized once, so imcomplete characters like this have
1737
       * already been eliminated from the input.  Retaining the
1738
       * paranoia check is still valuable, however.
1739
       */
1740
      return 0; /* LCOV_EXCL_LINE */
1741
    }
1742


1727110
    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1743
65160
      return 0;
1744
  }
1745
239196
  return ptr1 == end1;
1746
304356
}
1747
1748
static int PTRFASTCALL
1749
PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1750
{
1751
  const char *start = ptr;
1752
74535948
  for (;;) {
1753









279453916
    switch (BYTE_TYPE(enc, ptr)) {
1754
#define LEAD_CASE(n) \
1755
    case BT_LEAD ## n: ptr += n; break;
1756
120
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1757
#undef LEAD_CASE
1758
    case BT_NONASCII:
1759
    case BT_NMSTRT:
1760
#ifdef XML_NS
1761
    case BT_COLON:
1762
#endif
1763
    case BT_HEX:
1764
    case BT_DIGIT:
1765
    case BT_NAME:
1766
    case BT_MINUS:
1767
242181342
      ptr += MINBPC(enc);
1768
242181342
      break;
1769
    default:
1770
37267974
      return (int)(ptr - start);
1771
    }
1772
  }
1773
}
1774
1775
static const char * PTRFASTCALL
1776
PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1777
{
1778
640
  for (;;) {
1779




460
    switch (BYTE_TYPE(enc, ptr)) {
1780
    case BT_LF:
1781
    case BT_CR:
1782
    case BT_S:
1783
200
      ptr += MINBPC(enc);
1784
      break;
1785
    default:
1786
220
      return ptr;
1787
    }
1788
  }
1789
}
1790
1791
static void PTRCALL
1792
PREFIX(updatePosition)(const ENCODING *enc,
1793
                       const char *ptr,
1794
                       const char *end,
1795
                       POSITION *pos)
1796
{
1797

30387366
  while (HAS_CHAR(enc, ptr, end)) {
1798





4268276
    switch (BYTE_TYPE(enc, ptr)) {
1799
#define LEAD_CASE(n) \
1800
    case BT_LEAD ## n: \
1801
      ptr += n; \
1802
      break;
1803
180
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1804
#undef LEAD_CASE
1805
    case BT_LF:
1806
22666
      pos->columnNumber = (XML_Size)-1;
1807
22666
      pos->lineNumber++;
1808
22666
      ptr += MINBPC(enc);
1809
22666
      break;
1810
    case BT_CR:
1811
60
      pos->lineNumber++;
1812
60
      ptr += MINBPC(enc);
1813




100
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1814
20
        ptr += MINBPC(enc);
1815
60
      pos->columnNumber = (XML_Size)-1;
1816
60
      break;
1817
    default:
1818
4205030
      ptr += MINBPC(enc);
1819
4205030
      break;
1820
    }
1821
4227936
    pos->columnNumber++;
1822
  }
1823
7310498
}
1824
1825
#undef DO_LEAD_CASE
1826
#undef MULTIBYTE_CASES
1827
#undef INVALID_CASES
1828
#undef CHECK_NAME_CASE
1829
#undef CHECK_NAME_CASES
1830
#undef CHECK_NMSTRT_CASE
1831
#undef CHECK_NMSTRT_CASES
1832
1833
#endif /* XML_TOK_IMPL_C */