GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libexpat/lib/xmltok_impl.c Lines: 683 800 85.4 %
Date: 2017-11-13 Branches: 1421 3987 35.6 %

Line Branch Exec Source
1
/* This file is included!
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
   Copyright (c) 2000-2017 Expat development team
11
   Licensed under the MIT license:
12
13
   Permission is  hereby granted,  free of charge,  to any  person obtaining
14
   a  copy  of  this  software   and  associated  documentation  files  (the
15
   "Software"),  to  deal in  the  Software  without restriction,  including
16
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17
   distribute, sublicense, and/or sell copies of the Software, and to permit
18
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19
   following conditions:
20
21
   The above copyright  notice and this permission notice  shall be included
22
   in all copies or substantial portions of the Software.
23
24
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30
   USE OR OTHER DEALINGS IN THE SOFTWARE.
31
*/
32
33
#ifdef XML_TOK_IMPL_C
34
35
#ifndef IS_INVALID_CHAR
36
#define IS_INVALID_CHAR(enc, ptr, n) (0)
37
#endif
38
39
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
40
    case BT_LEAD ## n: \
41
      if (end - ptr < n) \
42
        return XML_TOK_PARTIAL_CHAR; \
43
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
44
        *(nextTokPtr) = (ptr); \
45
        return XML_TOK_INVALID; \
46
      } \
47
      ptr += n; \
48
      break;
49
50
#define INVALID_CASES(ptr, nextTokPtr) \
51
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
52
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
53
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
54
  case BT_NONXML: \
55
  case BT_MALFORM: \
56
  case BT_TRAIL: \
57
    *(nextTokPtr) = (ptr); \
58
    return XML_TOK_INVALID;
59
60
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
61
   case BT_LEAD ## n: \
62
     if (end - ptr < n) \
63
       return XML_TOK_PARTIAL_CHAR; \
64
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
65
       *nextTokPtr = ptr; \
66
       return XML_TOK_INVALID; \
67
     } \
68
     ptr += n; \
69
     break;
70
71
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
72
  case BT_NONASCII: \
73
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
74
      *nextTokPtr = ptr; \
75
      return XML_TOK_INVALID; \
76
    } \
77
  case BT_NMSTRT: \
78
  case BT_HEX: \
79
  case BT_DIGIT: \
80
  case BT_NAME: \
81
  case BT_MINUS: \
82
    ptr += MINBPC(enc); \
83
    break; \
84
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
85
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
86
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
88
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
89
   case BT_LEAD ## n: \
90
     if (end - ptr < n) \
91
       return XML_TOK_PARTIAL_CHAR; \
92
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
93
       *nextTokPtr = ptr; \
94
       return XML_TOK_INVALID; \
95
     } \
96
     ptr += n; \
97
     break;
98
99
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
100
  case BT_NONASCII: \
101
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
102
      *nextTokPtr = ptr; \
103
      return XML_TOK_INVALID; \
104
    } \
105
  case BT_NMSTRT: \
106
  case BT_HEX: \
107
    ptr += MINBPC(enc); \
108
    break; \
109
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
110
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
111
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
113
#ifndef PREFIX
114
#define PREFIX(ident) ident
115
#endif
116
117
118
#define HAS_CHARS(enc, ptr, end, count) \
119
    (end - ptr >= count * MINBPC(enc))
120
121
#define HAS_CHAR(enc, ptr, end) \
122
    HAS_CHARS(enc, ptr, end, 1)
123
124
#define REQUIRE_CHARS(enc, ptr, end, count) \
125
    { \
126
      if (! HAS_CHARS(enc, ptr, end, count)) { \
127
        return XML_TOK_PARTIAL; \
128
      } \
129
    }
130
131
#define REQUIRE_CHAR(enc, ptr, end) \
132
    REQUIRE_CHARS(enc, ptr, end, 1)
133
134
135
/* ptr points to character following "<!-" */
136
137
static int PTRCALL
138
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
139
                    const char *end, const char **nextTokPtr)
140
{
141

306792
  if (HAS_CHAR(enc, ptr, end)) {
142


153606
    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
143
      *nextTokPtr = ptr;
144
      return XML_TOK_INVALID;
145
    }
146
153258
    ptr += MINBPC(enc);
147

99204120
    while (HAS_CHAR(enc, ptr, end)) {
148







49603812
      switch (BYTE_TYPE(enc, ptr)) {
149
      INVALID_CASES(ptr, nextTokPtr)
150
      case BT_MINUS:
151
1020402
        ptr += MINBPC(enc);
152

1020540
        REQUIRE_CHAR(enc, ptr, end);
153


1020300
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
154
150264
          ptr += MINBPC(enc);
155

150402
          REQUIRE_CHAR(enc, ptr, end);
156


150138
          if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
157
            *nextTokPtr = ptr;
158
            return XML_TOK_INVALID;
159
          }
160
150126
          *nextTokPtr = ptr + MINBPC(enc);
161
150126
          return XML_TOK_COMMENT;
162
        }
163
        break;
164
      default:
165
48578802
        ptr += MINBPC(enc);
166
48578802
        break;
167
      }
168
    }
169
  }
170
2994
  return XML_TOK_PARTIAL;
171
153396
}
172
173
/* ptr points to character following "<!" */
174
175
static int PTRCALL
176
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
177
                 const char *end, const char **nextTokPtr)
178
{
179

527175
  REQUIRE_CHAR(enc, ptr, end);
180




249342
  switch (BYTE_TYPE(enc, ptr)) {
181
  case BT_MINUS:
182
122874
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
183
  case BT_LSQB:
184
42
    *nextTokPtr = ptr + MINBPC(enc);
185
42
    return XML_TOK_COND_SECT_OPEN;
186
  case BT_NMSTRT:
187
  case BT_HEX:
188
122766
    ptr += MINBPC(enc);
189
    break;
190
  default:
191
    *nextTokPtr = ptr;
192
    return XML_TOK_INVALID;
193
  }
194

1219302
  while (HAS_CHAR(enc, ptr, end)) {
195






539316
    switch (BYTE_TYPE(enc, ptr)) {
196
    case BT_PERCNT:
197
      REQUIRE_CHARS(enc, ptr, end, 2);
198
      /* don't allow <!ENTITY% foo "whatever"> */
199
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
200
      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
201
        *nextTokPtr = ptr;
202
        return XML_TOK_INVALID;
203
      }
204
      /* fall through */
205
    case BT_S: case BT_CR: case BT_LF:
206
43527
      *nextTokPtr = ptr;
207
43527
      return XML_TOK_DECL_OPEN;
208
    case BT_NMSTRT:
209
    case BT_HEX:
210
486885
      ptr += MINBPC(enc);
211
      break;
212
    default:
213
      *nextTokPtr = ptr;
214
      return XML_TOK_INVALID;
215
    }
216
  }
217
79239
  return XML_TOK_PARTIAL;
218
257619
}
219
220
static int PTRCALL
221
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
222
                      const char *end, int *tokPtr)
223
{
224
  int upper = 0;
225
425388
  *tokPtr = XML_TOK_PI;
226

212694
  if (end - ptr != MINBPC(enc)*3)
227
67914
    return 1;
228



168480
  switch (BYTE_TO_ASCII(enc, ptr)) {
229
  case ASCII_x:
230
    break;
231
  case ASCII_X:
232
    upper = 1;
233
    break;
234
  default:
235
132
    return 1;
236
  }
237
144648
  ptr += MINBPC(enc);
238



168348
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
  case ASCII_m:
240
    break;
241
  case ASCII_M:
242
    upper = 1;
243
    break;
244
  default:
245
120
    return 1;
246
  }
247
144528
  ptr += MINBPC(enc);
248



168228
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
  case ASCII_l:
250
    break;
251
  case ASCII_L:
252
    upper = 1;
253
    break;
254
  default:
255
138
    return 1;
256
  }
257

144390
  if (upper)
258
    return 0;
259
144390
  *tokPtr = XML_TOK_XML_DECL;
260
144390
  return 1;
261
212694
}
262
263
/* ptr points to character following "<?" */
264
265
static int PTRCALL
266
PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
267
               const char *end, const char **nextTokPtr)
268
{
269
441420
  int tok;
270
  const char *target = ptr;
271

222732
  REQUIRE_CHAR(enc, ptr, end);
272






244800
  switch (BYTE_TYPE(enc, ptr)) {
273







218772
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
274
  default:
275
    *nextTokPtr = ptr;
276
    return XML_TOK_INVALID;
277
  }
278

2032164
  while (HAS_CHAR(enc, ptr, end)) {
279











1083600
    switch (BYTE_TYPE(enc, ptr)) {
280







797454
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
281
    case BT_S: case BT_CR: case BT_LF:
282

212634
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
283
        *nextTokPtr = ptr;
284
        return XML_TOK_INVALID;
285
      }
286
212634
      ptr += MINBPC(enc);
287

105176676
      while (HAS_CHAR(enc, ptr, end)) {
288







52821288
        switch (BYTE_TYPE(enc, ptr)) {
289






66
        INVALID_CASES(ptr, nextTokPtr)
290
        case BT_QUEST:
291
63882
          ptr += MINBPC(enc);
292

65868
          REQUIRE_CHAR(enc, ptr, end);
293


62082
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
294
61896
            *nextTokPtr = ptr + MINBPC(enc);
295
61896
            return tok;
296
          }
297
          break;
298
        default:
299
52375686
          ptr += MINBPC(enc);
300
52375686
          break;
301
        }
302
      }
303
148746
      return XML_TOK_PARTIAL;
304
    case BT_QUEST:
305

60
      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
306
        *nextTokPtr = ptr;
307
        return XML_TOK_INVALID;
308
      }
309
60
      ptr += MINBPC(enc);
310

96
      REQUIRE_CHAR(enc, ptr, end);
311


36
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
312
24
        *nextTokPtr = ptr + MINBPC(enc);
313
24
        return tok;
314
      }
315
      /* fall through */
316
    default:
317
      *nextTokPtr = ptr;
318
      return XML_TOK_INVALID;
319
    }
320
  }
321
5994
  return XML_TOK_PARTIAL;
322
220710
}
323
324
static int PTRCALL
325
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
326
                         const char *end, const char **nextTokPtr)
327
{
328
  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
329
                                     ASCII_T, ASCII_A, ASCII_LSQB };
330
  int i;
331
  /* CDATA[ */
332

6780
  REQUIRE_CHARS(enc, ptr, end, 6);
333

2484
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
334


1512
    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
335
24
      *nextTokPtr = ptr;
336
24
      return XML_TOK_INVALID;
337
    }
338
  }
339
162
  *nextTokPtr = ptr;
340
162
  return XML_TOK_CDATA_SECT_OPEN;
341
2322
}
342
343
static int PTRCALL
344
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
345
                        const char *end, const char **nextTokPtr)
346
{
347

5400
  if (ptr >= end)
348
1164
    return XML_TOK_NONE;
349
  if (MINBPC(enc) > 1) {
350
426
    size_t n = end - ptr;
351

426
    if (n & (MINBPC(enc) - 1)) {
352
216
      n &= ~(MINBPC(enc) - 1);
353

216
      if (n == 0)
354
156
        return XML_TOK_PARTIAL;
355
60
      end = ptr + n;
356
60
    }
357

270
  }
358








1920
  switch (BYTE_TYPE(enc, ptr)) {
359
  case BT_RSQB:
360
288
    ptr += MINBPC(enc);
361

396
    REQUIRE_CHAR(enc, ptr, end);
362


240
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
363
      break;
364
162
    ptr += MINBPC(enc);
365

252
    REQUIRE_CHAR(enc, ptr, end);
366


96
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
367
6
      ptr -= MINBPC(enc);
368
6
      break;
369
    }
370
66
    *nextTokPtr = ptr + MINBPC(enc);
371
66
    return XML_TOK_CDATA_SECT_CLOSE;
372
  case BT_CR:
373
    ptr += MINBPC(enc);
374
    REQUIRE_CHAR(enc, ptr, end);
375
    if (BYTE_TYPE(enc, ptr) == BT_LF)
376
      ptr += MINBPC(enc);
377
    *nextTokPtr = ptr;
378
    return XML_TOK_DATA_NEWLINE;
379
  case BT_LF:
380
6
    *nextTokPtr = ptr + MINBPC(enc);
381
6
    return XML_TOK_DATA_NEWLINE;
382






240
  INVALID_CASES(ptr, nextTokPtr)
383
  default:
384
972
    ptr += MINBPC(enc);
385
972
    break;
386
  }
387

14556
  while (HAS_CHAR(enc, ptr, end)) {
388








18744
    switch (BYTE_TYPE(enc, ptr)) {
389
#define LEAD_CASE(n) \
390
    case BT_LEAD ## n: \
391
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
392
        *nextTokPtr = ptr; \
393
        return XML_TOK_DATA_CHARS; \
394
      } \
395
      ptr += n; \
396
      break;
397






12
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
398
#undef LEAD_CASE
399
    case BT_NONXML:
400
    case BT_MALFORM:
401
    case BT_TRAIL:
402
    case BT_CR:
403
    case BT_LF:
404
    case BT_RSQB:
405
12
      *nextTokPtr = ptr;
406
12
      return XML_TOK_DATA_CHARS;
407
    default:
408
6246
      ptr += MINBPC(enc);
409
6246
      break;
410
    }
411
  }
412
1014
  *nextTokPtr = ptr;
413
1014
  return XML_TOK_DATA_CHARS;
414
2700
}
415
416
/* ptr points to character following "</" */
417
418
static int PTRCALL
419
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
420
                   const char *end, const char **nextTokPtr)
421
{
422

19583652
  REQUIRE_CHAR(enc, ptr, end);
423






9790554
  switch (BYTE_TYPE(enc, ptr)) {
424







9789876
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
425
  default:
426
    *nextTokPtr = ptr;
427
    return XML_TOK_INVALID;
428
  }
429

166693824
  while (HAS_CHAR(enc, ptr, end)) {
430












83309160
    switch (BYTE_TYPE(enc, ptr)) {
431







73556418
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
432
    case BT_S: case BT_CR: case BT_LF:
433

24
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
434




6
        switch (BYTE_TYPE(enc, ptr)) {
435
        case BT_S: case BT_CR: case BT_LF:
436
          break;
437
        case BT_GT:
438
6
          *nextTokPtr = ptr + MINBPC(enc);
439
6
          return XML_TOK_END_TAG;
440
        default:
441
          *nextTokPtr = ptr;
442
          return XML_TOK_INVALID;
443
        }
444
      }
445
6
      return XML_TOK_PARTIAL;
446
#ifdef XML_NS
447
    case BT_COLON:
448
      /* no need to check qname syntax here,
449
         since end-tag must match exactly */
450
1044
      ptr += MINBPC(enc);
451
1044
      break;
452
#endif
453
    case BT_GT:
454
9751320
      *nextTokPtr = ptr + MINBPC(enc);
455
9751320
      return XML_TOK_END_TAG;
456
    default:
457
      *nextTokPtr = ptr;
458
      return XML_TOK_INVALID;
459
    }
460
  }
461
38442
  return XML_TOK_PARTIAL;
462
9791154
}
463
464
/* ptr points to character following "&#X" */
465
466
static int PTRCALL
467
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
468
                       const char *end, const char **nextTokPtr)
469
{
470

4416
  if (HAS_CHAR(enc, ptr, end)) {
471


2340
    switch (BYTE_TYPE(enc, ptr)) {
472
    case BT_DIGIT:
473
    case BT_HEX:
474
      break;
475
    default:
476
6
      *nextTokPtr = ptr;
477
6
      return XML_TOK_INVALID;
478
    }
479

11664
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
480




6096
      switch (BYTE_TYPE(enc, ptr)) {
481
      case BT_DIGIT:
482
      case BT_HEX:
483
        break;
484
      case BT_SEMI:
485
1398
        *nextTokPtr = ptr + MINBPC(enc);
486
1398
        return XML_TOK_CHAR_REF;
487
      default:
488
        *nextTokPtr = ptr;
489
        return XML_TOK_INVALID;
490
      }
491
    }
492
  }
493
804
  return XML_TOK_PARTIAL;
494
2208
}
495
496
/* ptr points to character following "&#" */
497
498
static int PTRCALL
499
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
500
                    const char *end, const char **nextTokPtr)
501
{
502

13128
  if (HAS_CHAR(enc, ptr, end)) {
503


6702
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
504
2208
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505


4662
    switch (BYTE_TYPE(enc, ptr)) {
506
    case BT_DIGIT:
507
      break;
508
    default:
509
      *nextTokPtr = ptr;
510
      return XML_TOK_INVALID;
511
    }
512

22356
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
513



12432
      switch (BYTE_TYPE(enc, ptr)) {
514
      case BT_DIGIT:
515
        break;
516
      case BT_SEMI:
517
3324
        *nextTokPtr = ptr + MINBPC(enc);
518
3324
        return XML_TOK_CHAR_REF;
519
      default:
520
        *nextTokPtr = ptr;
521
        return XML_TOK_INVALID;
522
      }
523
    }
524
  }
525
1032
  return XML_TOK_PARTIAL;
526
6564
}
527
528
/* ptr points to character following "&" */
529
530
static int PTRCALL
531
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
532
                const char **nextTokPtr)
533
{
534

6095451
  REQUIRE_CHAR(enc, ptr, end);
535







3045879
  switch (BYTE_TYPE(enc, ptr)) {
536







3037683
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
537
  case BT_NUM:
538
6564
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
539
  default:
540
    *nextTokPtr = ptr;
541
    return XML_TOK_INVALID;
542
  }
543

43880394
  while (HAS_CHAR(enc, ptr, end)) {
544









21905619
    switch (BYTE_TYPE(enc, ptr)) {
545







18902550
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
546
    case BT_SEMI:
547
3002397
      *nextTokPtr = ptr + MINBPC(enc);
548
3002397
      return XML_TOK_ENTITY_REF;
549
    default:
550
      *nextTokPtr = ptr;
551
      return XML_TOK_INVALID;
552
    }
553
  }
554
35268
  return XML_TOK_PARTIAL;
555
3046560
}
556
557
/* ptr points to character following first character of attribute name */
558
559
static int PTRCALL
560
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
561
                 const char **nextTokPtr)
562
{
563
#ifdef XML_NS
564
  int hadColon = 0;
565
#endif
566

1323025404
  while (HAS_CHAR(enc, ptr, end)) {
567












660578256
    switch (BYTE_TYPE(enc, ptr)) {
568







654638730
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
569
#ifdef XML_NS
570
    case BT_COLON:
571

1742502
      if (hadColon) {
572
6
        *nextTokPtr = ptr;
573
6
        return XML_TOK_INVALID;
574
      }
575
      hadColon = 1;
576
1742496
      ptr += MINBPC(enc);
577

1744944
      REQUIRE_CHAR(enc, ptr, end);
578






1741170
      switch (BYTE_TYPE(enc, ptr)) {
579







1740132
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
580
      default:
581
6
        *nextTokPtr = ptr;
582
6
        return XML_TOK_INVALID;
583
      }
584
516
      break;
585
#endif
586
    case BT_S: case BT_CR: case BT_LF:
587
1380000
      for (;;) {
588
        int t;
589
590
1380000
        ptr += MINBPC(enc);
591

1380000
        REQUIRE_CHAR(enc, ptr, end);
592

1380000
        t = BYTE_TYPE(enc, ptr);
593

1380000
        if (t == BT_EQUALS)
594
1350000
          break;
595



30000
        switch (t) {
596
        case BT_S:
597
        case BT_LF:
598
        case BT_CR:
599
          break;
600
        default:
601
          *nextTokPtr = ptr;
602
          return XML_TOK_INVALID;
603
        }
604


30000
      }
605
    /* fall through */
606
    case BT_EQUALS:
607
      {
608
        int open;
609
#ifdef XML_NS
610
        hadColon = 0;
611
#endif
612
5808288
        for (;;) {
613
5808288
          ptr += MINBPC(enc);
614

5812368
          REQUIRE_CHAR(enc, ptr, end);
615

5806464
          open = BYTE_TYPE(enc, ptr);
616

5804208
          if (open == BT_QUOT || open == BT_APOS)
617
            break;
618



1620000
          switch (open) {
619
          case BT_S:
620
          case BT_LF:
621
          case BT_CR:
622
            break;
623
          default:
624
            *nextTokPtr = ptr;
625
            return XML_TOK_INVALID;
626
          }
627
        }
628
4184208
        ptr += MINBPC(enc);
629
        /* in attribute value */
630
436903974
        for (;;) {
631
          int t;
632

437463834
          REQUIRE_CHAR(enc, ptr, end);
633

436349106
          t = BYTE_TYPE(enc, ptr);
634

436344114
          if (t == open)
635
3622632
            break;
636






432721482
          switch (t) {
637






30
          INVALID_CASES(ptr, nextTokPtr)
638
          case BT_AMP:
639
            {
640
6510
              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
641

6510
              if (tok <= 0) {
642

1704
                if (tok == XML_TOK_INVALID)
643
                  *nextTokPtr = ptr;
644
1704
                return tok;
645
              }
646

4806
              break;
647
            }
648
          case BT_LT:
649
            *nextTokPtr = ptr;
650
            return XML_TOK_INVALID;
651
          default:
652
432714960
            ptr += MINBPC(enc);
653
432714960
            break;
654
          }
655


432719766
        }
656
3622632
        ptr += MINBPC(enc);
657

3626700
        REQUIRE_CHAR(enc, ptr, end);
658





3619236
        switch (BYTE_TYPE(enc, ptr)) {
659
        case BT_S:
660
        case BT_CR:
661
        case BT_LF:
662
          break;
663
        case BT_SOL:
664
          goto sol;
665
        case BT_GT:
666
          goto gt;
667
        default:
668
          *nextTokPtr = ptr;
669
          return XML_TOK_INVALID;
670
        }
671
        /* ptr points to closing quote */
672
1485906
        for (;;) {
673
1668498
          ptr += MINBPC(enc);
674

1671516
          REQUIRE_CHAR(enc, ptr, end);
675










1666014
          switch (BYTE_TYPE(enc, ptr)) {
676







1482792
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
677
          case BT_S: case BT_CR: case BT_LF:
678
182592
            continue;
679
          case BT_GT:
680
          gt:
681
2131968
            *nextTokPtr = ptr + MINBPC(enc);
682
2131968
            return XML_TOK_START_TAG_WITH_ATTS;
683
          case BT_SOL:
684
          sol:
685
876
            ptr += MINBPC(enc);
686

1326
            REQUIRE_CHAR(enc, ptr, end);
687


450
            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
688
6
              *nextTokPtr = ptr;
689
6
              return XML_TOK_INVALID;
690
            }
691
420
            *nextTokPtr = ptr + MINBPC(enc);
692
420
            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
693
          default:
694
            *nextTokPtr = ptr;
695
            return XML_TOK_INVALID;
696
          }
697
          break;
698
        }
699
1482702
        break;
700
      }
701
    default:
702
      *nextTokPtr = ptr;
703
      return XML_TOK_INVALID;
704
    }
705
  }
706
943338
  return XML_TOK_PARTIAL;
707
3651384
}
708
709
/* ptr points to character following "<" */
710
711
static int PTRCALL
712
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
713
               const char **nextTokPtr)
714
{
715
#ifdef XML_NS
716
  int hadColon;
717
#endif
718

43479654
  REQUIRE_CHAR(enc, ptr, end);
719








21746130
  switch (BYTE_TYPE(enc, ptr)) {
720







11909538
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
721
  case BT_EXCL:
722
33336
    ptr += MINBPC(enc);
723

33822
    REQUIRE_CHAR(enc, ptr, end);
724



35514
    switch (BYTE_TYPE(enc, ptr)) {
725
    case BT_MINUS:
726
30522
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
727
    case BT_LSQB:
728
2322
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
729
                                      end, nextTokPtr);
730
    }
731
6
    *nextTokPtr = ptr;
732
6
    return XML_TOK_INVALID;
733
  case BT_QUEST:
734
2178
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
735
  case BT_SOL:
736
9791154
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
737
  default:
738
    *nextTokPtr = ptr;
739
    return XML_TOK_INVALID;
740
  }
741
#ifdef XML_NS
742
  hadColon = 0;
743
#endif
744
  /* we have a start-tag */
745

1804651014
  while (HAS_CHAR(enc, ptr, end)) {
746













901704402
    switch (BYTE_TYPE(enc, ptr)) {
747







889226511
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
748
#ifdef XML_NS
749
    case BT_COLON:
750

1191432
      if (hadColon) {
751
6
        *nextTokPtr = ptr;
752
6
        return XML_TOK_INVALID;
753
      }
754
      hadColon = 1;
755
1191426
      ptr += MINBPC(enc);
756

1192842
      REQUIRE_CHAR(enc, ptr, end);
757






1191846
      switch (BYTE_TYPE(enc, ptr)) {
758







1190412
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
759
      default:
760
6
        *nextTokPtr = ptr;
761
6
        return XML_TOK_INVALID;
762
      }
763
714
      break;
764
#endif
765
    case BT_S: case BT_CR: case BT_LF:
766
      {
767
3654048
        ptr += MINBPC(enc);
768

7736154
        while (HAS_CHAR(enc, ptr, end)) {
769










4082466
          switch (BYTE_TYPE(enc, ptr)) {
770







3651474
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
771
          case BT_GT:
772
            goto gt;
773
          case BT_SOL:
774
            goto sol;
775
          case BT_S: case BT_CR: case BT_LF:
776
428058
            ptr += MINBPC(enc);
777
428058
            continue;
778
          default:
779
            *nextTokPtr = ptr;
780
            return XML_TOK_INVALID;
781
          }
782
3651384
          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
783
        }
784
2634
        return XML_TOK_PARTIAL;
785
      }
786
    case BT_GT:
787
    gt:
788
7623849
      *nextTokPtr = ptr + MINBPC(enc);
789
7623849
      return XML_TOK_START_TAG_NO_ATTS;
790
    case BT_SOL:
791
    sol:
792
1494
      ptr += MINBPC(enc);
793

2166
      REQUIRE_CHAR(enc, ptr, end);
794


870
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
795
        *nextTokPtr = ptr;
796
        return XML_TOK_INVALID;
797
      }
798
822
      *nextTokPtr = ptr + MINBPC(enc);
799
822
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
800
    default:
801
      *nextTokPtr = ptr;
802
      return XML_TOK_INVALID;
803
    }
804
  }
805
628557
  return XML_TOK_PARTIAL;
806
21738588
}
807
808
static int PTRCALL
809
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
810
                   const char **nextTokPtr)
811
{
812

153235746
  if (ptr >= end)
813
16293
    return XML_TOK_NONE;
814
  if (MINBPC(enc) > 1) {
815
6414
    size_t n = end - ptr;
816

6414
    if (n & (MINBPC(enc) - 1)) {
817
3048
      n &= ~(MINBPC(enc) - 1);
818

3048
      if (n == 0)
819
432
        return XML_TOK_PARTIAL;
820
2616
      end = ptr + n;
821
2616
    }
822

5982
  }
823










76613112
  switch (BYTE_TYPE(enc, ptr)) {
824
  case BT_LT:
825
21738588
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
826
  case BT_AMP:
827
3039708
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
828
  case BT_CR:
829
60
    ptr += MINBPC(enc);
830

60
    if (! HAS_CHAR(enc, ptr, end))
831
48
      return XML_TOK_TRAILING_CR;
832


12
    if (BYTE_TYPE(enc, ptr) == BT_LF)
833
6
      ptr += MINBPC(enc);
834
12
    *nextTokPtr = ptr;
835
12
    return XML_TOK_DATA_NEWLINE;
836
  case BT_LF:
837
21391074
    *nextTokPtr = ptr + MINBPC(enc);
838
21391074
    return XML_TOK_DATA_NEWLINE;
839
  case BT_RSQB:
840
48
    ptr += MINBPC(enc);
841

48
    if (! HAS_CHAR(enc, ptr, end))
842
36
      return XML_TOK_TRAILING_RSQB;
843


12
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
844
      break;
845
    ptr += MINBPC(enc);
846
    if (! HAS_CHAR(enc, ptr, end))
847
      return XML_TOK_TRAILING_RSQB;
848
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
849
      ptr -= MINBPC(enc);
850
      break;
851
    }
852
    *nextTokPtr = ptr;
853
    return XML_TOK_INVALID;
854






2670
  INVALID_CASES(ptr, nextTokPtr)
855
  default:
856
30430284
    ptr += MINBPC(enc);
857
30430284
    break;
858
  }
859

973174476
  while (HAS_CHAR(enc, ptr, end)) {
860










486576990
    switch (BYTE_TYPE(enc, ptr)) {
861
#define LEAD_CASE(n) \
862
    case BT_LEAD ## n: \
863
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
864
        *nextTokPtr = ptr; \
865
        return XML_TOK_DATA_CHARS; \
866
      } \
867
      ptr += n; \
868
      break;
869
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
870
#undef LEAD_CASE
871
    case BT_RSQB:
872

180000
      if (HAS_CHARS(enc, ptr, end, 2)) {
873


180000
         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
874
           ptr += MINBPC(enc);
875
180000
           break;
876
         }
877
         if (HAS_CHARS(enc, ptr, end, 3)) {
878
           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
879
             ptr += MINBPC(enc);
880
             break;
881
           }
882
           *nextTokPtr = ptr + 2*MINBPC(enc);
883
           return XML_TOK_INVALID;
884
         }
885
      }
886
      /* fall through */
887
    case BT_AMP:
888
    case BT_LT:
889
    case BT_NONXML:
890
    case BT_MALFORM:
891
    case BT_TRAIL:
892
    case BT_CR:
893
    case BT_LF:
894
30420096
      *nextTokPtr = ptr;
895
30420096
      return XML_TOK_DATA_CHARS;
896
    default:
897
455976894
      ptr += MINBPC(enc);
898
455976894
      break;
899
    }
900
  }
901
10248
  *nextTokPtr = ptr;
902
10248
  return XML_TOK_DATA_CHARS;
903
76617873
}
904
905
/* ptr points to character following "%" */
906
907
static int PTRCALL
908
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
909
                    const char **nextTokPtr)
910
{
911

30450
  REQUIRE_CHAR(enc, ptr, end);
912









14118
  switch (BYTE_TYPE(enc, ptr)) {
913







13374
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
914
  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
915
576
    *nextTokPtr = ptr;
916
576
    return XML_TOK_PERCENT;
917
  default:
918
    *nextTokPtr = ptr;
919
    return XML_TOK_INVALID;
920
  }
921

12623736
  while (HAS_CHAR(enc, ptr, end)) {
922









6299100
    switch (BYTE_TYPE(enc, ptr)) {
923







6298542
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
924
    case BT_SEMI:
925
486
      *nextTokPtr = ptr + MINBPC(enc);
926
486
      return XML_TOK_PARAM_ENTITY_REF;
927
    default:
928
      *nextTokPtr = ptr;
929
      return XML_TOK_INVALID;
930
    }
931
  }
932
12858
  return XML_TOK_PARTIAL;
933
14790
}
934
935
static int PTRCALL
936
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
937
                      const char **nextTokPtr)
938
{
939

20430
  REQUIRE_CHAR(enc, ptr, end);
940






8712
  switch (BYTE_TYPE(enc, ptr)) {
941







8646
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
942
  default:
943
6
    *nextTokPtr = ptr;
944
6
    return XML_TOK_INVALID;
945
  }
946

77844
  while (HAS_CHAR(enc, ptr, end)) {
947













31434
    switch (BYTE_TYPE(enc, ptr)) {
948







30324
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
949
    case BT_CR: case BT_LF: case BT_S:
950
    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
951
1062
      *nextTokPtr = ptr;
952
1062
      return XML_TOK_POUND_NAME;
953
    default:
954
      *nextTokPtr = ptr;
955
      return XML_TOK_INVALID;
956
    }
957
  }
958
7554
  return -XML_TOK_POUND_NAME;
959
9684
}
960
961
static int PTRCALL
962
PREFIX(scanLit)(int open, const ENCODING *enc,
963
                const char *ptr, const char *end,
964
                const char **nextTokPtr)
965
{
966

1101006123
  while (HAS_CHAR(enc, ptr, end)) {
967

548802300
    int t = BYTE_TYPE(enc, ptr);
968






548790228
    switch (t) {
969






66
    INVALID_CASES(ptr, nextTokPtr)
970
    case BT_QUOT:
971
    case BT_APOS:
972
97122
      ptr += MINBPC(enc);
973

97122
      if (t != open)
974
        break;
975

72648
      if (! HAS_CHAR(enc, ptr, end))
976
6330
        return -XML_TOK_LITERAL;
977
66318
      *nextTokPtr = ptr;
978






66450
      switch (BYTE_TYPE(enc, ptr)) {
979
      case BT_S: case BT_CR: case BT_LF:
980
      case BT_GT: case BT_PERCNT: case BT_LSQB:
981
66318
        return XML_TOK_LITERAL;
982
      default:
983
        return XML_TOK_INVALID;
984
      }
985
    default:
986
548693082
      ptr += MINBPC(enc);
987
548693082
      break;
988
    }
989

548717574
  }
990
1117671
  return XML_TOK_PARTIAL;
991
1190325
}
992
993
static int PTRCALL
994
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
995
                  const char **nextTokPtr)
996
{
997
  int tok;
998

6104658
  if (ptr >= end)
999
111591
    return XML_TOK_NONE;
1000
  if (MINBPC(enc) > 1) {
1001
21228
    size_t n = end - ptr;
1002

21228
    if (n & (MINBPC(enc) - 1)) {
1003
10260
      n &= ~(MINBPC(enc) - 1);
1004

10260
      if (n == 0)
1005
990
        return XML_TOK_PARTIAL;
1006
9270
      end = ptr + n;
1007
9270
    }
1008

20238
  }
1009




















2980224
  switch (BYTE_TYPE(enc, ptr)) {
1010
  case BT_QUOT:
1011
60024
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1012
  case BT_APOS:
1013
1130301
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1014
  case BT_LT:
1015
    {
1016
523455
      ptr += MINBPC(enc);
1017

534648
      REQUIRE_CHAR(enc, ptr, end);
1018







543870
      switch (BYTE_TYPE(enc, ptr)) {
1019
      case BT_EXCL:
1020
257619
        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1021
      case BT_QUEST:
1022
218532
        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1023
      case BT_NMSTRT:
1024
      case BT_HEX:
1025
      case BT_NONASCII:
1026
      case BT_LEAD2:
1027
      case BT_LEAD3:
1028
      case BT_LEAD4:
1029
36099
        *nextTokPtr = ptr - MINBPC(enc);
1030
36099
        return XML_TOK_INSTANCE_START;
1031
      }
1032
12
      *nextTokPtr = ptr;
1033
12
      return XML_TOK_INVALID;
1034
    }
1035
  case BT_CR:
1036

18
    if (ptr + MINBPC(enc) == end) {
1037
18
      *nextTokPtr = end;
1038
      /* indicate that this might be part of a CR/LF pair */
1039
18
      return -XML_TOK_PROLOG_S;
1040
    }
1041
    /* fall through */
1042
  case BT_S: case BT_LF:
1043
1468887
    for (;;) {
1044
1468887
      ptr += MINBPC(enc);
1045

1468887
      if (! HAS_CHAR(enc, ptr, end))
1046
        break;
1047




1385496
      switch (BYTE_TYPE(enc, ptr)) {
1048
      case BT_S: case BT_LF:
1049
        break;
1050
      case BT_CR:
1051
        /* don't split CR/LF pair */
1052
        if (ptr + MINBPC(enc) != end)
1053
          break;
1054
        /* fall through */
1055
      default:
1056
335460
        *nextTokPtr = ptr;
1057
335460
        return XML_TOK_PROLOG_S;
1058
      }
1059
    }
1060
83403
    *nextTokPtr = ptr;
1061
83403
    return XML_TOK_PROLOG_S;
1062
  case BT_PERCNT:
1063
14580
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1064
  case BT_COMMA:
1065
1956
    *nextTokPtr = ptr + MINBPC(enc);
1066
1956
    return XML_TOK_COMMA;
1067
  case BT_LSQB:
1068
3423
    *nextTokPtr = ptr + MINBPC(enc);
1069
3423
    return XML_TOK_OPEN_BRACKET;
1070
  case BT_RSQB:
1071
4794
    ptr += MINBPC(enc);
1072

4794
    if (! HAS_CHAR(enc, ptr, end))
1073
2397
      return -XML_TOK_CLOSE_BRACKET;
1074


2427
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1075
      REQUIRE_CHARS(enc, ptr, end, 2);
1076
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1077
        *nextTokPtr = ptr + 2*MINBPC(enc);
1078
        return XML_TOK_COND_SECT_CLOSE;
1079
      }
1080
    }
1081
2397
    *nextTokPtr = ptr;
1082
2397
    return XML_TOK_CLOSE_BRACKET;
1083
  case BT_LPAR:
1084
2934
    *nextTokPtr = ptr + MINBPC(enc);
1085
2934
    return XML_TOK_OPEN_PAREN;
1086
  case BT_RPAR:
1087
4284
    ptr += MINBPC(enc);
1088

4284
    if (! HAS_CHAR(enc, ptr, end))
1089
2136
      return -XML_TOK_CLOSE_PAREN;
1090









2148
    switch (BYTE_TYPE(enc, ptr)) {
1091
    case BT_AST:
1092
210
      *nextTokPtr = ptr + MINBPC(enc);
1093
210
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1094
    case BT_QUEST:
1095
      *nextTokPtr = ptr + MINBPC(enc);
1096
      return XML_TOK_CLOSE_PAREN_QUESTION;
1097
    case BT_PLUS:
1098
42
      *nextTokPtr = ptr + MINBPC(enc);
1099
42
      return XML_TOK_CLOSE_PAREN_PLUS;
1100
    case BT_CR: case BT_LF: case BT_S:
1101
    case BT_GT: case BT_COMMA: case BT_VERBAR:
1102
    case BT_RPAR:
1103
1896
      *nextTokPtr = ptr;
1104
1896
      return XML_TOK_CLOSE_PAREN;
1105
    }
1106
    *nextTokPtr = ptr;
1107
    return XML_TOK_INVALID;
1108
  case BT_VERBAR:
1109
5430
    *nextTokPtr = ptr + MINBPC(enc);
1110
5430
    return XML_TOK_OR;
1111
  case BT_GT:
1112
39399
    *nextTokPtr = ptr + MINBPC(enc);
1113
39399
    return XML_TOK_DECL_CLOSE;
1114
  case BT_NUM:
1115
9684
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1116
#define LEAD_CASE(n) \
1117
  case BT_LEAD ## n: \
1118
    if (end - ptr < n) \
1119
      return XML_TOK_PARTIAL_CHAR; \
1120
    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1121
      ptr += n; \
1122
      tok = XML_TOK_NAME; \
1123
      break; \
1124
    } \
1125
    if (IS_NAME_CHAR(enc, ptr, n)) { \
1126
      ptr += n; \
1127
      tok = XML_TOK_NMTOKEN; \
1128
      break; \
1129
    } \
1130
    *nextTokPtr = ptr; \
1131
    return XML_TOK_INVALID;
1132







180
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1133
#undef LEAD_CASE
1134
  case BT_NMSTRT:
1135
  case BT_HEX:
1136
    tok = XML_TOK_NAME;
1137
720405
    ptr += MINBPC(enc);
1138
720405
    break;
1139
  case BT_DIGIT:
1140
  case BT_NAME:
1141
  case BT_MINUS:
1142
#ifdef XML_NS
1143
  case BT_COLON:
1144
#endif
1145
    tok = XML_TOK_NMTOKEN;
1146
36
    ptr += MINBPC(enc);
1147
36
    break;
1148
  case BT_NONASCII:
1149

48
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1150
30
      ptr += MINBPC(enc);
1151
      tok = XML_TOK_NAME;
1152
30
      break;
1153
    }
1154

18
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1155
18
      ptr += MINBPC(enc);
1156
      tok = XML_TOK_NMTOKEN;
1157
18
      break;
1158
    }
1159
    /* fall through */
1160
  default:
1161
30
    *nextTokPtr = ptr;
1162
30
    return XML_TOK_INVALID;
1163
  }
1164

515765292
  while (HAS_CHAR(enc, ptr, end)) {
1165


















257259369
    switch (BYTE_TYPE(enc, ptr)) {
1166







257157243
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1167
    case BT_GT: case BT_RPAR: case BT_COMMA:
1168
    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1169
    case BT_S: case BT_CR: case BT_LF:
1170
90174
      *nextTokPtr = ptr;
1171
90174
      return tok;
1172
#ifdef XML_NS
1173
    case BT_COLON:
1174
5640
      ptr += MINBPC(enc);
1175


5640
      switch (tok) {
1176
      case XML_TOK_NAME:
1177

6270
        REQUIRE_CHAR(enc, ptr, end);
1178
        tok = XML_TOK_PREFIXED_NAME;
1179








4944
        switch (BYTE_TYPE(enc, ptr)) {
1180







4902
        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1181
        default:
1182
          tok = XML_TOK_NMTOKEN;
1183
6
          break;
1184
        }
1185
        break;
1186
      case XML_TOK_PREFIXED_NAME:
1187
        tok = XML_TOK_NMTOKEN;
1188
30
        break;
1189
      }
1190
      break;
1191
#endif
1192
    case BT_PLUS:
1193

174
      if (tok == XML_TOK_NMTOKEN)  {
1194
6
        *nextTokPtr = ptr;
1195
6
        return XML_TOK_INVALID;
1196
      }
1197
168
      *nextTokPtr = ptr + MINBPC(enc);
1198
168
      return XML_TOK_NAME_PLUS;
1199
    case BT_AST:
1200

6
      if (tok == XML_TOK_NMTOKEN)  {
1201
6
        *nextTokPtr = ptr;
1202
6
        return XML_TOK_INVALID;
1203
      }
1204
      *nextTokPtr = ptr + MINBPC(enc);
1205
      return XML_TOK_NAME_ASTERISK;
1206
    case BT_QUEST:
1207

1944
      if (tok == XML_TOK_NMTOKEN)  {
1208
6
        *nextTokPtr = ptr;
1209
6
        return XML_TOK_INVALID;
1210
      }
1211
1938
      *nextTokPtr = ptr + MINBPC(enc);
1212
1938
      return XML_TOK_NAME_QUESTION;
1213
    default:
1214
6
      *nextTokPtr = ptr;
1215
6
      return XML_TOK_INVALID;
1216
    }
1217
  }
1218
627507
  return -tok;
1219
3052329
}
1220
1221
static int PTRCALL
1222
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1223
                          const char *end, const char **nextTokPtr)
1224
{
1225
  const char *start;
1226

7392
  if (ptr >= end)
1227
534
    return XML_TOK_NONE;
1228

3162
  else if (! HAS_CHAR(enc, ptr, end)) {
1229
    /* This line cannot be executed.  The incoming data has already
1230
     * been tokenized once, so incomplete characters like this have
1231
     * already been eliminated from the input.  Retaining the paranoia
1232
     * check is still valuable, however.
1233
     */
1234
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1235
  }
1236
  start = ptr;
1237

475116
  while (HAS_CHAR(enc, ptr, end)) {
1238







237192
    switch (BYTE_TYPE(enc, ptr)) {
1239
#define LEAD_CASE(n) \
1240
    case BT_LEAD ## n: ptr += n; break;
1241
6
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1242
#undef LEAD_CASE
1243
    case BT_AMP:
1244

282
      if (ptr == start)
1245
258
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1246
24
      *nextTokPtr = ptr;
1247
24
      return XML_TOK_DATA_CHARS;
1248
    case BT_LT:
1249
      /* this is for inside entity references */
1250
6
      *nextTokPtr = ptr;
1251
6
      return XML_TOK_INVALID;
1252
    case BT_LF:
1253

12
      if (ptr == start) {
1254
12
        *nextTokPtr = ptr + MINBPC(enc);
1255
12
        return XML_TOK_DATA_NEWLINE;
1256
      }
1257
      *nextTokPtr = ptr;
1258
      return XML_TOK_DATA_CHARS;
1259
    case BT_CR:
1260

18
      if (ptr == start) {
1261
12
        ptr += MINBPC(enc);
1262

12
        if (! HAS_CHAR(enc, ptr, end))
1263
6
          return XML_TOK_TRAILING_CR;
1264


6
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1265
6
          ptr += MINBPC(enc);
1266
6
        *nextTokPtr = ptr;
1267
6
        return XML_TOK_DATA_NEWLINE;
1268
      }
1269
6
      *nextTokPtr = ptr;
1270
6
      return XML_TOK_DATA_CHARS;
1271
    case BT_S:
1272

2406
      if (ptr == start) {
1273
1440
        *nextTokPtr = ptr + MINBPC(enc);
1274
1440
        return XML_TOK_ATTRIBUTE_VALUE_S;
1275
      }
1276
966
      *nextTokPtr = ptr;
1277
966
      return XML_TOK_DATA_CHARS;
1278
    default:
1279
234390
      ptr += MINBPC(enc);
1280
234390
      break;
1281
    }
1282
  }
1283
438
  *nextTokPtr = ptr;
1284
438
  return XML_TOK_DATA_CHARS;
1285
3696
}
1286
1287
static int PTRCALL
1288
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1289
                       const char *end, const char **nextTokPtr)
1290
{
1291
  const char *start;
1292

2988
  if (ptr >= end)
1293
639
    return XML_TOK_NONE;
1294

855
  else if (! HAS_CHAR(enc, ptr, end)) {
1295
    /* This line cannot be executed.  The incoming data has already
1296
     * been tokenized once, so incomplete characters like this have
1297
     * already been eliminated from the input.  Retaining the paranoia
1298
     * check is still valuable, however.
1299
     */
1300
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1301
  }
1302
  start = ptr;
1303

458766
  while (HAS_CHAR(enc, ptr, end)) {
1304







229356
    switch (BYTE_TYPE(enc, ptr)) {
1305
#define LEAD_CASE(n) \
1306
    case BT_LEAD ## n: ptr += n; break;
1307
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1308
#undef LEAD_CASE
1309
    case BT_AMP:
1310

108
      if (ptr == start)
1311
84
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1312
24
      *nextTokPtr = ptr;
1313
24
      return XML_TOK_DATA_CHARS;
1314
    case BT_PERCNT:
1315

210
      if (ptr == start) {
1316
210
        int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1317
                                       end, nextTokPtr);
1318
210
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1319
      }
1320
      *nextTokPtr = ptr;
1321
      return XML_TOK_DATA_CHARS;
1322
    case BT_LF:
1323

90
      if (ptr == start) {
1324
72
        *nextTokPtr = ptr + MINBPC(enc);
1325
72
        return XML_TOK_DATA_NEWLINE;
1326
      }
1327
18
      *nextTokPtr = ptr;
1328
18
      return XML_TOK_DATA_CHARS;
1329
    case BT_CR:
1330

12
      if (ptr == start) {
1331
6
        ptr += MINBPC(enc);
1332

6
        if (! HAS_CHAR(enc, ptr, end))
1333
6
          return XML_TOK_TRAILING_CR;
1334
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1335
          ptr += MINBPC(enc);
1336
        *nextTokPtr = ptr;
1337
        return XML_TOK_DATA_NEWLINE;
1338
      }
1339
6
      *nextTokPtr = ptr;
1340
6
      return XML_TOK_DATA_CHARS;
1341
    default:
1342
228528
      ptr += MINBPC(enc);
1343
228528
      break;
1344
    }
1345
  }
1346
435
  *nextTokPtr = ptr;
1347
435
  return XML_TOK_DATA_CHARS;
1348
1494
}
1349
1350
#ifdef XML_DTD
1351
1352
static int PTRCALL
1353
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1354
                         const char *end, const char **nextTokPtr)
1355
{
1356
  int level = 0;
1357
  if (MINBPC(enc) > 1) {
1358
1272
    size_t n = end - ptr;
1359

636
    if (n & (MINBPC(enc) - 1)) {
1360
312
      n &= ~(MINBPC(enc) - 1);
1361
312
      end = ptr + n;
1362
312
    }
1363
  }
1364

21672
  while (HAS_CHAR(enc, ptr, end)) {
1365







24876
    switch (BYTE_TYPE(enc, ptr)) {
1366






60
    INVALID_CASES(ptr, nextTokPtr)
1367
    case BT_LT:
1368
876
      ptr += MINBPC(enc);
1369

918
      REQUIRE_CHAR(enc, ptr, end);
1370


1422
      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1371
834
        ptr += MINBPC(enc);
1372

876
        REQUIRE_CHAR(enc, ptr, end);
1373


1356
        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1374
          ++level;
1375
          ptr += MINBPC(enc);
1376
        }
1377
      }
1378
      break;
1379
    case BT_RSQB:
1380
78
      ptr += MINBPC(enc);
1381

108
      REQUIRE_CHAR(enc, ptr, end);
1382


84
      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1383
48
        ptr += MINBPC(enc);
1384

78
        REQUIRE_CHAR(enc, ptr, end);
1385


30
        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1386
18
          ptr += MINBPC(enc);
1387

18
          if (level == 0) {
1388
18
            *nextTokPtr = ptr;
1389
18
            return XML_TOK_IGNORE_SECT;
1390
          }
1391
          --level;
1392
        }
1393
      }
1394
      break;
1395
    default:
1396
8940
      ptr += MINBPC(enc);
1397
8940
      break;
1398
    }
1399
  }
1400
756
  return XML_TOK_PARTIAL;
1401
948
}
1402
1403
#endif /* XML_DTD */
1404
1405
static int PTRCALL
1406
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1407
                   const char **badPtr)
1408
{
1409
60828
  ptr += MINBPC(enc);
1410
30414
  end -= MINBPC(enc);
1411

2509464
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1412


















1888932
    switch (BYTE_TYPE(enc, ptr)) {
1413
    case BT_DIGIT:
1414
    case BT_HEX:
1415
    case BT_MINUS:
1416
    case BT_APOS:
1417
    case BT_LPAR:
1418
    case BT_RPAR:
1419
    case BT_PLUS:
1420
    case BT_COMMA:
1421
    case BT_SOL:
1422
    case BT_EQUALS:
1423
    case BT_QUEST:
1424
    case BT_CR:
1425
    case BT_LF:
1426
    case BT_SEMI:
1427
    case BT_EXCL:
1428
    case BT_AST:
1429
    case BT_PERCNT:
1430
    case BT_NUM:
1431
#ifdef XML_NS
1432
    case BT_COLON:
1433
#endif
1434
      break;
1435
    case BT_S:
1436


90000
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1437
        *badPtr = ptr;
1438
        return 0;
1439
      }
1440
      break;
1441
    case BT_NAME:
1442
    case BT_NMSTRT:
1443


574584
      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1444
        break;
1445
    default:
1446



6
      switch (BYTE_TO_ASCII(enc, ptr)) {
1447
      case 0x24: /* $ */
1448
      case 0x40: /* @ */
1449
        break;
1450
      default:
1451
6
        *badPtr = ptr;
1452
6
        return 0;
1453
      }
1454
      break;
1455
    }
1456
  }
1457
30408
  return 1;
1458
30414
}
1459
1460
/* This must only be called for a well-formed start-tag or empty
1461
   element tag.  Returns the number of attributes.  Pointers to the
1462
   first attsMax attributes are stored in atts.
1463
*/
1464
1465
static int PTRCALL
1466
PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1467
                int attsMax, ATTRIBUTE *atts)
1468
{
1469
  enum { other, inName, inValue } state = inName;
1470
  int nAtts = 0;
1471
  int open = 0; /* defined when state == inValue;
1472
                   initialization just to shut up compilers */
1473
1474
148945839
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1475












261504741
    switch (BYTE_TYPE(enc, ptr)) {
1476
#define START_NAME \
1477
      if (state == other) { \
1478
        if (nAtts < attsMax) { \
1479
          atts[nAtts].name = ptr; \
1480
          atts[nAtts].normalized = 1; \
1481
        } \
1482
        state = inName; \
1483
      }
1484
#define LEAD_CASE(n) \
1485
    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1486









72
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1487
#undef LEAD_CASE
1488
    case BT_NONASCII:
1489
    case BT_NMSTRT:
1490
    case BT_HEX:
1491



118024887
      START_NAME
1492
      break;
1493
#undef START_NAME
1494
    case BT_QUOT:
1495

5700012
      if (state != inValue) {
1496

2850006
        if (nAtts < attsMax)
1497
2850006
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1498
        state = inValue;
1499
        open = BT_QUOT;
1500
2850006
      }
1501

2850006
      else if (open == BT_QUOT) {
1502
        state = other;
1503

2850006
        if (nAtts < attsMax)
1504
2850006
          atts[nAtts].valueEnd = ptr;
1505
2850006
        nAtts++;
1506
2850006
      }
1507
      break;
1508
    case BT_APOS:
1509

6408
      if (state != inValue) {
1510

3204
        if (nAtts < attsMax)
1511
3180
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1512
        state = inValue;
1513
        open = BT_APOS;
1514
3204
      }
1515

3204
      else if (open == BT_APOS) {
1516
        state = other;
1517

3204
        if (nAtts < attsMax)
1518
3180
          atts[nAtts].valueEnd = ptr;
1519
3204
        nAtts++;
1520
3204
      }
1521
      break;
1522
    case BT_AMP:
1523

258
      if (nAtts < attsMax)
1524
258
        atts[nAtts].normalized = 0;
1525
      break;
1526
    case BT_S:
1527

5975112
      if (state == inName)
1528
3211740
        state = other;
1529

2763444
      else if (state == inValue
1530

2763858
               && nAtts < attsMax
1531

972
               && atts[nAtts].normalized
1532

600
               && (ptr == atts[nAtts].valuePtr
1533


186
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1534


144
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1535


144
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1536
42
        atts[nAtts].normalized = 0;
1537
      break;
1538
    case BT_CR: case BT_LF:
1539
      /* This case ensures that the first attribute name is counted.
1540
         Apart from that we could just change state on the quote. */
1541

390474
      if (state == inName)
1542
270072
        state = other;
1543



120420
      else if (state == inValue && nAtts < attsMax)
1544
18
        atts[nAtts].normalized = 0;
1545
      break;
1546
    case BT_GT:
1547
    case BT_SOL:
1548

10529007
      if (state != inValue)
1549
9755373
        return nAtts;
1550
      break;
1551
    default:
1552
      break;
1553
    }
1554
  }
1555
  /* not reached */
1556
}
1557
1558
static int PTRFASTCALL
1559
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
1560
{
1561
  int result = 0;
1562
  /* skip &# */
1563
948
  ptr += 2*MINBPC(enc);
1564


498
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1565

1776
    for (ptr += MINBPC(enc);
1566

1008
         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1567
654
         ptr += MINBPC(enc)) {
1568

192
      int c = BYTE_TO_ASCII(enc, ptr);
1569

















1320
      switch (c) {
1570
      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1571
      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1572
414
        result <<= 4;
1573
414
        result |= (c - ASCII_0);
1574
414
        break;
1575
      case ASCII_A: case ASCII_B: case ASCII_C:
1576
      case ASCII_D: case ASCII_E: case ASCII_F:
1577
246
        result <<= 4;
1578
246
        result += 10 + (c - ASCII_A);
1579
246
        break;
1580
      case ASCII_a: case ASCII_b: case ASCII_c:
1581
      case ASCII_d: case ASCII_e: case ASCII_f:
1582
        result <<= 4;
1583
        result += 10 + (c - ASCII_a);
1584
        break;
1585
      }
1586

660
      if (result >= 0x110000)
1587
6
        return -1;
1588

654
    }
1589
  }
1590
  else {
1591


1956
    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1592

108
      int c = BYTE_TO_ASCII(enc, ptr);
1593
696
      result *= 10;
1594
696
      result += (c - ASCII_0);
1595

696
      if (result >= 0x110000)
1596
6
        return -1;
1597

690
    }
1598
  }
1599
462
  return checkCharRefNumber(result);
1600
474
}
1601
1602
static int PTRCALL
1603
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
1604
                             const char *end)
1605
{
1606



6012699
  switch ((end - ptr)/MINBPC(enc)) {
1607
  case 2:
1608


2852997
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1609



2850030
      switch (BYTE_TO_ASCII(enc, ptr)) {
1610
      case ASCII_l:
1611
1590012
        return ASCII_LT;
1612
      case ASCII_g:
1613
1260012
        return ASCII_GT;
1614
      }
1615
    }
1616
    break;
1617
  case 3:
1618


150306
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1619
150030
      ptr += MINBPC(enc);
1620


150030
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1621
150030
        ptr += MINBPC(enc);
1622


150030
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1623
150024
          return ASCII_AMP;
1624
      }
1625
    }
1626
    break;
1627
  case 4:
1628



60
    switch (BYTE_TO_ASCII(enc, ptr)) {
1629
    case ASCII_q:
1630
18
      ptr += MINBPC(enc);
1631


18
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1632
18
        ptr += MINBPC(enc);
1633


18
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1634
12
          ptr += MINBPC(enc);
1635


12
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1636
12
            return ASCII_QUOT;
1637
        }
1638
      }
1639
      break;
1640
    case ASCII_a:
1641
30
      ptr += MINBPC(enc);
1642


30
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1643
30
        ptr += MINBPC(enc);
1644


30
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1645
30
          ptr += MINBPC(enc);
1646


30
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1647
24
            return ASCII_APOS;
1648
        }
1649
      }
1650
      break;
1651
    }
1652
  }
1653
4653
  return 0;
1654
3004737
}
1655
1656
static int PTRCALL
1657
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
1658
                         const char *end1, const char *ptr2)
1659
{
1660

2535114
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1661

1032699
    if (end1 - ptr1 < MINBPC(enc)) {
1662
      /* This line cannot be executed.  The incoming data has already
1663
       * been tokenized once, so incomplete characters like this have
1664
       * already been eliminated from the input.  Retaining the
1665
       * paranoia check is still valuable, however.
1666
       */
1667
      return 0; /* LCOV_EXCL_LINE */
1668
    }
1669


1036401
    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1670
39102
      return 0;
1671
  }
1672
143538
  return ptr1 == end1;
1673
182640
}
1674
1675
static int PTRFASTCALL
1676
PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1677
{
1678
  const char *start = ptr;
1679
190030551
  for (;;) {
1680









167672436
    switch (BYTE_TYPE(enc, ptr)) {
1681
#define LEAD_CASE(n) \
1682
    case BT_LEAD ## n: ptr += n; break;
1683
72
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1684
#undef LEAD_CASE
1685
    case BT_NONASCII:
1686
    case BT_NMSTRT:
1687
#ifdef XML_NS
1688
    case BT_COLON:
1689
#endif
1690
    case BT_HEX:
1691
    case BT_DIGIT:
1692
    case BT_NAME:
1693
    case BT_MINUS:
1694
145308873
      ptr += MINBPC(enc);
1695
145308873
      break;
1696
    default:
1697
22360803
      return (int)(ptr - start);
1698
    }
1699
  }
1700
}
1701
1702
static const char * PTRFASTCALL
1703
PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1704
{
1705
384
  for (;;) {
1706




276
    switch (BYTE_TYPE(enc, ptr)) {
1707
    case BT_LF:
1708
    case BT_CR:
1709
    case BT_S:
1710
120
      ptr += MINBPC(enc);
1711
      break;
1712
    default:
1713
132
      return ptr;
1714
    }
1715
  }
1716
}
1717
1718
static void PTRCALL
1719
PREFIX(updatePosition)(const ENCODING *enc,
1720
                       const char *ptr,
1721
                       const char *end,
1722
                       POSITION *pos)
1723
{
1724

18238203
  while (HAS_CHAR(enc, ptr, end)) {
1725





2562060
    switch (BYTE_TYPE(enc, ptr)) {
1726
#define LEAD_CASE(n) \
1727
    case BT_LEAD ## n: \
1728
      ptr += n; \
1729
      break;
1730
108
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1731
#undef LEAD_CASE
1732
    case BT_LF:
1733
13620
      pos->columnNumber = (XML_Size)-1;
1734
13620
      pos->lineNumber++;
1735
13620
      ptr += MINBPC(enc);
1736
13620
      break;
1737
    case BT_CR:
1738
36
      pos->lineNumber++;
1739
36
      ptr += MINBPC(enc);
1740




60
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1741
12
        ptr += MINBPC(enc);
1742
36
      pos->columnNumber = (XML_Size)-1;
1743
36
      break;
1744
    default:
1745
2524092
      ptr += MINBPC(enc);
1746
2524092
      break;
1747
    }
1748
2537856
    pos->columnNumber++;
1749
  }
1750
4387497
}
1751
1752
#undef DO_LEAD_CASE
1753
#undef MULTIBYTE_CASES
1754
#undef INVALID_CASES
1755
#undef CHECK_NAME_CASE
1756
#undef CHECK_NAME_CASES
1757
#undef CHECK_NMSTRT_CASE
1758
#undef CHECK_NMSTRT_CASES
1759
1760
#endif /* XML_TOK_IMPL_C */