GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libexpat/lib/xmltok.c Lines: 366 477 76.7 %
Date: 2017-11-13 Branches: 264 458 57.6 %

Line Branch Exec Source
1
/*
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
   Copyright (c) 2000-2017 Expat development team
11
   Licensed under the MIT license:
12
13
   Permission is  hereby granted,  free of charge,  to any  person obtaining
14
   a  copy  of  this  software   and  associated  documentation  files  (the
15
   "Software"),  to  deal in  the  Software  without restriction,  including
16
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17
   distribute, sublicense, and/or sell copies of the Software, and to permit
18
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19
   following conditions:
20
21
   The above copyright  notice and this permission notice  shall be included
22
   in all copies or substantial portions of the Software.
23
24
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30
   USE OR OTHER DEALINGS IN THE SOFTWARE.
31
*/
32
33
#include <stddef.h>
34
#include <string.h>  /* memcpy */
35
36
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
37
  /* for vs2012/11.0/1700 and earlier Visual Studio compilers */
38
# define bool   int
39
# define false  0
40
# define true   1
41
#else
42
# include <stdbool.h>
43
#endif
44
45
46
#ifdef _WIN32
47
#include "winconfig.h"
48
#else
49
#ifdef HAVE_EXPAT_CONFIG_H
50
#include <expat_config.h>
51
#endif
52
#endif /* ndef _WIN32 */
53
54
#include "expat_external.h"
55
#include "internal.h"
56
#include "xmltok.h"
57
#include "nametab.h"
58
59
#ifdef XML_DTD
60
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
61
#else
62
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
63
#endif
64
65
#define VTABLE1 \
66
  { PREFIX(prologTok), PREFIX(contentTok), \
67
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
68
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
69
  PREFIX(nameMatchesAscii), \
70
  PREFIX(nameLength), \
71
  PREFIX(skipS), \
72
  PREFIX(getAtts), \
73
  PREFIX(charRefNumber), \
74
  PREFIX(predefinedEntityName), \
75
  PREFIX(updatePosition), \
76
  PREFIX(isPublicId)
77
78
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
79
80
#define UCS2_GET_NAMING(pages, hi, lo) \
81
   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
82
83
/* A 2 byte UTF-8 representation splits the character's 11 bits between
84
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
85
   pages, 3 bits to add to that index and 5 bits to generate the mask.
86
*/
87
#define UTF8_GET_NAMING2(pages, byte) \
88
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
89
                      + ((((byte)[0]) & 3) << 1) \
90
                      + ((((byte)[1]) >> 5) & 1)] \
91
         & (1u << (((byte)[1]) & 0x1F)))
92
93
/* A 3 byte UTF-8 representation splits the character's 16 bits between
94
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
95
   into pages, 3 bits to add to that index and 5 bits to generate the
96
   mask.
97
*/
98
#define UTF8_GET_NAMING3(pages, byte) \
99
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
100
                             + ((((byte)[1]) >> 2) & 0xF)] \
101
                       << 3) \
102
                      + ((((byte)[1]) & 3) << 1) \
103
                      + ((((byte)[2]) >> 5) & 1)] \
104
         & (1u << (((byte)[2]) & 0x1F)))
105
106
#define UTF8_GET_NAMING(pages, p, n) \
107
  ((n) == 2 \
108
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
109
  : ((n) == 3 \
110
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
111
     : 0))
112
113
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
114
   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
115
   with the additional restriction of not allowing the Unicode
116
   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
117
   Implementation details:
118
     (A & 0x80) == 0     means A < 0x80
119
   and
120
     (A & 0xC0) == 0xC0  means A > 0xBF
121
*/
122
123
#define UTF8_INVALID2(p) \
124
  ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
125
126
#define UTF8_INVALID3(p) \
127
  (((p)[2] & 0x80) == 0 \
128
  || \
129
  ((*p) == 0xEF && (p)[1] == 0xBF \
130
    ? \
131
    (p)[2] > 0xBD \
132
    : \
133
    ((p)[2] & 0xC0) == 0xC0) \
134
  || \
135
  ((*p) == 0xE0 \
136
    ? \
137
    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
138
    : \
139
    ((p)[1] & 0x80) == 0 \
140
    || \
141
    ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
142
143
#define UTF8_INVALID4(p) \
144
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
145
  || \
146
  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
147
  || \
148
  ((*p) == 0xF0 \
149
    ? \
150
    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
151
    : \
152
    ((p)[1] & 0x80) == 0 \
153
    || \
154
    ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
155
156
static int PTRFASTCALL
157
isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
158
{
159
12
  return 0;
160
}
161
162
static int PTRFASTCALL
163
utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
164
{
165
  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
166
}
167
168
static int PTRFASTCALL
169
utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
170
{
171
12
  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
172
}
173
174
#define utf8_isName4 isNever
175
176
static int PTRFASTCALL
177
utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
178
{
179
  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
180
}
181
182
static int PTRFASTCALL
183
utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
184
{
185
12
  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
186
}
187
188
#define utf8_isNmstrt4 isNever
189
190
static int PTRFASTCALL
191
utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
192
{
193

1146
  return UTF8_INVALID2((const unsigned char *)p);
194
}
195
196
static int PTRFASTCALL
197
utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
198
{
199




528
  return UTF8_INVALID3((const unsigned char *)p);
200
}
201
202
static int PTRFASTCALL
203
utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
204
{
205



90
  return UTF8_INVALID4((const unsigned char *)p);
206
}
207
208
/* Encoding descriptor used by the encodings defined in this file.
   The generic ENCODING comes first so that an ENCODING pointer can be
   cast to this type (see AS_NORMAL_ENCODING).  It is followed by a
   256-entry byte-type table indexed by an unsigned byte value, then by
   function pointers.  The XML_MIN_SIZE-only pointers supply the per-byte
   operations that are otherwise expanded as macros.  The remaining
   pointers classify 2-, 3- and 4-byte sequences as name characters,
   name-start characters, or invalid sequences.  Initializers are
   positional, so the member order must not change. */
struct normal_encoding {
209
  ENCODING enc;
210
  unsigned char type[256];
211
#ifdef XML_MIN_SIZE
212
  int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
213
  int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
214
  int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
215
  int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
216
  int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
217
#endif /* XML_MIN_SIZE */
218
  int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
219
  int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
220
  int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
221
  int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
222
  int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
223
  int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
224
  int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
225
  int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
226
  int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
227
};
228
229
#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))
230
231
#ifdef XML_MIN_SIZE
232
233
#define STANDARD_VTABLE(E) \
234
 E ## byteType, \
235
 E ## isNameMin, \
236
 E ## isNmstrtMin, \
237
 E ## byteToAscii, \
238
 E ## charMatches,
239
240
#else
241
242
#define STANDARD_VTABLE(E) /* as nothing */
243
244
#endif
245
246
#define NORMAL_VTABLE(E) \
247
 E ## isName2, \
248
 E ## isName3, \
249
 E ## isName4, \
250
 E ## isNmstrt2, \
251
 E ## isNmstrt3, \
252
 E ## isNmstrt4, \
253
 E ## isInvalid2, \
254
 E ## isInvalid3, \
255
 E ## isInvalid4
256
257
#define NULL_VTABLE \
258
 /* isName2 */ NULL, \
259
 /* isName3 */ NULL, \
260
 /* isName4 */ NULL, \
261
 /* isNmstrt2 */ NULL, \
262
 /* isNmstrt3 */ NULL, \
263
 /* isNmstrt4 */ NULL, \
264
 /* isInvalid2 */ NULL, \
265
 /* isInvalid3 */ NULL, \
266
 /* isInvalid4 */ NULL
267
268
static int FASTCALL checkCharRefNumber(int);
269
270
#include "xmltok_impl.h"
271
#include "ascii.h"
272
273
#ifdef XML_MIN_SIZE
274
#define sb_isNameMin isNever
275
#define sb_isNmstrtMin isNever
276
#endif
277
278
#ifdef XML_MIN_SIZE
279
#define MINBPC(enc) ((enc)->minBytesPerChar)
280
#else
281
/* minimum bytes per character */
282
#define MINBPC(enc) 1
283
#endif
284
285
#define SB_BYTE_TYPE(enc, p) \
286
  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
287
288
#ifdef XML_MIN_SIZE
289
static int PTRFASTCALL
290
sb_byteType(const ENCODING *enc, const char *p)
291
{
292
  return SB_BYTE_TYPE(enc, p);
293
}
294
#define BYTE_TYPE(enc, p) \
295
 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
296
#else
297
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
298
#endif
299
300
#ifdef XML_MIN_SIZE
301
#define BYTE_TO_ASCII(enc, p) \
302
 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
303
static int PTRFASTCALL
304
sb_byteToAscii(const ENCODING *enc, const char *p)
305
{
306
  return *p;
307
}
308
#else
309
#define BYTE_TO_ASCII(enc, p) (*(p))
310
#endif
311
312
#define IS_NAME_CHAR(enc, p, n) \
313
 (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
314
#define IS_NMSTRT_CHAR(enc, p, n) \
315
 (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
316
#define IS_INVALID_CHAR(enc, p, n) \
317
 (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
318
319
#ifdef XML_MIN_SIZE
320
#define IS_NAME_CHAR_MINBPC(enc, p) \
321
 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
322
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
323
 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
324
#else
325
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
326
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
327
#endif
328
329
#ifdef XML_MIN_SIZE
330
#define CHAR_MATCHES(enc, p, c) \
331
 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
332
static int PTRCALL
333
sb_charMatches(const ENCODING *enc, const char *p, int c)
334
{
335
  return *p == c;
336
}
337
#else
338
/* c is an ASCII character */
339
#define CHAR_MATCHES(enc, p, c) (*(p) == c)
340
#endif
341
342
#define PREFIX(ident) normal_ ## ident
343
#define XML_TOK_IMPL_C
344
#include "xmltok_impl.c"
345
#undef XML_TOK_IMPL_C
346
347
#undef MINBPC
348
#undef BYTE_TYPE
349
#undef BYTE_TO_ASCII
350
#undef CHAR_MATCHES
351
#undef IS_NAME_CHAR
352
#undef IS_NAME_CHAR_MINBPC
353
#undef IS_NMSTRT_CHAR
354
#undef IS_NMSTRT_CHAR_MINBPC
355
#undef IS_INVALID_CHAR
356
357
/* Marker bits for the first byte of a UTF-8 sequence of each length.
   The converters in this file OR them into the leading output byte
   (UTF8_cval2 in latin1_toUtf8; UTF8_cval2/3/4 in the UTF-16 to UTF-8
   converter macro). */
enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
358
  UTF8_cval1 = 0x00,
359
  UTF8_cval2 = 0xc0,
360
  UTF8_cval3 = 0xe0,
361
  UTF8_cval4 = 0xf0
362
};
363
364
/* Pull *fromLimRef back towards `from` so that the range
   [from, *fromLimRef) no longer ends in a truncated multi-byte UTF-8
   sequence.

   The range is scanned backwards one byte at a time:
     - an ASCII byte (0xxxxxxx) ends the scan at the current limit;
     - a lead byte (110xxxxx, 1110xxxx, 11110xxx) either has all of its
       bytes inside the range, in which case the limit is restored to just
       past that sequence, or it is dropped and the scan continues;
     - any other byte (e.g. a 10xxxxxx continuation byte) is stepped over.

   from        start of the buffer; the limit never moves before it.
   fromLimRef  in/out: one-past-the-end pointer, updated in place.  */
void
_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
{
  const char *end = *fromLimRef;
  size_t trailing = 0;  /* bytes stepped over since the last reset */

  while (end > from) {
    const unsigned char last = (unsigned char)end[-1];
    size_t seqLen;

    if ((last & 0x80u) == 0x00u)  /* plain ASCII: nothing to trim */
      break;

    /* Sequence length announced by a lead byte; 0 for non-lead bytes. */
    if ((last & 0xe0u) == 0xc0u)
      seqLen = 2;
    else if ((last & 0xf0u) == 0xe0u)
      seqLen = 3;
    else if ((last & 0xf8u) == 0xf0u)
      seqLen = 4;
    else
      seqLen = 0;

    if (seqLen != 0) {
      if (trailing + 1 >= seqLen) {
        /* Whole sequence is present: put the limit right after it. */
        end += seqLen - 1;
        break;
      }
      /* Truncated sequence: drop it and keep scanning backwards. */
      trailing = 0;
    }

    end--;
    trailing++;
  }

  *fromLimRef = end;
}
398
399
static enum XML_Convert_Result PTRCALL
400
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
401
            const char **fromP, const char *fromLim,
402
            char **toP, const char *toLim)
403
{
404
  bool input_incomplete = false;
405
  bool output_exhausted = false;
406
407
  /* Avoid copying partial characters (due to limited space). */
408
1344576
  const ptrdiff_t bytesAvailable = fromLim - *fromP;
409
1344576
  const ptrdiff_t bytesStorable = toLim - *toP;
410
1344576
  if (bytesAvailable > bytesStorable) {
411
1247712
    fromLim = *fromP + bytesStorable;
412
    output_exhausted = true;
413
1247712
  }
414
415
  /* Avoid copying partial characters (from incomplete input). */
416
  {
417
1344576
    const char * const fromLimBefore = fromLim;
418
1344576
    _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
419
1344576
    if (fromLim < fromLimBefore) {
420
      input_incomplete = true;
421
12
    }
422
  }
423
424
  {
425
1344576
    const ptrdiff_t bytesToCopy = fromLim - *fromP;
426
1344576
    memcpy(*toP, *fromP, bytesToCopy);
427
1344576
    *fromP += bytesToCopy;
428
1344576
    *toP += bytesToCopy;
429
  }
430
431
1344576
  if (output_exhausted)  /* needs to go first */
432
1247712
    return XML_CONVERT_OUTPUT_EXHAUSTED;
433
96864
  else if (input_incomplete)
434
    return XML_CONVERT_INPUT_INCOMPLETE;
435
  else
436
96864
    return XML_CONVERT_COMPLETED;
437
1344576
}
438
439
static enum XML_Convert_Result PTRCALL
440
utf8_toUtf16(const ENCODING *enc,
441
             const char **fromP, const char *fromLim,
442
             unsigned short **toP, const unsigned short *toLim)
443
{
444
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
445
  unsigned short *to = *toP;
446
  const char *from = *fromP;
447
  while (from < fromLim && to < toLim) {
448
    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
449
    case BT_LEAD2:
450
      if (fromLim - from < 2) {
451
        res = XML_CONVERT_INPUT_INCOMPLETE;
452
        goto after;
453
      }
454
      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
455
      from += 2;
456
      break;
457
    case BT_LEAD3:
458
      if (fromLim - from < 3) {
459
        res = XML_CONVERT_INPUT_INCOMPLETE;
460
        goto after;
461
      }
462
      *to++ = (unsigned short)(((from[0] & 0xf) << 12)
463
                               | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
464
      from += 3;
465
      break;
466
    case BT_LEAD4:
467
      {
468
        unsigned long n;
469
        if (toLim - to < 2) {
470
          res = XML_CONVERT_OUTPUT_EXHAUSTED;
471
          goto after;
472
        }
473
        if (fromLim - from < 4) {
474
          res = XML_CONVERT_INPUT_INCOMPLETE;
475
          goto after;
476
        }
477
        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
478
            | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
479
        n -= 0x10000;
480
        to[0] = (unsigned short)((n >> 10) | 0xD800);
481
        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
482
        to += 2;
483
        from += 4;
484
      }
485
      break;
486
    default:
487
      *to++ = *from++;
488
      break;
489
    }
490
  }
491
  if (from < fromLim)
492
    res = XML_CONVERT_OUTPUT_EXHAUSTED;
493
after:
494
  *fromP = from;
495
  *toP = to;
496
  return res;
497
}
498
499
#ifdef XML_NS
500
static const struct normal_encoding utf8_encoding_ns = {
501
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
502
  {
503
#include "asciitab.h"
504
#include "utf8tab.h"
505
  },
506
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
507
};
508
#endif
509
510
static const struct normal_encoding utf8_encoding = {
511
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
512
  {
513
#define BT_COLON BT_NMSTRT
514
#include "asciitab.h"
515
#undef BT_COLON
516
#include "utf8tab.h"
517
  },
518
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
519
};
520
521
#ifdef XML_NS
522
523
static const struct normal_encoding internal_utf8_encoding_ns = {
524
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
525
  {
526
#include "iasciitab.h"
527
#include "utf8tab.h"
528
  },
529
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
530
};
531
532
#endif
533
534
static const struct normal_encoding internal_utf8_encoding = {
535
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
536
  {
537
#define BT_COLON BT_NMSTRT
538
#include "iasciitab.h"
539
#undef BT_COLON
540
#include "utf8tab.h"
541
  },
542
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
543
};
544
545
static enum XML_Convert_Result PTRCALL
546
latin1_toUtf8(const ENCODING *UNUSED_P(enc),
547
              const char **fromP, const char *fromLim,
548
              char **toP, const char *toLim)
549
{
550
146835582
  for (;;) {
551
    unsigned char c;
552
131324838
    if (*fromP == fromLim)
553
15510732
      return XML_CONVERT_COMPLETED;
554
115814106
    c = (unsigned char)**fromP;
555
115814106
    if (c & 0x80) {
556
282
      if (toLim - *toP < 2)
557
6
        return XML_CONVERT_OUTPUT_EXHAUSTED;
558
276
      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
559
276
      *(*toP)++ = (char)((c & 0x3f) | 0x80);
560
276
      (*fromP)++;
561
276
    }
562
    else {
563
115813824
      if (*toP == toLim)
564
6
        return XML_CONVERT_OUTPUT_EXHAUSTED;
565
115813818
      *(*toP)++ = *(*fromP)++;
566
    }
567
115814094
  }
568
15510744
}
569
570
static enum XML_Convert_Result PTRCALL
571
latin1_toUtf16(const ENCODING *UNUSED_P(enc),
572
               const char **fromP, const char *fromLim,
573
               unsigned short **toP, const unsigned short *toLim)
574
{
575
  while (*fromP < fromLim && *toP < toLim)
576
    *(*toP)++ = (unsigned char)*(*fromP)++;
577
578
  if ((*toP == toLim) && (*fromP < fromLim))
579
    return XML_CONVERT_OUTPUT_EXHAUSTED;
580
  else
581
    return XML_CONVERT_COMPLETED;
582
}
583
584
#ifdef XML_NS
585
586
static const struct normal_encoding latin1_encoding_ns = {
587
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
588
  {
589
#include "asciitab.h"
590
#include "latin1tab.h"
591
  },
592
  STANDARD_VTABLE(sb_) NULL_VTABLE
593
};
594
595
#endif
596
597
static const struct normal_encoding latin1_encoding = {
598
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
599
  {
600
#define BT_COLON BT_NMSTRT
601
#include "asciitab.h"
602
#undef BT_COLON
603
#include "latin1tab.h"
604
  },
605
  STANDARD_VTABLE(sb_) NULL_VTABLE
606
};
607
608
static enum XML_Convert_Result PTRCALL
609
ascii_toUtf8(const ENCODING *UNUSED_P(enc),
610
             const char **fromP, const char *fromLim,
611
             char **toP, const char *toLim)
612
{
613

24828
  while (*fromP < fromLim && *toP < toLim)
614
7866
    *(*toP)++ = *(*fromP)++;
615
616

414
  if ((*toP == toLim) && (*fromP < fromLim))
617
6
    return XML_CONVERT_OUTPUT_EXHAUSTED;
618
  else
619
402
    return XML_CONVERT_COMPLETED;
620
408
}
621
622
#ifdef XML_NS
623
624
static const struct normal_encoding ascii_encoding_ns = {
625
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
626
  {
627
#include "asciitab.h"
628
/* BT_NONXML == 0 */
629
  },
630
  STANDARD_VTABLE(sb_) NULL_VTABLE
631
};
632
633
#endif
634
635
static const struct normal_encoding ascii_encoding = {
636
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
637
  {
638
#define BT_COLON BT_NMSTRT
639
#include "asciitab.h"
640
#undef BT_COLON
641
/* BT_NONXML == 0 */
642
  },
643
  STANDARD_VTABLE(sb_) NULL_VTABLE
644
};
645
646
static int PTRFASTCALL
647
unicode_byte_type(char hi, char lo)
648
{
649


3432
  switch ((unsigned char)hi) {
650
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
651
84
    return BT_LEAD4;
652
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
653
6
    return BT_TRAIL;
654
  case 0xFF:
655
12
    switch ((unsigned char)lo) {
656
    case 0xFF:
657
    case 0xFE:
658
      return BT_NONXML;
659
    }
660
    break;
661
  }
662
1620
  return BT_NONASCII;
663
1710
}
664
665
#define DEFINE_UTF16_TO_UTF8(E) \
666
static enum XML_Convert_Result  PTRCALL \
667
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
668
            const char **fromP, const char *fromLim, \
669
            char **toP, const char *toLim) \
670
{ \
671
  const char *from = *fromP; \
672
  fromLim = from + (((fromLim - from) >> 1) << 1);  /* shrink to even */ \
673
  for (; from < fromLim; from += 2) { \
674
    int plane; \
675
    unsigned char lo2; \
676
    unsigned char lo = GET_LO(from); \
677
    unsigned char hi = GET_HI(from); \
678
    switch (hi) { \
679
    case 0: \
680
      if (lo < 0x80) { \
681
        if (*toP == toLim) { \
682
          *fromP = from; \
683
          return XML_CONVERT_OUTPUT_EXHAUSTED; \
684
        } \
685
        *(*toP)++ = lo; \
686
        break; \
687
      } \
688
      /* fall through */ \
689
    case 0x1: case 0x2: case 0x3: \
690
    case 0x4: case 0x5: case 0x6: case 0x7: \
691
      if (toLim -  *toP < 2) { \
692
        *fromP = from; \
693
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
694
      } \
695
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
696
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
697
      break; \
698
    default: \
699
      if (toLim -  *toP < 3)  { \
700
        *fromP = from; \
701
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
702
      } \
703
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
704
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
705
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
706
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
707
      break; \
708
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
709
      if (toLim -  *toP < 4) { \
710
        *fromP = from; \
711
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
712
      } \
713
      if (fromLim - from < 4) { \
714
        *fromP = from; \
715
        return XML_CONVERT_INPUT_INCOMPLETE; \
716
      } \
717
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
718
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
719
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
720
      from += 2; \
721
      lo2 = GET_LO(from); \
722
      *(*toP)++ = (((lo & 0x3) << 4) \
723
                   | ((GET_HI(from) & 0x3) << 2) \
724
                   | (lo2 >> 6) \
725
                   | 0x80); \
726
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
727
      break; \
728
    } \
729
  } \
730
  *fromP = from; \
731
  if (from < fromLim) \
732
    return XML_CONVERT_INPUT_INCOMPLETE; \
733
  else \
734
    return XML_CONVERT_COMPLETED; \
735
}
736
737
#define DEFINE_UTF16_TO_UTF16(E) \
738
static enum XML_Convert_Result  PTRCALL \
739
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
740
             const char **fromP, const char *fromLim, \
741
             unsigned short **toP, const unsigned short *toLim) \
742
{ \
743
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
744
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1);  /* shrink to even */ \
745
  /* Avoid copying first half only of surrogate */ \
746
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
747
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
748
    fromLim -= 2; \
749
    res = XML_CONVERT_INPUT_INCOMPLETE; \
750
  } \
751
  for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
752
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
753
  if ((*toP == toLim) && (*fromP < fromLim)) \
754
    return XML_CONVERT_OUTPUT_EXHAUSTED; \
755
  else \
756
    return res; \
757
}
758
759
#define SET2(ptr, ch) \
760
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
761
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
762
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
763
764







12816
DEFINE_UTF16_TO_UTF8(little2_)
765
DEFINE_UTF16_TO_UTF16(little2_)
766
767
#undef SET2
768
#undef GET_LO
769
#undef GET_HI
770
771
#define SET2(ptr, ch) \
772
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
773
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
774
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
775
776







156348
DEFINE_UTF16_TO_UTF8(big2_)
777
DEFINE_UTF16_TO_UTF16(big2_)
778
779
#undef SET2
780
#undef GET_LO
781
#undef GET_HI
782
783
#define LITTLE2_BYTE_TYPE(enc, p) \
784
 ((p)[1] == 0 \
785
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
786
  : unicode_byte_type((p)[1], (p)[0]))
787
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
788
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
789
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
790
  UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
791
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
792
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
793
794
#ifdef XML_MIN_SIZE
795
796
static int PTRFASTCALL
797
little2_byteType(const ENCODING *enc, const char *p)
798
{
799
  return LITTLE2_BYTE_TYPE(enc, p);
800
}
801
802
static int PTRFASTCALL
803
little2_byteToAscii(const ENCODING *enc, const char *p)
804
{
805
  return LITTLE2_BYTE_TO_ASCII(enc, p);
806
}
807
808
static int PTRCALL
809
little2_charMatches(const ENCODING *enc, const char *p, int c)
810
{
811
  return LITTLE2_CHAR_MATCHES(enc, p, c);
812
}
813
814
static int PTRFASTCALL
815
little2_isNameMin(const ENCODING *enc, const char *p)
816
{
817
  return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
818
}
819
820
static int PTRFASTCALL
821
little2_isNmstrtMin(const ENCODING *enc, const char *p)
822
{
823
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
824
}
825
826
#undef VTABLE
827
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
828
829
#else /* not XML_MIN_SIZE */
830
831
#undef PREFIX
832
#define PREFIX(ident) little2_ ## ident
833
#define MINBPC(enc) 2
834
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
835
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
836
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
837
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
838
#define IS_NAME_CHAR(enc, p, n) 0
839
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
840
#define IS_NMSTRT_CHAR(enc, p, n) (0)
841
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
842
843
#define XML_TOK_IMPL_C
844
#include "xmltok_impl.c"
845
#undef XML_TOK_IMPL_C
846
847
#undef MINBPC
848
#undef BYTE_TYPE
849
#undef BYTE_TO_ASCII
850
#undef CHAR_MATCHES
851
#undef IS_NAME_CHAR
852
#undef IS_NAME_CHAR_MINBPC
853
#undef IS_NMSTRT_CHAR
854
#undef IS_NMSTRT_CHAR_MINBPC
855
#undef IS_INVALID_CHAR
856
857
#endif /* not XML_MIN_SIZE */
858
859
#ifdef XML_NS
860
861
static const struct normal_encoding little2_encoding_ns = {
862
  { VTABLE, 2, 0,
863
#if BYTEORDER == 1234
864
    1
865
#else
866
    0
867
#endif
868
  },
869
  {
870
#include "asciitab.h"
871
#include "latin1tab.h"
872
  },
873
  STANDARD_VTABLE(little2_) NULL_VTABLE
874
};
875
876
#endif
877
878
static const struct normal_encoding little2_encoding = {
879
  { VTABLE, 2, 0,
880
#if BYTEORDER == 1234
881
    1
882
#else
883
    0
884
#endif
885
  },
886
  {
887
#define BT_COLON BT_NMSTRT
888
#include "asciitab.h"
889
#undef BT_COLON
890
#include "latin1tab.h"
891
  },
892
  STANDARD_VTABLE(little2_) NULL_VTABLE
893
};
894
895
#if BYTEORDER != 4321
896
897
#ifdef XML_NS
898
899
static const struct normal_encoding internal_little2_encoding_ns = {
900
  { VTABLE, 2, 0, 1 },
901
  {
902
#include "iasciitab.h"
903
#include "latin1tab.h"
904
  },
905
  STANDARD_VTABLE(little2_) NULL_VTABLE
906
};
907
908
#endif
909
910
static const struct normal_encoding internal_little2_encoding = {
911
  { VTABLE, 2, 0, 1 },
912
  {
913
#define BT_COLON BT_NMSTRT
914
#include "iasciitab.h"
915
#undef BT_COLON
916
#include "latin1tab.h"
917
  },
918
  STANDARD_VTABLE(little2_) NULL_VTABLE
919
};
920
921
#endif
922
923
924
#define BIG2_BYTE_TYPE(enc, p) \
925
 ((p)[0] == 0 \
926
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
927
  : unicode_byte_type((p)[0], (p)[1]))
928
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
929
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
930
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
931
  UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
932
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
933
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
934
935
#ifdef XML_MIN_SIZE
936
937
static int PTRFASTCALL
938
big2_byteType(const ENCODING *enc, const char *p)
939
{
940
  return BIG2_BYTE_TYPE(enc, p);
941
}
942
943
static int PTRFASTCALL
944
big2_byteToAscii(const ENCODING *enc, const char *p)
945
{
946
  return BIG2_BYTE_TO_ASCII(enc, p);
947
}
948
949
static int PTRCALL
950
big2_charMatches(const ENCODING *enc, const char *p, int c)
951
{
952
  return BIG2_CHAR_MATCHES(enc, p, c);
953
}
954
955
static int PTRFASTCALL
956
big2_isNameMin(const ENCODING *enc, const char *p)
957
{
958
  return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
959
}
960
961
static int PTRFASTCALL
962
big2_isNmstrtMin(const ENCODING *enc, const char *p)
963
{
964
  return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
965
}
966
967
#undef VTABLE
968
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
969
970
#else /* not XML_MIN_SIZE */
971
972
#undef PREFIX
973
#define PREFIX(ident) big2_ ## ident
974
#define MINBPC(enc) 2
975
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
976
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
977
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
978
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
979
#define IS_NAME_CHAR(enc, p, n) 0
980
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
981
#define IS_NMSTRT_CHAR(enc, p, n) (0)
982
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
983
984
#define XML_TOK_IMPL_C
985
#include "xmltok_impl.c"
986
#undef XML_TOK_IMPL_C
987
988
#undef MINBPC
989
#undef BYTE_TYPE
990
#undef BYTE_TO_ASCII
991
#undef CHAR_MATCHES
992
#undef IS_NAME_CHAR
993
#undef IS_NAME_CHAR_MINBPC
994
#undef IS_NMSTRT_CHAR
995
#undef IS_NMSTRT_CHAR_MINBPC
996
#undef IS_INVALID_CHAR
997
998
#endif /* not XML_MIN_SIZE */
999
1000
#ifdef XML_NS
1001
1002
static const struct normal_encoding big2_encoding_ns = {
1003
  { VTABLE, 2, 0,
1004
#if BYTEORDER == 4321
1005
  1
1006
#else
1007
  0
1008
#endif
1009
  },
1010
  {
1011
#include "asciitab.h"
1012
#include "latin1tab.h"
1013
  },
1014
  STANDARD_VTABLE(big2_) NULL_VTABLE
1015
};
1016
1017
#endif
1018
1019
static const struct normal_encoding big2_encoding = {
1020
  { VTABLE, 2, 0,
1021
#if BYTEORDER == 4321
1022
  1
1023
#else
1024
  0
1025
#endif
1026
  },
1027
  {
1028
#define BT_COLON BT_NMSTRT
1029
#include "asciitab.h"
1030
#undef BT_COLON
1031
#include "latin1tab.h"
1032
  },
1033
  STANDARD_VTABLE(big2_) NULL_VTABLE
1034
};
1035
1036
#if BYTEORDER != 1234
1037
1038
#ifdef XML_NS
1039
1040
static const struct normal_encoding internal_big2_encoding_ns = {
1041
  { VTABLE, 2, 0, 1 },
1042
  {
1043
#include "iasciitab.h"
1044
#include "latin1tab.h"
1045
  },
1046
  STANDARD_VTABLE(big2_) NULL_VTABLE
1047
};
1048
1049
#endif
1050
1051
static const struct normal_encoding internal_big2_encoding = {
1052
  { VTABLE, 2, 0, 1 },
1053
  {
1054
#define BT_COLON BT_NMSTRT
1055
#include "iasciitab.h"
1056
#undef BT_COLON
1057
#include "latin1tab.h"
1058
  },
1059
  STANDARD_VTABLE(big2_) NULL_VTABLE
1060
};
1061
1062
#endif
1063
1064
#undef PREFIX
1065
1066
static int FASTCALL
1067
streqci(const char *s1, const char *s2)
1068
{
1069
440286
  for (;;) {
1070
375474
    char c1 = *s1++;
1071
375474
    char c2 = *s2++;
1072

385926
    if (ASCII_a <= c1 && c1 <= ASCII_z)
1073
10452
      c1 += ASCII_A - ASCII_a;
1074

375474
    if (ASCII_a <= c2 && c2 <= ASCII_z)
1075
      /* The following line will never get executed.  streqci() is
1076
       * only called from two places, both of which guarantee to put
1077
       * upper-case strings into s2.
1078
       */
1079
      c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1080
375474
    if (c1 != c2)
1081
33732
      return 0;
1082
341742
    if (!c1)
1083
31080
      break;
1084

310662
  }
1085
31080
  return 1;
1086
64812
}
1087
1088
static void PTRCALL
1089
initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
1090
                   const char *end, POSITION *pos)
1091
{
1092
19158
  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1093
9579
}
1094
1095
static int
1096
toAscii(const ENCODING *enc, const char *ptr, const char *end)
1097
{
1098
1280340
  char buf[1];
1099
1280340
  char *p = buf;
1100
1280340
  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1101
1280340
  if (p == buf)
1102
180
    return -1;
1103
  else
1104
1280160
    return buf[0];
1105
1280340
}
1106
1107
static int FASTCALL
1108
isSpace(int c)
1109
{
1110

1325592
  switch (c) {
1111
  case 0x20:
1112
  case 0xD:
1113
  case 0xA:
1114
  case 0x9:
1115
63102
    return 1;
1116
  }
1117
599694
  return 0;
1118
662796
}
1119
1120
/* Return 1 if there's just optional white space or there's an S
1121
   followed by name=val.
1122
*/
1123
static int
1124
parsePseudoAttribute(const ENCODING *enc,
1125
                     const char *ptr,
1126
                     const char *end,
1127
                     const char **namePtr,
1128
                     const char **nameEndPtr,
1129
                     const char **valPtr,
1130
                     const char **nextTokPtr)
1131
{
1132
  int c;
1133
  char open;
1134
189204
  if (ptr == end) {
1135
31506
    *namePtr = NULL;
1136
31506
    return 1;
1137
  }
1138
63096
  if (!isSpace(toAscii(enc, ptr, end))) {
1139
    *nextTokPtr = ptr;
1140
    return 0;
1141
  }
1142
63096
  do {
1143
63096
    ptr += enc->minBytesPerChar;
1144
63096
  } while (isSpace(toAscii(enc, ptr, end)));
1145
63096
  if (ptr == end) {
1146
    *namePtr = NULL;
1147
    return 1;
1148
  }
1149
63096
  *namePtr = ptr;
1150
536454
  for (;;) {
1151
536454
    c = toAscii(enc, ptr, end);
1152
536454
    if (c == -1) {
1153
12
      *nextTokPtr = ptr;
1154
12
      return 0;
1155
    }
1156
536442
    if (c == ASCII_EQUALS) {
1157
63084
      *nameEndPtr = ptr;
1158
63084
      break;
1159
    }
1160
473358
    if (isSpace(c)) {
1161
      *nameEndPtr = ptr;
1162
      do {
1163
        ptr += enc->minBytesPerChar;
1164
      } while (isSpace(c = toAscii(enc, ptr, end)));
1165
      if (c != ASCII_EQUALS) {
1166
        *nextTokPtr = ptr;
1167
        return 0;
1168
      }
1169
      break;
1170
    }
1171
473358
    ptr += enc->minBytesPerChar;
1172
  }
1173
63084
  if (ptr == *namePtr) {
1174
6
    *nextTokPtr = ptr;
1175
6
    return 0;
1176
  }
1177
63078
  ptr += enc->minBytesPerChar;
1178
63078
  c = toAscii(enc, ptr, end);
1179
126156
  while (isSpace(c)) {
1180
    ptr += enc->minBytesPerChar;
1181
    c = toAscii(enc, ptr, end);
1182
  }
1183
63078
  if (c != ASCII_QUOT && c != ASCII_APOS) {
1184
    *nextTokPtr = ptr;
1185
    return 0;
1186
  }
1187
63078
  open = (char)c;
1188
63078
  ptr += enc->minBytesPerChar;
1189
63078
  *valPtr = ptr;
1190
523182
  for (;; ptr += enc->minBytesPerChar) {
1191
523182
    c = toAscii(enc, ptr, end);
1192
523182
    if (c == open)
1193
      break;
1194
460104
    if (!(ASCII_a <= c && c <= ASCII_z)
1195
450366
        && !(ASCII_A <= c && c <= ASCII_Z)
1196
1231872
        && !(ASCII_0 <= c && c <= ASCII_9)
1197
307968
        && c != ASCII_PERIOD
1198
307968
        && c != ASCII_MINUS
1199
307968
        && c != ASCII_UNDERSCORE) {
1200
      *nextTokPtr = ptr;
1201
      return 0;
1202
    }
1203
  }
1204
63078
  *nextTokPtr = ptr + enc->minBytesPerChar;
1205
63078
  return 1;
1206
94602
}
1207
1208
static const char KW_version[] = {
1209
  ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
1210
};
1211
1212
static const char KW_encoding[] = {
1213
  ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
1214
};
1215
1216
static const char KW_standalone[] = {
1217
  ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
1218
  ASCII_n, ASCII_e, '\0'
1219
};
1220
1221
static const char KW_yes[] = {
1222
  ASCII_y, ASCII_e, ASCII_s,  '\0'
1223
};
1224
1225
static const char KW_no[] = {
1226
  ASCII_n, ASCII_o,  '\0'
1227
};
1228
1229
static int
1230
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
1231
                                                 const char *,
1232
                                                 const char *),
1233
               int isGeneralTextEntity,
1234
               const ENCODING *enc,
1235
               const char *ptr,
1236
               const char *end,
1237
               const char **badPtr,
1238
               const char **versionPtr,
1239
               const char **versionEndPtr,
1240
               const char **encodingName,
1241
               const ENCODING **encoding,
1242
               int *standalone)
1243
{
1244
31686
  const char *val = NULL;
1245
31686
  const char *name = NULL;
1246
31686
  const char *nameEnd = NULL;
1247
31686
  ptr += 5 * enc->minBytesPerChar;
1248
31686
  end -= 2 * enc->minBytesPerChar;
1249
31686
  if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
1250
31686
      || !name) {
1251
18
    *badPtr = ptr;
1252
18
    return 0;
1253
  }
1254
31668
  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1255
18
    if (!isGeneralTextEntity) {
1256
      *badPtr = name;
1257
      return 0;
1258
    }
1259
  }
1260
  else {
1261
31650
    if (versionPtr)
1262
31650
      *versionPtr = val;
1263
31650
    if (versionEndPtr)
1264
31650
      *versionEndPtr = ptr;
1265
31650
    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1266
6
      *badPtr = ptr;
1267
6
      return 0;
1268
    }
1269
31644
    if (!name) {
1270
252
      if (isGeneralTextEntity) {
1271
        /* a TextDecl must have an EncodingDecl */
1272
        *badPtr = ptr;
1273
        return 0;
1274
      }
1275
252
      return 1;
1276
    }
1277
  }
1278
31410
  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1279
31266
    int c = toAscii(enc, val, end);
1280

61326
    if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
1281
      *badPtr = val;
1282
      return 0;
1283
    }
1284
31266
    if (encodingName)
1285
31266
      *encodingName = val;
1286
31266
    if (encoding)
1287
31266
      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1288
31266
    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1289
6
      *badPtr = ptr;
1290
6
      return 0;
1291
    }
1292
31260
    if (!name)
1293
31242
      return 1;
1294
18
  }
1295
162
  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
1296
162
      || isGeneralTextEntity) {
1297
    *badPtr = name;
1298
    return 0;
1299
  }
1300
162
  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1301
30
    if (standalone)
1302
30
      *standalone = 1;
1303
  }
1304
132
  else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
1305
132
    if (standalone)
1306
132
      *standalone = 0;
1307
  }
1308
  else {
1309
    *badPtr = val;
1310
    return 0;
1311
  }
1312
336
  while (isSpace(toAscii(enc, ptr, end)))
1313
6
    ptr += enc->minBytesPerChar;
1314
162
  if (ptr != end) {
1315
    *badPtr = ptr;
1316
    return 0;
1317
  }
1318
162
  return 1;
1319
31686
}
1320
1321
static int FASTCALL
1322
checkCharRefNumber(int result)
1323
{
1324


40572
  switch (result >> 8) {
1325
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
1326
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
1327
6
    return -1;
1328
  case 0:
1329
13518
    if (latin1_encoding.type[result] == BT_NONXML)
1330
6
      return -1;
1331
    break;
1332
  case 0xFF:
1333
    if (result == 0xFFFE || result == 0xFFFF)
1334
      return -1;
1335
    break;
1336
  }
1337
13518
  return result;
1338
13530
}
1339
1340
int FASTCALL
1341
XmlUtf8Encode(int c, char *buf)
1342
{
1343
  enum {
1344
    /* minN is minimum legal resulting value for N byte sequence */
1345
    min2 = 0x80,
1346
    min3 = 0x800,
1347
    min4 = 0x10000
1348
  };
1349
1350
26964
  if (c < 0)
1351
    return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1352
13482
  if (c < min2) {
1353
114
    buf[0] = (char)(c | UTF8_cval1);
1354
114
    return 1;
1355
  }
1356
13368
  if (c < min3) {
1357
13368
    buf[0] = (char)((c >> 6) | UTF8_cval2);
1358
13368
    buf[1] = (char)((c & 0x3f) | 0x80);
1359
13368
    return 2;
1360
  }
1361
  if (c < min4) {
1362
    buf[0] = (char)((c >> 12) | UTF8_cval3);
1363
    buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1364
    buf[2] = (char)((c & 0x3f) | 0x80);
1365
    return 3;
1366
  }
1367
  if (c < 0x110000) {
1368
    buf[0] = (char)((c >> 18) | UTF8_cval4);
1369
    buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1370
    buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1371
    buf[3] = (char)((c & 0x3f) | 0x80);
1372
    return 4;
1373
  }
1374
  return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1375
13482
}
1376
1377
int FASTCALL
1378
XmlUtf16Encode(int charNum, unsigned short *buf)
1379
{
1380
  if (charNum < 0)
1381
    return 0;
1382
  if (charNum < 0x10000) {
1383
    buf[0] = (unsigned short)charNum;
1384
    return 1;
1385
  }
1386
  if (charNum < 0x110000) {
1387
    charNum -= 0x10000;
1388
    buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1389
    buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1390
    return 2;
1391
  }
1392
  return 0;
1393
}
1394
1395
struct unknown_encoding {
1396
  struct normal_encoding normal;
1397
  CONVERTER convert;
1398
  void *userData;
1399
  unsigned short utf16[256];
1400
  char utf8[256][4];
1401
};
1402
1403
#define AS_UNKNOWN_ENCODING(enc)  ((const struct unknown_encoding *) (enc))
1404
1405
int
1406
XmlSizeOfUnknownEncoding(void)
1407
{
1408
456
  return sizeof(struct unknown_encoding);
1409
}
1410
1411
static int PTRFASTCALL
1412
unknown_isName(const ENCODING *enc, const char *p)
1413
{
1414
564
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1415
282
  int c = uenc->convert(uenc->userData, p);
1416
282
  if (c & ~0xFFFF)
1417
6
    return 0;
1418
276
  return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1419
282
}
1420
1421
static int PTRFASTCALL
1422
unknown_isNmstrt(const ENCODING *enc, const char *p)
1423
{
1424
156
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1425
78
  int c = uenc->convert(uenc->userData, p);
1426
78
  if (c & ~0xFFFF)
1427
6
    return 0;
1428
72
  return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1429
78
}
1430
1431
static int PTRFASTCALL
1432
unknown_isInvalid(const ENCODING *enc, const char *p)
1433
{
1434
36
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1435
18
  int c = uenc->convert(uenc->userData, p);
1436
36
  return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1437
}
1438
1439
static enum XML_Convert_Result PTRCALL
1440
unknown_toUtf8(const ENCODING *enc,
1441
               const char **fromP, const char *fromLim,
1442
               char **toP, const char *toLim)
1443
{
1444
864
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1445
432
  char buf[XML_UTF8_ENCODE_MAX];
1446
1860
  for (;;) {
1447
    const char *utf8;
1448
    int n;
1449
1860
    if (*fromP == fromLim)
1450
420
      return XML_CONVERT_COMPLETED;
1451
1440
    utf8 = uenc->utf8[(unsigned char)**fromP];
1452
1440
    n = *utf8++;
1453
1440
    if (n == 0) {
1454
42
      int c = uenc->convert(uenc->userData, *fromP);
1455
42
      n = XmlUtf8Encode(c, buf);
1456
42
      if (n > toLim - *toP)
1457
6
        return XML_CONVERT_OUTPUT_EXHAUSTED;
1458
      utf8 = buf;
1459
72
      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1460
36
                 - (BT_LEAD2 - 2));
1461
36
    }
1462
    else {
1463
1398
      if (n > toLim - *toP)
1464
6
        return XML_CONVERT_OUTPUT_EXHAUSTED;
1465
1392
      (*fromP)++;
1466
    }
1467
1428
    memcpy(*toP, utf8, n);
1468
1428
    *toP += n;
1469
1428
  }
1470
432
}
1471
1472
static enum XML_Convert_Result PTRCALL
1473
unknown_toUtf16(const ENCODING *enc,
1474
                const char **fromP, const char *fromLim,
1475
                unsigned short **toP, const unsigned short *toLim)
1476
{
1477
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1478
  while (*fromP < fromLim && *toP < toLim) {
1479
    unsigned short c = uenc->utf16[(unsigned char)**fromP];
1480
    if (c == 0) {
1481
      c = (unsigned short)
1482
          uenc->convert(uenc->userData, *fromP);
1483
      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1484
                 - (BT_LEAD2 - 2));
1485
    }
1486
    else
1487
      (*fromP)++;
1488
    *(*toP)++ = c;
1489
  }
1490
1491
  if ((*toP == toLim) && (*fromP < fromLim))
1492
    return XML_CONVERT_OUTPUT_EXHAUSTED;
1493
  else
1494
    return XML_CONVERT_COMPLETED;
1495
}
1496
1497
ENCODING *
1498
XmlInitUnknownEncoding(void *mem,
1499
                       int *table,
1500
                       CONVERTER convert,
1501
                       void *userData)
1502
{
1503
  int i;
1504
432
  struct unknown_encoding *e = (struct unknown_encoding *)mem;
1505
200880
  for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
1506
100224
    ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
1507
54300
  for (i = 0; i < 128; i++)
1508
45846
    if (latin1_encoding.type[i] != BT_OTHER
1509
51990
        && latin1_encoding.type[i] != BT_NONXML
1510
43956
        && table[i] != i)
1511
6
      return 0;
1512
101880
  for (i = 0; i < 256; i++) {
1513
50754
    int c = table[i];
1514
50754
    if (c == -1) {
1515
2340
      e->normal.type[i] = BT_MALFORM;
1516
      /* This shouldn't really get used. */
1517
2340
      e->utf16[i] = 0xFFFF;
1518
2340
      e->utf8[i][0] = 1;
1519
2340
      e->utf8[i][1] = 0;
1520
2340
    }
1521
48414
    else if (c < 0) {
1522
8460
      if (c < -4)
1523
6
        return 0;
1524
      /* Multi-byte sequences need a converter function */
1525
8454
      if (!convert)
1526
6
        return 0;
1527
8448
      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1528
8448
      e->utf8[i][0] = 0;
1529
8448
      e->utf16[i] = 0;
1530
8448
    }
1531
39954
    else if (c < 0x80) {
1532
45792
      if (latin1_encoding.type[c] != BT_OTHER
1533
51882
          && latin1_encoding.type[c] != BT_NONXML
1534
43902
          && c != i)
1535
6
        return 0;
1536
26880
      e->normal.type[i] = latin1_encoding.type[c];
1537
26880
      e->utf8[i][0] = 1;
1538
26880
      e->utf8[i][1] = (char)c;
1539
26880
      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
1540
26880
    }
1541
13068
    else if (checkCharRefNumber(c) < 0) {
1542
6
      e->normal.type[i] = BT_NONXML;
1543
      /* This shouldn't really get used. */
1544
6
      e->utf16[i] = 0xFFFF;
1545
6
      e->utf8[i][0] = 1;
1546
6
      e->utf8[i][1] = 0;
1547
6
    }
1548
    else {
1549
13062
      if (c > 0xFFFF)
1550
6
        return 0;
1551
13056
      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1552
6324
        e->normal.type[i] = BT_NMSTRT;
1553
6732
      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1554
        e->normal.type[i] = BT_NAME;
1555
      else
1556
        e->normal.type[i] = BT_OTHER;
1557
13056
      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1558
13056
      e->utf16[i] = (unsigned short)c;
1559
    }
1560
50730
  }
1561
186
  e->userData = userData;
1562
186
  e->convert = convert;
1563
186
  if (convert) {
1564
66
    e->normal.isName2 = unknown_isName;
1565
66
    e->normal.isName3 = unknown_isName;
1566
66
    e->normal.isName4 = unknown_isName;
1567
66
    e->normal.isNmstrt2 = unknown_isNmstrt;
1568
66
    e->normal.isNmstrt3 = unknown_isNmstrt;
1569
66
    e->normal.isNmstrt4 = unknown_isNmstrt;
1570
66
    e->normal.isInvalid2 = unknown_isInvalid;
1571
66
    e->normal.isInvalid3 = unknown_isInvalid;
1572
66
    e->normal.isInvalid4 = unknown_isInvalid;
1573
66
  }
1574
186
  e->normal.enc.utf8Convert = unknown_toUtf8;
1575
186
  e->normal.enc.utf16Convert = unknown_toUtf16;
1576
186
  return &(e->normal.enc);
1577
216
}
1578
1579
/* Encoding indices.  If this enumeration is changed, getEncodingIndex
   and encodings must also be changed. */
enum {
  UNKNOWN_ENC = -1,   /* name given but not recognised */
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC              /* no name given at all */
};
1592
1593
static const char KW_ISO_8859_1[] = {
1594
  ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
1595
  ASCII_MINUS, ASCII_1, '\0'
1596
};
1597
static const char KW_US_ASCII[] = {
1598
  ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
1599
  '\0'
1600
};
1601
static const char KW_UTF_8[] =  {
1602
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
1603
};
1604
static const char KW_UTF_16[] = {
1605
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
1606
};
1607
static const char KW_UTF_16BE[] = {
1608
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
1609
  '\0'
1610
};
1611
static const char KW_UTF_16LE[] = {
1612
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
1613
  '\0'
1614
};
1615
1616
static int FASTCALL
1617
getEncodingIndex(const char *name)
1618
{
1619
  static const char * const encodingNames[] = {
1620
    KW_ISO_8859_1,
1621
    KW_US_ASCII,
1622
    KW_UTF_8,
1623
    KW_UTF_16,
1624
    KW_UTF_16BE,
1625
    KW_UTF_16LE,
1626
  };
1627
  int i;
1628
225192
  if (name == NULL)
1629
81468
    return NO_ENC;
1630
67668
  for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
1631
33600
    if (streqci(name, encodingNames[i]))
1632
30894
      return i;
1633
234
  return UNKNOWN_ENC;
1634
112596
}
1635
1636
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.
*/

/* Read the stored encoding index back as an int. */
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
/* Store encoding index i.  The argument is parenthesized so that the
   cast applies to the whole expression, not just its first operand. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1642
1643
/* This is what detects the encoding.  encodingTable maps from
1644
   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
1645
   the external (protocol) specified encoding; state is
1646
   XML_CONTENT_STATE if we're parsing an external text entity, and
1647
   XML_PROLOG_STATE otherwise.
1648
*/
1649
1650
1651
static int
1652
initScan(const ENCODING * const *encodingTable,
1653
         const INIT_ENCODING *enc,
1654
         int state,
1655
         const char *ptr,
1656
         const char *end,
1657
         const char **nextTokPtr)
1658
{
1659
  const ENCODING **encPtr;
1660
1661
98928
  if (ptr >= end)
1662
12
    return XML_TOK_NONE;
1663
49452
  encPtr = enc->encPtr;
1664
49452
  if (ptr + 1 == end) {
1665
    /* only a single byte available for auto-detection */
1666
#ifndef XML_DTD /* FIXME */
1667
    /* a well-formed document entity must have more than one byte */
1668
    if (state != XML_CONTENT_STATE)
1669
      return XML_TOK_PARTIAL;
1670
#endif
1671
    /* so we're parsing an external text entity... */
1672
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
1673
9651
    switch (INIT_ENC_INDEX(enc)) {
1674
    case UTF_16_ENC:
1675
    case UTF_16LE_ENC:
1676
    case UTF_16BE_ENC:
1677
18
      return XML_TOK_PARTIAL;
1678
    }
1679

9705
    switch ((unsigned char)*ptr) {
1680
    case 0xFE:
1681
    case 0xFF:
1682
    case 0xEF: /* possibly first byte of UTF-8 BOM */
1683
72
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1684
72
          && state == XML_CONTENT_STATE)
1685
        break;
1686
      /* fall through */
1687
    case 0x00:
1688
    case 0x3C:
1689
9531
      return XML_TOK_PARTIAL;
1690
    }
1691
  }
1692
  else {
1693

39801
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1694
    case 0xFEFF:
1695
12
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1696
12
          && state == XML_CONTENT_STATE)
1697
        break;
1698
6
      *nextTokPtr = ptr + 2;
1699
6
      *encPtr = encodingTable[UTF_16BE_ENC];
1700
6
      return XML_TOK_BOM;
1701
    /* 00 3C is handled in the default case */
1702
    case 0x3C00:
1703
84
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1704
150
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1705
78
          && state == XML_CONTENT_STATE)
1706
        break;
1707
72
      *encPtr = encodingTable[UTF_16LE_ENC];
1708
72
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1709
    case 0xFFFE:
1710
30
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1711
30
          && state == XML_CONTENT_STATE)
1712
        break;
1713
24
      *nextTokPtr = ptr + 2;
1714
24
      *encPtr = encodingTable[UTF_16LE_ENC];
1715
24
      return XML_TOK_BOM;
1716
    case 0xEFBB:
1717
      /* Maybe a UTF-8 BOM (EF BB BF) */
1718
      /* If there's an explicitly specified (external) encoding
1719
         of ISO-8859-1 or some flavour of UTF-16
1720
         and this is an external text entity,
1721
         don't look for the BOM,
1722
         because it might be a legal data.
1723
      */
1724
60
      if (state == XML_CONTENT_STATE) {
1725
24
        int e = INIT_ENC_INDEX(enc);
1726
72
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
1727
48
            || e == UTF_16LE_ENC || e == UTF_16_ENC)
1728
          break;
1729
24
      }
1730
60
      if (ptr + 2 == end)
1731
30
        return XML_TOK_PARTIAL;
1732
30
      if ((unsigned char)ptr[2] == 0xBF) {
1733
24
        *nextTokPtr = ptr + 3;
1734
24
        *encPtr = encodingTable[UTF_8_ENC];
1735
24
        return XML_TOK_BOM;
1736
      }
1737
      break;
1738
    default:
1739
39621
      if (ptr[0] == '\0') {
1740
        /* 0 isn't a legal data character. Furthermore a document
1741
           entity can only start with ASCII characters.  So the only
1742
           way this can fail to be big-endian UTF-16 if it it's an
1743
           external parsed general entity that's labelled as
1744
           UTF-16LE.
1745
        */
1746

264
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1747
          break;
1748
252
        *encPtr = encodingTable[UTF_16BE_ENC];
1749
252
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1750
      }
1751
78726
      else if (ptr[1] == '\0') {
1752
        /* We could recover here in the case:
1753
            - parsing an external entity
1754
            - second byte is 0
1755
            - no externally specified encoding
1756
            - no encoding declaration
1757
           by assuming UTF-16LE.  But we don't, because this would mean when
1758
           presented just with a single byte, we couldn't reliably determine
1759
           whether we needed further bytes.
1760
        */
1761
39363
        if (state == XML_CONTENT_STATE)
1762
          break;
1763
6
        *encPtr = encodingTable[UTF_16LE_ENC];
1764
6
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1765
      }
1766
      break;
1767
    }
1768
  }
1769
39489
  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1770
39489
  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1771
49464
}
1772
1773
1774
#define NS(x) x
1775
#define ns(x) x
1776
#define XML_TOK_NS_C
1777
#include "xmltok_ns.c"
1778
#undef XML_TOK_NS_C
1779
#undef NS
1780
#undef ns
1781
1782
#ifdef XML_NS
1783
1784
#define NS(x) x ## NS
1785
#define ns(x) x ## _ns
1786
1787
#define XML_TOK_NS_C
1788
#include "xmltok_ns.c"
1789
#undef XML_TOK_NS_C
1790
1791
#undef NS
1792
#undef ns
1793
1794
ENCODING *
1795
XmlInitUnknownEncodingNS(void *mem,
1796
                         int *table,
1797
                         CONVERTER convert,
1798
                         void *userData)
1799
{
1800
12
  ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1801
6
  if (enc)
1802
6
    ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1803
6
  return enc;
1804
}
1805
1806
#endif /* XML_NS */