GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libexpat/lib/xmltok.c Lines: 365 476 76.7 %
Date: 2017-11-07 Branches: 265 460 57.6 %

Line Branch Exec Source
1
/*
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
   Copyright (c) 2000-2017 Expat development team
11
   Licensed under the MIT license:
12
13
   Permission is  hereby granted,  free of charge,  to any  person obtaining
14
   a  copy  of  this  software   and  associated  documentation  files  (the
15
   "Software"),  to  deal in  the  Software  without restriction,  including
16
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17
   distribute, sublicense, and/or sell copies of the Software, and to permit
18
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19
   following conditions:
20
21
   The above copyright  notice and this permission notice  shall be included
22
   in all copies or substantial portions of the Software.
23
24
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30
   USE OR OTHER DEALINGS IN THE SOFTWARE.
31
*/
32
33
#include <stddef.h>
34
#include <stdbool.h>
35
#include <string.h>  // memcpy
36
37
#ifdef _WIN32
38
#include "winconfig.h"
39
#else
40
#ifdef HAVE_EXPAT_CONFIG_H
41
#include <expat_config.h>
42
#endif
43
#endif /* ndef _WIN32 */
44
45
#include "expat_external.h"
46
#include "internal.h"
47
#include "xmltok.h"
48
#include "nametab.h"
49
50
#ifdef XML_DTD
51
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
52
#else
53
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
54
#endif
55
56
#define VTABLE1 \
57
  { PREFIX(prologTok), PREFIX(contentTok), \
58
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
59
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
60
  PREFIX(sameName), \
61
  PREFIX(nameMatchesAscii), \
62
  PREFIX(nameLength), \
63
  PREFIX(skipS), \
64
  PREFIX(getAtts), \
65
  PREFIX(charRefNumber), \
66
  PREFIX(predefinedEntityName), \
67
  PREFIX(updatePosition), \
68
  PREFIX(isPublicId)
69
70
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
71
72
#define UCS2_GET_NAMING(pages, hi, lo) \
73
   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
74
75
/* A 2 byte UTF-8 representation splits the characters 11 bits between
76
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
77
   pages, 3 bits to add to that index and 5 bits to generate the mask.
78
*/
79
#define UTF8_GET_NAMING2(pages, byte) \
80
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
81
                      + ((((byte)[0]) & 3) << 1) \
82
                      + ((((byte)[1]) >> 5) & 1)] \
83
         & (1u << (((byte)[1]) & 0x1F)))
84
85
/* A 3 byte UTF-8 representation splits the characters 16 bits between
86
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
87
   into pages, 3 bits to add to that index and 5 bits to generate the
88
   mask.
89
*/
90
#define UTF8_GET_NAMING3(pages, byte) \
91
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
92
                             + ((((byte)[1]) >> 2) & 0xF)] \
93
                       << 3) \
94
                      + ((((byte)[1]) & 3) << 1) \
95
                      + ((((byte)[2]) >> 5) & 1)] \
96
         & (1u << (((byte)[2]) & 0x1F)))
97
98
#define UTF8_GET_NAMING(pages, p, n) \
99
  ((n) == 2 \
100
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
101
  : ((n) == 3 \
102
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
103
     : 0))
104
105
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
106
   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
107
   with the additional restriction of not allowing the Unicode
108
   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
109
   Implementation details:
110
     (A & 0x80) == 0     means A < 0x80
111
   and
112
     (A & 0xC0) == 0xC0  means A > 0xBF
113
*/
114
115
#define UTF8_INVALID2(p) \
116
  ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
117
118
#define UTF8_INVALID3(p) \
119
  (((p)[2] & 0x80) == 0 \
120
  || \
121
  ((*p) == 0xEF && (p)[1] == 0xBF \
122
    ? \
123
    (p)[2] > 0xBD \
124
    : \
125
    ((p)[2] & 0xC0) == 0xC0) \
126
  || \
127
  ((*p) == 0xE0 \
128
    ? \
129
    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
130
    : \
131
    ((p)[1] & 0x80) == 0 \
132
    || \
133
    ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
134
135
#define UTF8_INVALID4(p) \
136
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
137
  || \
138
  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
139
  || \
140
  ((*p) == 0xF0 \
141
    ? \
142
    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
143
    : \
144
    ((p)[1] & 0x80) == 0 \
145
    || \
146
    ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
147
148
static int PTRFASTCALL
149
isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
150
{
151
20
  return 0;
152
}
153
154
static int PTRFASTCALL
155
utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
156
{
157
  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
158
}
159
160
static int PTRFASTCALL
161
utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
162
{
163
20
  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
164
}
165
166
#define utf8_isName4 isNever
167
168
static int PTRFASTCALL
169
utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
170
{
171
  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
172
}
173
174
static int PTRFASTCALL
175
utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
176
{
177
20
  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
178
}
179
180
#define utf8_isNmstrt4 isNever
181
182
static int PTRFASTCALL
183
utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
184
{
185

1910
  return UTF8_INVALID2((const unsigned char *)p);
186
}
187
188
static int PTRFASTCALL
189
utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
190
{
191




880
  return UTF8_INVALID3((const unsigned char *)p);
192
}
193
194
static int PTRFASTCALL
195
utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
196
{
197



150
  return UTF8_INVALID4((const unsigned char *)p);
198
}
199
200
struct normal_encoding {
201
  ENCODING enc;
202
  unsigned char type[256];
203
#ifdef XML_MIN_SIZE
204
  int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
205
  int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
206
  int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
207
  int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
208
  int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
209
#endif /* XML_MIN_SIZE */
210
  int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
211
  int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
212
  int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
213
  int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
214
  int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
215
  int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
216
  int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
217
  int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
218
  int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
219
};
220
221
#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))
222
223
#ifdef XML_MIN_SIZE
224
225
#define STANDARD_VTABLE(E) \
226
 E ## byteType, \
227
 E ## isNameMin, \
228
 E ## isNmstrtMin, \
229
 E ## byteToAscii, \
230
 E ## charMatches,
231
232
#else
233
234
#define STANDARD_VTABLE(E) /* as nothing */
235
236
#endif
237
238
#define NORMAL_VTABLE(E) \
239
 E ## isName2, \
240
 E ## isName3, \
241
 E ## isName4, \
242
 E ## isNmstrt2, \
243
 E ## isNmstrt3, \
244
 E ## isNmstrt4, \
245
 E ## isInvalid2, \
246
 E ## isInvalid3, \
247
 E ## isInvalid4
248
249
#define NULL_VTABLE \
250
 /* isName2 */ NULL, \
251
 /* isName3 */ NULL, \
252
 /* isName4 */ NULL, \
253
 /* isNmstrt2 */ NULL, \
254
 /* isNmstrt3 */ NULL, \
255
 /* isNmstrt4 */ NULL, \
256
 /* isInvalid2 */ NULL, \
257
 /* isInvalid3 */ NULL, \
258
 /* isInvalid4 */ NULL
259
260
static int FASTCALL checkCharRefNumber(int);
261
262
#include "xmltok_impl.h"
263
#include "ascii.h"
264
265
#ifdef XML_MIN_SIZE
266
#define sb_isNameMin isNever
267
#define sb_isNmstrtMin isNever
268
#endif
269
270
#ifdef XML_MIN_SIZE
271
#define MINBPC(enc) ((enc)->minBytesPerChar)
272
#else
273
/* minimum bytes per character */
274
#define MINBPC(enc) 1
275
#endif
276
277
#define SB_BYTE_TYPE(enc, p) \
278
  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
279
280
#ifdef XML_MIN_SIZE
281
static int PTRFASTCALL
282
sb_byteType(const ENCODING *enc, const char *p)
283
{
284
  return SB_BYTE_TYPE(enc, p);
285
}
286
#define BYTE_TYPE(enc, p) \
287
 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
288
#else
289
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
290
#endif
291
292
#ifdef XML_MIN_SIZE
293
#define BYTE_TO_ASCII(enc, p) \
294
 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
295
static int PTRFASTCALL
296
sb_byteToAscii(const ENCODING *enc, const char *p)
297
{
298
  return *p;
299
}
300
#else
301
#define BYTE_TO_ASCII(enc, p) (*(p))
302
#endif
303
304
#define IS_NAME_CHAR(enc, p, n) \
305
 (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
306
#define IS_NMSTRT_CHAR(enc, p, n) \
307
 (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
308
#define IS_INVALID_CHAR(enc, p, n) \
309
 (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
310
311
#ifdef XML_MIN_SIZE
312
#define IS_NAME_CHAR_MINBPC(enc, p) \
313
 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
314
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
315
 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
316
#else
317
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
318
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
319
#endif
320
321
#ifdef XML_MIN_SIZE
322
#define CHAR_MATCHES(enc, p, c) \
323
 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
324
static int PTRCALL
325
sb_charMatches(const ENCODING *enc, const char *p, int c)
326
{
327
  return *p == c;
328
}
329
#else
330
/* c is an ASCII character */
331
#define CHAR_MATCHES(enc, p, c) (*(p) == c)
332
#endif
333
334
#define PREFIX(ident) normal_ ## ident
335
#define XML_TOK_IMPL_C
336
#include "xmltok_impl.c"
337
#undef XML_TOK_IMPL_C
338
339
#undef MINBPC
340
#undef BYTE_TYPE
341
#undef BYTE_TO_ASCII
342
#undef CHAR_MATCHES
343
#undef IS_NAME_CHAR
344
#undef IS_NAME_CHAR_MINBPC
345
#undef IS_NMSTRT_CHAR
346
#undef IS_NMSTRT_CHAR_MINBPC
347
#undef IS_INVALID_CHAR
348
349
enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
350
  UTF8_cval1 = 0x00,
351
  UTF8_cval2 = 0xc0,
352
  UTF8_cval3 = 0xe0,
353
  UTF8_cval4 = 0xf0
354
};
355
356
void
357
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
358
{
359
4481988
  const char * fromLim = *fromLimRef;
360
  size_t walked = 0;
361
4482408
  for (; fromLim > from; fromLim--, walked++) {
362
2240424
    const unsigned char prev = (unsigned char)fromLim[-1];
363
2240424
    if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
364
40
      if (walked + 1 >= 4) {
365
10
        fromLim += 4 - 1;
366
10
        break;
367
      } else {
368
        walked = 0;
369
      }
370
2240414
    } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
371
40
      if (walked + 1 >= 3) {
372
20
        fromLim += 3 - 1;
373
20
        break;
374
      } else {
375
        walked = 0;
376
      }
377
2240364
    } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
378
50
      if (walked + 1 >= 2) {
379
20
        fromLim += 2 - 1;
380
20
        break;
381
      } else {
382
        walked = 0;
383
      }
384
2240324
    } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
385
2240164
      break;
386
    }
387
210
  }
388
2240994
  *fromLimRef = fromLim;
389
2240994
}
390
391
static enum XML_Convert_Result PTRCALL
392
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
393
            const char **fromP, const char *fromLim,
394
            char **toP, const char *toLim)
395
{
396
  bool input_incomplete = false;
397
  bool output_exhausted = false;
398
399
  /* Avoid copying partial characters (due to limited space). */
400
2240884
  const ptrdiff_t bytesAvailable = fromLim - *fromP;
401
2240884
  const ptrdiff_t bytesStorable = toLim - *toP;
402
2240884
  if (bytesAvailable > bytesStorable) {
403
2079520
    fromLim = *fromP + bytesStorable;
404
    output_exhausted = true;
405
2079520
  }
406
407
  /* Avoid copying partial characters (from incomplete input). */
408
2240884
  const char * const fromLimBefore = fromLim;
409
2240884
  align_limit_to_full_utf8_characters(*fromP, &fromLim);
410
2240884
  if (fromLim < fromLimBefore) {
411
    input_incomplete = true;
412
20
  }
413
414
2240884
  const ptrdiff_t bytesToCopy = fromLim - *fromP;
415
2240884
  memcpy((void *)*toP, (const void *)*fromP, (size_t)bytesToCopy);
416
2240884
  *fromP += bytesToCopy;
417
2240884
  *toP += bytesToCopy;
418
419
2240884
  if (output_exhausted)  // needs to go first
420
2079520
    return XML_CONVERT_OUTPUT_EXHAUSTED;
421
161364
  else if (input_incomplete)
422
    return XML_CONVERT_INPUT_INCOMPLETE;
423
  else
424
161364
    return XML_CONVERT_COMPLETED;
425
2240884
}
426
427
static enum XML_Convert_Result PTRCALL
428
utf8_toUtf16(const ENCODING *enc,
429
             const char **fromP, const char *fromLim,
430
             unsigned short **toP, const unsigned short *toLim)
431
{
432
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
433
  unsigned short *to = *toP;
434
  const char *from = *fromP;
435
  while (from < fromLim && to < toLim) {
436
    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
437
    case BT_LEAD2:
438
      if (fromLim - from < 2) {
439
        res = XML_CONVERT_INPUT_INCOMPLETE;
440
        goto after;
441
      }
442
      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
443
      from += 2;
444
      break;
445
    case BT_LEAD3:
446
      if (fromLim - from < 3) {
447
        res = XML_CONVERT_INPUT_INCOMPLETE;
448
        goto after;
449
      }
450
      *to++ = (unsigned short)(((from[0] & 0xf) << 12)
451
                               | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
452
      from += 3;
453
      break;
454
    case BT_LEAD4:
455
      {
456
        unsigned long n;
457
        if (toLim - to < 2) {
458
          res = XML_CONVERT_OUTPUT_EXHAUSTED;
459
          goto after;
460
        }
461
        if (fromLim - from < 4) {
462
          res = XML_CONVERT_INPUT_INCOMPLETE;
463
          goto after;
464
        }
465
        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
466
            | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
467
        n -= 0x10000;
468
        to[0] = (unsigned short)((n >> 10) | 0xD800);
469
        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
470
        to += 2;
471
        from += 4;
472
      }
473
      break;
474
    default:
475
      *to++ = *from++;
476
      break;
477
    }
478
  }
479
  if (from < fromLim)
480
    res = XML_CONVERT_OUTPUT_EXHAUSTED;
481
after:
482
  *fromP = from;
483
  *toP = to;
484
  return res;
485
}
486
487
#ifdef XML_NS
488
static const struct normal_encoding utf8_encoding_ns = {
489
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
490
  {
491
#include "asciitab.h"
492
#include "utf8tab.h"
493
  },
494
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
495
};
496
#endif
497
498
static const struct normal_encoding utf8_encoding = {
499
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
500
  {
501
#define BT_COLON BT_NMSTRT
502
#include "asciitab.h"
503
#undef BT_COLON
504
#include "utf8tab.h"
505
  },
506
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
507
};
508
509
#ifdef XML_NS
510
511
static const struct normal_encoding internal_utf8_encoding_ns = {
512
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
513
  {
514
#include "iasciitab.h"
515
#include "utf8tab.h"
516
  },
517
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
518
};
519
520
#endif
521
522
static const struct normal_encoding internal_utf8_encoding = {
523
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
524
  {
525
#define BT_COLON BT_NMSTRT
526
#include "iasciitab.h"
527
#undef BT_COLON
528
#include "utf8tab.h"
529
  },
530
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
531
};
532
533
static enum XML_Convert_Result PTRCALL
534
latin1_toUtf8(const ENCODING *UNUSED_P(enc),
535
              const char **fromP, const char *fromLim,
536
              char **toP, const char *toLim)
537
{
538
51702480
  for (;;) {
539
    unsigned char c;
540
218874730
    if (*fromP == fromLim)
541
25851220
      return XML_CONVERT_COMPLETED;
542
193023510
    c = (unsigned char)**fromP;
543
193023510
    if (c & 0x80) {
544
470
      if (toLim - *toP < 2)
545
10
        return XML_CONVERT_OUTPUT_EXHAUSTED;
546
460
      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
547
460
      *(*toP)++ = (char)((c & 0x3f) | 0x80);
548
460
      (*fromP)++;
549
460
    }
550
    else {
551
193023040
      if (*toP == toLim)
552
10
        return XML_CONVERT_OUTPUT_EXHAUSTED;
553
193023030
      *(*toP)++ = *(*fromP)++;
554
    }
555
193023490
  }
556
25851240
}
557
558
static enum XML_Convert_Result PTRCALL
559
latin1_toUtf16(const ENCODING *UNUSED_P(enc),
560
               const char **fromP, const char *fromLim,
561
               unsigned short **toP, const unsigned short *toLim)
562
{
563
  while (*fromP < fromLim && *toP < toLim)
564
    *(*toP)++ = (unsigned char)*(*fromP)++;
565
566
  if ((*toP == toLim) && (*fromP < fromLim))
567
    return XML_CONVERT_OUTPUT_EXHAUSTED;
568
  else
569
    return XML_CONVERT_COMPLETED;
570
}
571
572
#ifdef XML_NS
573
574
static const struct normal_encoding latin1_encoding_ns = {
575
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
576
  {
577
#include "asciitab.h"
578
#include "latin1tab.h"
579
  },
580
  STANDARD_VTABLE(sb_) NULL_VTABLE
581
};
582
583
#endif
584
585
static const struct normal_encoding latin1_encoding = {
586
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
587
  {
588
#define BT_COLON BT_NMSTRT
589
#include "asciitab.h"
590
#undef BT_COLON
591
#include "latin1tab.h"
592
  },
593
  STANDARD_VTABLE(sb_) NULL_VTABLE
594
};
595
596
static enum XML_Convert_Result PTRCALL
597
ascii_toUtf8(const ENCODING *UNUSED_P(enc),
598
             const char **fromP, const char *fromLim,
599
             char **toP, const char *toLim)
600
{
601

41380
  while (*fromP < fromLim && *toP < toLim)
602
13110
    *(*toP)++ = *(*fromP)++;
603
604

690
  if ((*toP == toLim) && (*fromP < fromLim))
605
10
    return XML_CONVERT_OUTPUT_EXHAUSTED;
606
  else
607
670
    return XML_CONVERT_COMPLETED;
608
680
}
609
610
#ifdef XML_NS
611
612
static const struct normal_encoding ascii_encoding_ns = {
613
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
614
  {
615
#include "asciitab.h"
616
/* BT_NONXML == 0 */
617
  },
618
  STANDARD_VTABLE(sb_) NULL_VTABLE
619
};
620
621
#endif
622
623
static const struct normal_encoding ascii_encoding = {
624
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
625
  {
626
#define BT_COLON BT_NMSTRT
627
#include "asciitab.h"
628
#undef BT_COLON
629
/* BT_NONXML == 0 */
630
  },
631
  STANDARD_VTABLE(sb_) NULL_VTABLE
632
};
633
634
static int PTRFASTCALL
635
unicode_byte_type(char hi, char lo)
636
{
637


5720
  switch ((unsigned char)hi) {
638
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
639
140
    return BT_LEAD4;
640
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
641
10
    return BT_TRAIL;
642
  case 0xFF:
643
20
    switch ((unsigned char)lo) {
644
    case 0xFF:
645
    case 0xFE:
646
      return BT_NONXML;
647
    }
648
    break;
649
  }
650
2700
  return BT_NONASCII;
651
2850
}
652
653
#define DEFINE_UTF16_TO_UTF8(E) \
654
static enum XML_Convert_Result  PTRCALL \
655
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
656
            const char **fromP, const char *fromLim, \
657
            char **toP, const char *toLim) \
658
{ \
659
  const char *from = *fromP; \
660
  fromLim = from + (((fromLim - from) >> 1) << 1);  /* shrink to even */ \
661
  for (; from < fromLim; from += 2) { \
662
    int plane; \
663
    unsigned char lo2; \
664
    unsigned char lo = GET_LO(from); \
665
    unsigned char hi = GET_HI(from); \
666
    switch (hi) { \
667
    case 0: \
668
      if (lo < 0x80) { \
669
        if (*toP == toLim) { \
670
          *fromP = from; \
671
          return XML_CONVERT_OUTPUT_EXHAUSTED; \
672
        } \
673
        *(*toP)++ = lo; \
674
        break; \
675
      } \
676
      /* fall through */ \
677
    case 0x1: case 0x2: case 0x3: \
678
    case 0x4: case 0x5: case 0x6: case 0x7: \
679
      if (toLim -  *toP < 2) { \
680
        *fromP = from; \
681
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
682
      } \
683
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
684
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
685
      break; \
686
    default: \
687
      if (toLim -  *toP < 3)  { \
688
        *fromP = from; \
689
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
690
      } \
691
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
692
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
693
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
694
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
695
      break; \
696
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
697
      if (toLim -  *toP < 4) { \
698
        *fromP = from; \
699
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
700
      } \
701
      if (fromLim - from < 4) { \
702
        *fromP = from; \
703
        return XML_CONVERT_INPUT_INCOMPLETE; \
704
      } \
705
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
706
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
707
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
708
      from += 2; \
709
      lo2 = GET_LO(from); \
710
      *(*toP)++ = (((lo & 0x3) << 4) \
711
                   | ((GET_HI(from) & 0x3) << 2) \
712
                   | (lo2 >> 6) \
713
                   | 0x80); \
714
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
715
      break; \
716
    } \
717
  } \
718
  *fromP = from; \
719
  if (from < fromLim) \
720
    return XML_CONVERT_INPUT_INCOMPLETE; \
721
  else \
722
    return XML_CONVERT_COMPLETED; \
723
}
724
725
#define DEFINE_UTF16_TO_UTF16(E) \
726
static enum XML_Convert_Result  PTRCALL \
727
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
728
             const char **fromP, const char *fromLim, \
729
             unsigned short **toP, const unsigned short *toLim) \
730
{ \
731
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
732
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1);  /* shrink to even */ \
733
  /* Avoid copying first half only of surrogate */ \
734
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
735
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
736
    fromLim -= 2; \
737
    res = XML_CONVERT_INPUT_INCOMPLETE; \
738
  } \
739
  for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
740
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
741
  if ((*toP == toLim) && (*fromP < fromLim)) \
742
    return XML_CONVERT_OUTPUT_EXHAUSTED; \
743
  else \
744
    return res; \
745
}
746
747
#define SET2(ptr, ch) \
748
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
749
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
750
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
751
752







21360
DEFINE_UTF16_TO_UTF8(little2_)
753
DEFINE_UTF16_TO_UTF16(little2_)
754
755
#undef SET2
756
#undef GET_LO
757
#undef GET_HI
758
759
#define SET2(ptr, ch) \
760
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
761
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
762
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
763
764







260580
DEFINE_UTF16_TO_UTF8(big2_)
765
DEFINE_UTF16_TO_UTF16(big2_)
766
767
#undef SET2
768
#undef GET_LO
769
#undef GET_HI
770
771
#define LITTLE2_BYTE_TYPE(enc, p) \
772
 ((p)[1] == 0 \
773
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
774
  : unicode_byte_type((p)[1], (p)[0]))
775
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
776
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
777
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
778
  UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
779
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
780
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
781
782
#ifdef XML_MIN_SIZE
783
784
static int PTRFASTCALL
785
little2_byteType(const ENCODING *enc, const char *p)
786
{
787
  return LITTLE2_BYTE_TYPE(enc, p);
788
}
789
790
static int PTRFASTCALL
791
little2_byteToAscii(const ENCODING *enc, const char *p)
792
{
793
  return LITTLE2_BYTE_TO_ASCII(enc, p);
794
}
795
796
static int PTRCALL
797
little2_charMatches(const ENCODING *enc, const char *p, int c)
798
{
799
  return LITTLE2_CHAR_MATCHES(enc, p, c);
800
}
801
802
static int PTRFASTCALL
803
little2_isNameMin(const ENCODING *enc, const char *p)
804
{
805
  return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
806
}
807
808
static int PTRFASTCALL
809
little2_isNmstrtMin(const ENCODING *enc, const char *p)
810
{
811
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
812
}
813
814
#undef VTABLE
815
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
816
817
#else /* not XML_MIN_SIZE */
818
819
#undef PREFIX
820
#define PREFIX(ident) little2_ ## ident
821
#define MINBPC(enc) 2
822
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
823
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
824
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
825
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
826
#define IS_NAME_CHAR(enc, p, n) 0
827
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
828
#define IS_NMSTRT_CHAR(enc, p, n) (0)
829
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
830
831
#define XML_TOK_IMPL_C
832
#include "xmltok_impl.c"
833
#undef XML_TOK_IMPL_C
834
835
#undef MINBPC
836
#undef BYTE_TYPE
837
#undef BYTE_TO_ASCII
838
#undef CHAR_MATCHES
839
#undef IS_NAME_CHAR
840
#undef IS_NAME_CHAR_MINBPC
841
#undef IS_NMSTRT_CHAR
842
#undef IS_NMSTRT_CHAR_MINBPC
843
#undef IS_INVALID_CHAR
844
845
#endif /* not XML_MIN_SIZE */
846
847
#ifdef XML_NS
848
849
static const struct normal_encoding little2_encoding_ns = {
850
  { VTABLE, 2, 0,
851
#if BYTEORDER == 1234
852
    1
853
#else
854
    0
855
#endif
856
  },
857
  {
858
#include "asciitab.h"
859
#include "latin1tab.h"
860
  },
861
  STANDARD_VTABLE(little2_) NULL_VTABLE
862
};
863
864
#endif
865
866
static const struct normal_encoding little2_encoding = {
867
  { VTABLE, 2, 0,
868
#if BYTEORDER == 1234
869
    1
870
#else
871
    0
872
#endif
873
  },
874
  {
875
#define BT_COLON BT_NMSTRT
876
#include "asciitab.h"
877
#undef BT_COLON
878
#include "latin1tab.h"
879
  },
880
  STANDARD_VTABLE(little2_) NULL_VTABLE
881
};
882
883
#if BYTEORDER != 4321
884
885
#ifdef XML_NS
886
887
static const struct normal_encoding internal_little2_encoding_ns = {
888
  { VTABLE, 2, 0, 1 },
889
  {
890
#include "iasciitab.h"
891
#include "latin1tab.h"
892
  },
893
  STANDARD_VTABLE(little2_) NULL_VTABLE
894
};
895
896
#endif
897
898
static const struct normal_encoding internal_little2_encoding = {
899
  { VTABLE, 2, 0, 1 },
900
  {
901
#define BT_COLON BT_NMSTRT
902
#include "iasciitab.h"
903
#undef BT_COLON
904
#include "latin1tab.h"
905
  },
906
  STANDARD_VTABLE(little2_) NULL_VTABLE
907
};
908
909
#endif
910
911
912
#define BIG2_BYTE_TYPE(enc, p) \
913
 ((p)[0] == 0 \
914
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
915
  : unicode_byte_type((p)[0], (p)[1]))
916
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
917
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
918
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
919
  UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
920
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
921
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
922
923
#ifdef XML_MIN_SIZE
924
925
static int PTRFASTCALL
926
big2_byteType(const ENCODING *enc, const char *p)
927
{
928
  return BIG2_BYTE_TYPE(enc, p);
929
}
930
931
static int PTRFASTCALL
932
big2_byteToAscii(const ENCODING *enc, const char *p)
933
{
934
  return BIG2_BYTE_TO_ASCII(enc, p);
935
}
936
937
static int PTRCALL
938
big2_charMatches(const ENCODING *enc, const char *p, int c)
939
{
940
  return BIG2_CHAR_MATCHES(enc, p, c);
941
}
942
943
static int PTRFASTCALL
944
big2_isNameMin(const ENCODING *enc, const char *p)
945
{
946
  return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
947
}
948
949
static int PTRFASTCALL
950
big2_isNmstrtMin(const ENCODING *enc, const char *p)
951
{
952
  return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
953
}
954
955
#undef VTABLE
956
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
957
958
#else /* not XML_MIN_SIZE */
959
960
#undef PREFIX
961
#define PREFIX(ident) big2_ ## ident
962
#define MINBPC(enc) 2
963
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
964
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
965
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
966
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
967
#define IS_NAME_CHAR(enc, p, n) 0
968
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
969
#define IS_NMSTRT_CHAR(enc, p, n) (0)
970
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
971
972
#define XML_TOK_IMPL_C
973
#include "xmltok_impl.c"
974
#undef XML_TOK_IMPL_C
975
976
#undef MINBPC
977
#undef BYTE_TYPE
978
#undef BYTE_TO_ASCII
979
#undef CHAR_MATCHES
980
#undef IS_NAME_CHAR
981
#undef IS_NAME_CHAR_MINBPC
982
#undef IS_NMSTRT_CHAR
983
#undef IS_NMSTRT_CHAR_MINBPC
984
#undef IS_INVALID_CHAR
985
986
#endif /* not XML_MIN_SIZE */
987
988
#ifdef XML_NS
989
990
static const struct normal_encoding big2_encoding_ns = {
991
  { VTABLE, 2, 0,
992
#if BYTEORDER == 4321
993
  1
994
#else
995
  0
996
#endif
997
  },
998
  {
999
#include "asciitab.h"
1000
#include "latin1tab.h"
1001
  },
1002
  STANDARD_VTABLE(big2_) NULL_VTABLE
1003
};
1004
1005
#endif
1006
1007
static const struct normal_encoding big2_encoding = {
1008
  { VTABLE, 2, 0,
1009
#if BYTEORDER == 4321
1010
  1
1011
#else
1012
  0
1013
#endif
1014
  },
1015
  {
1016
#define BT_COLON BT_NMSTRT
1017
#include "asciitab.h"
1018
#undef BT_COLON
1019
#include "latin1tab.h"
1020
  },
1021
  STANDARD_VTABLE(big2_) NULL_VTABLE
1022
};
1023
1024
#if BYTEORDER != 1234
1025
1026
#ifdef XML_NS
1027
1028
static const struct normal_encoding internal_big2_encoding_ns = {
1029
  { VTABLE, 2, 0, 1 },
1030
  {
1031
#include "iasciitab.h"
1032
#include "latin1tab.h"
1033
  },
1034
  STANDARD_VTABLE(big2_) NULL_VTABLE
1035
};
1036
1037
#endif
1038
1039
/* "Internal" variant of the big2_ encoding object without namespace
   processing.  Only compiled when BYTEORDER != 1234.
   The fourth header value is unconditionally 1, the ASCII half of the
   byte-type table comes from iasciitab.h, and BT_COLON is mapped to
   BT_NMSTRT while that table is included so the colon is treated as a
   name-start character. */
static const struct normal_encoding internal_big2_encoding = {
1040
  { VTABLE, 2, 0, 1 },
1041
  {
1042
#define BT_COLON BT_NMSTRT
1043
#include "iasciitab.h"
1044
#undef BT_COLON
1045
#include "latin1tab.h"
1046
  },
1047
  STANDARD_VTABLE(big2_) NULL_VTABLE
1048
};
1049
1050
#endif
1051
1052
#undef PREFIX
1053
1054
static int FASTCALL
1055
streqci(const char *s1, const char *s2)
1056
{
1057
216040
  for (;;) {
1058
625790
    char c1 = *s1++;
1059
625790
    char c2 = *s2++;
1060

643210
    if (ASCII_a <= c1 && c1 <= ASCII_z)
1061
17420
      c1 += ASCII_A - ASCII_a;
1062

625790
    if (ASCII_a <= c2 && c2 <= ASCII_z)
1063
      /* The following line will never get executed.  streqci() is
1064
       * only called from two places, both of which guarantee to put
1065
       * upper-case strings into s2.
1066
       */
1067
      c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1068
625790
    if (c1 != c2)
1069
56220
      return 0;
1070
569570
    if (!c1)
1071
51800
      break;
1072

517770
  }
1073
51800
  return 1;
1074
108020
}
1075
1076
/* Position-update callback that ignores the encoding it is handed and
   always delegates to normal_updatePosition() with the UTF-8 encoding
   object, updating *pos for the bytes in [ptr, end). */
static void PTRCALL
1077
initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
1078
                   const char *end, POSITION *pos)
1079
{
1080
31888
  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1081
15944
}
1082
1083
static int
1084
toAscii(const ENCODING *enc, const char *ptr, const char *end)
1085
{
1086
2133900
  char buf[1];
1087
2133900
  char *p = buf;
1088
2133900
  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1089
2133900
  if (p == buf)
1090
300
    return -1;
1091
  else
1092
2133600
    return buf[0];
1093
2133900
}
1094
1095
static int FASTCALL
1096
isSpace(int c)
1097
{
1098

2209320
  switch (c) {
1099
  case 0x20:
1100
  case 0xD:
1101
  case 0xA:
1102
  case 0x9:
1103
105170
    return 1;
1104
  }
1105
999490
  return 0;
1106
1104660
}
1107
1108
/* Return 1 if there's just optional white space or there's an S
1109
   followed by name=val.
1110
*/
1111
static int
1112
parsePseudoAttribute(const ENCODING *enc,
1113
                     const char *ptr,
1114
                     const char *end,
1115
                     const char **namePtr,
1116
                     const char **nameEndPtr,
1117
                     const char **valPtr,
1118
                     const char **nextTokPtr)
1119
{
1120
  int c;
1121
  char open;
1122
315340
  if (ptr == end) {
1123
52510
    *namePtr = NULL;
1124
52510
    return 1;
1125
  }
1126
105160
  if (!isSpace(toAscii(enc, ptr, end))) {
1127
    *nextTokPtr = ptr;
1128
    return 0;
1129
  }
1130
  do {
1131
105160
    ptr += enc->minBytesPerChar;
1132
105160
  } while (isSpace(toAscii(enc, ptr, end)));
1133
105160
  if (ptr == end) {
1134
    *namePtr = NULL;
1135
    return 1;
1136
  }
1137
105160
  *namePtr = ptr;
1138
894090
  for (;;) {
1139
894090
    c = toAscii(enc, ptr, end);
1140
894090
    if (c == -1) {
1141
20
      *nextTokPtr = ptr;
1142
20
      return 0;
1143
    }
1144
894070
    if (c == ASCII_EQUALS) {
1145
105140
      *nameEndPtr = ptr;
1146
105140
      break;
1147
    }
1148
788930
    if (isSpace(c)) {
1149
      *nameEndPtr = ptr;
1150
      do {
1151
        ptr += enc->minBytesPerChar;
1152
      } while (isSpace(c = toAscii(enc, ptr, end)));
1153
      if (c != ASCII_EQUALS) {
1154
        *nextTokPtr = ptr;
1155
        return 0;
1156
      }
1157
      break;
1158
    }
1159
788930
    ptr += enc->minBytesPerChar;
1160
  }
1161
105140
  if (ptr == *namePtr) {
1162
10
    *nextTokPtr = ptr;
1163
10
    return 0;
1164
  }
1165
105130
  ptr += enc->minBytesPerChar;
1166
105130
  c = toAscii(enc, ptr, end);
1167
210260
  while (isSpace(c)) {
1168
    ptr += enc->minBytesPerChar;
1169
    c = toAscii(enc, ptr, end);
1170
  }
1171
105130
  if (c != ASCII_QUOT && c != ASCII_APOS) {
1172
    *nextTokPtr = ptr;
1173
    return 0;
1174
  }
1175
105130
  open = (char)c;
1176
105130
  ptr += enc->minBytesPerChar;
1177
105130
  *valPtr = ptr;
1178
871970
  for (;; ptr += enc->minBytesPerChar) {
1179
871970
    c = toAscii(enc, ptr, end);
1180
871970
    if (c == open)
1181
      break;
1182
766840
    if (!(ASCII_a <= c && c <= ASCII_z)
1183
750610
        && !(ASCII_A <= c && c <= ASCII_Z)
1184
2053120
        && !(ASCII_0 <= c && c <= ASCII_9)
1185
513280
        && c != ASCII_PERIOD
1186
513280
        && c != ASCII_MINUS
1187
513280
        && c != ASCII_UNDERSCORE) {
1188
      *nextTokPtr = ptr;
1189
      return 0;
1190
    }
1191
  }
1192
105130
  *nextTokPtr = ptr + enc->minBytesPerChar;
1193
105130
  return 1;
1194
157670
}
1195
1196
/* NUL-terminated keyword strings spelled with the ASCII_* constants.
   doParseXmlDecl() compares them against declaration text through
   XmlNameMatchesAscii(): version / encoding / standalone are matched
   against pseudo-attribute names, yes / no against the value of
   "standalone". */
static const char KW_version[] = {
1197
  ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
1198
};
1199
1200
static const char KW_encoding[] = {
1201
  ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
1202
};
1203
1204
static const char KW_standalone[] = {
1205
  ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
1206
  ASCII_n, ASCII_e, '\0'
1207
};
1208
1209
static const char KW_yes[] = {
1210
  ASCII_y, ASCII_e, ASCII_s,  '\0'
1211
};
1212
1213
static const char KW_no[] = {
1214
  ASCII_n, ASCII_o,  '\0'
1215
};
1216
1217
static int
1218
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
1219
                                                 const char *,
1220
                                                 const char *),
1221
               int isGeneralTextEntity,
1222
               const ENCODING *enc,
1223
               const char *ptr,
1224
               const char *end,
1225
               const char **badPtr,
1226
               const char **versionPtr,
1227
               const char **versionEndPtr,
1228
               const char **encodingName,
1229
               const ENCODING **encoding,
1230
               int *standalone)
1231
{
1232
52810
  const char *val = NULL;
1233
52810
  const char *name = NULL;
1234
52810
  const char *nameEnd = NULL;
1235
52810
  ptr += 5 * enc->minBytesPerChar;
1236
52810
  end -= 2 * enc->minBytesPerChar;
1237
52810
  if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
1238
52810
      || !name) {
1239
30
    *badPtr = ptr;
1240
30
    return 0;
1241
  }
1242
52780
  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1243
30
    if (!isGeneralTextEntity) {
1244
      *badPtr = name;
1245
      return 0;
1246
    }
1247
  }
1248
  else {
1249
52750
    if (versionPtr)
1250
52750
      *versionPtr = val;
1251
52750
    if (versionEndPtr)
1252
52750
      *versionEndPtr = ptr;
1253
52750
    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1254
10
      *badPtr = ptr;
1255
10
      return 0;
1256
    }
1257
52740
    if (!name) {
1258
420
      if (isGeneralTextEntity) {
1259
        /* a TextDecl must have an EncodingDecl */
1260
        *badPtr = ptr;
1261
        return 0;
1262
      }
1263
420
      return 1;
1264
    }
1265
  }
1266
52350
  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1267
52110
    int c = toAscii(enc, val, end);
1268

102210
    if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
1269
      *badPtr = val;
1270
      return 0;
1271
    }
1272
52110
    if (encodingName)
1273
52110
      *encodingName = val;
1274
52110
    if (encoding)
1275
52110
      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1276
52110
    if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1277
10
      *badPtr = ptr;
1278
10
      return 0;
1279
    }
1280
52100
    if (!name)
1281
52070
      return 1;
1282
30
  }
1283
270
  if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
1284
270
      || isGeneralTextEntity) {
1285
    *badPtr = name;
1286
    return 0;
1287
  }
1288
270
  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1289
50
    if (standalone)
1290
50
      *standalone = 1;
1291
  }
1292
220
  else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
1293
220
    if (standalone)
1294
220
      *standalone = 0;
1295
  }
1296
  else {
1297
    *badPtr = val;
1298
    return 0;
1299
  }
1300
290
  while (isSpace(toAscii(enc, ptr, end)))
1301
10
    ptr += enc->minBytesPerChar;
1302
270
  if (ptr != end) {
1303
    *badPtr = ptr;
1304
    return 0;
1305
  }
1306
270
  return 1;
1307
52810
}
1308
1309
static int FASTCALL
1310
checkCharRefNumber(int result)
1311
{
1312


67620
  switch (result >> 8) {
1313
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
1314
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
1315
10
    return -1;
1316
  case 0:
1317
22530
    if (latin1_encoding.type[result] == BT_NONXML)
1318
10
      return -1;
1319
    break;
1320
  case 0xFF:
1321
    if (result == 0xFFFE || result == 0xFFFF)
1322
      return -1;
1323
    break;
1324
  }
1325
22530
  return result;
1326
22550
}
1327
1328
int FASTCALL
1329
XmlUtf8Encode(int c, char *buf)
1330
{
1331
  enum {
1332
    /* minN is minimum legal resulting value for N byte sequence */
1333
    min2 = 0x80,
1334
    min3 = 0x800,
1335
    min4 = 0x10000
1336
  };
1337
1338
44940
  if (c < 0)
1339
    return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1340
22470
  if (c < min2) {
1341
190
    buf[0] = (char)(c | UTF8_cval1);
1342
190
    return 1;
1343
  }
1344
22280
  if (c < min3) {
1345
22280
    buf[0] = (char)((c >> 6) | UTF8_cval2);
1346
22280
    buf[1] = (char)((c & 0x3f) | 0x80);
1347
22280
    return 2;
1348
  }
1349
  if (c < min4) {
1350
    buf[0] = (char)((c >> 12) | UTF8_cval3);
1351
    buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1352
    buf[2] = (char)((c & 0x3f) | 0x80);
1353
    return 3;
1354
  }
1355
  if (c < 0x110000) {
1356
    buf[0] = (char)((c >> 18) | UTF8_cval4);
1357
    buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1358
    buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1359
    buf[3] = (char)((c & 0x3f) | 0x80);
1360
    return 4;
1361
  }
1362
  return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1363
22470
}
1364
1365
int FASTCALL
1366
XmlUtf16Encode(int charNum, unsigned short *buf)
1367
{
1368
  if (charNum < 0)
1369
    return 0;
1370
  if (charNum < 0x10000) {
1371
    buf[0] = (unsigned short)charNum;
1372
    return 1;
1373
  }
1374
  if (charNum < 0x110000) {
1375
    charNum -= 0x10000;
1376
    buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1377
    buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1378
    return 2;
1379
  }
1380
  return 0;
1381
}
1382
1383
/* Encoding object for a user-supplied single/multi-byte encoding.
   It is filled in by XmlInitUnknownEncoding().  `normal` has to stay
   the first member: the same memory is accessed both as a
   normal_encoding / ENCODING and, through AS_UNKNOWN_ENCODING(), as
   this struct. */
struct unknown_encoding {
1384
  struct normal_encoding normal;
1385
  /* Optional callback used for bytes that start a multi-byte sequence. */
  CONVERTER convert;
1386
  /* Opaque pointer handed back to convert on every call. */
  void *userData;
1387
  /* Per input byte: UTF-16 code unit, or 0 when convert must be called. */
  unsigned short utf16[256];
1388
  /* Per input byte: [0] is the UTF-8 length (0 when convert must be
     called), the remaining bytes hold the UTF-8 sequence itself. */
  char utf8[256][4];
1389
};
1390
1391
/* View an ENCODING pointer as the unknown_encoding that contains it. */
#define AS_UNKNOWN_ENCODING(enc)  ((const struct unknown_encoding *) (enc))
1392
1393
int
1394
XmlSizeOfUnknownEncoding(void)
1395
{
1396
760
  return sizeof(struct unknown_encoding);
1397
}
1398
1399
static int PTRFASTCALL
1400
unknown_isName(const ENCODING *enc, const char *p)
1401
{
1402
940
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1403
470
  int c = uenc->convert(uenc->userData, p);
1404
470
  if (c & ~0xFFFF)
1405
10
    return 0;
1406
460
  return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1407
470
}
1408
1409
static int PTRFASTCALL
1410
unknown_isNmstrt(const ENCODING *enc, const char *p)
1411
{
1412
260
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1413
130
  int c = uenc->convert(uenc->userData, p);
1414
130
  if (c & ~0xFFFF)
1415
10
    return 0;
1416
120
  return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1417
130
}
1418
1419
static int PTRFASTCALL
1420
unknown_isInvalid(const ENCODING *enc, const char *p)
1421
{
1422
60
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1423
30
  int c = uenc->convert(uenc->userData, p);
1424
60
  return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1425
}
1426
1427
static enum XML_Convert_Result PTRCALL
1428
unknown_toUtf8(const ENCODING *enc,
1429
               const char **fromP, const char *fromLim,
1430
               char **toP, const char *toLim)
1431
{
1432
1440
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1433
720
  char buf[XML_UTF8_ENCODE_MAX];
1434
720
  for (;;) {
1435
    const char *utf8;
1436
    int n;
1437
3100
    if (*fromP == fromLim)
1438
700
      return XML_CONVERT_COMPLETED;
1439
2400
    utf8 = uenc->utf8[(unsigned char)**fromP];
1440
2400
    n = *utf8++;
1441
2400
    if (n == 0) {
1442
70
      int c = uenc->convert(uenc->userData, *fromP);
1443
70
      n = XmlUtf8Encode(c, buf);
1444
70
      if (n > toLim - *toP)
1445
10
        return XML_CONVERT_OUTPUT_EXHAUSTED;
1446
      utf8 = buf;
1447
120
      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1448
60
                 - (BT_LEAD2 - 2));
1449
60
    }
1450
    else {
1451
2330
      if (n > toLim - *toP)
1452
10
        return XML_CONVERT_OUTPUT_EXHAUSTED;
1453
2320
      (*fromP)++;
1454
    }
1455
    do {
1456
2380
      *(*toP)++ = *utf8++;
1457
2380
    } while (--n != 0);
1458
2380
  }
1459
720
}
1460
1461
static enum XML_Convert_Result PTRCALL
1462
unknown_toUtf16(const ENCODING *enc,
1463
                const char **fromP, const char *fromLim,
1464
                unsigned short **toP, const unsigned short *toLim)
1465
{
1466
  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1467
  while (*fromP < fromLim && *toP < toLim) {
1468
    unsigned short c = uenc->utf16[(unsigned char)**fromP];
1469
    if (c == 0) {
1470
      c = (unsigned short)
1471
          uenc->convert(uenc->userData, *fromP);
1472
      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1473
                 - (BT_LEAD2 - 2));
1474
    }
1475
    else
1476
      (*fromP)++;
1477
    *(*toP)++ = c;
1478
  }
1479
1480
  if ((*toP == toLim) && (*fromP < fromLim))
1481
    return XML_CONVERT_OUTPUT_EXHAUSTED;
1482
  else
1483
    return XML_CONVERT_COMPLETED;
1484
}
1485
1486
ENCODING *
1487
XmlInitUnknownEncoding(void *mem,
1488
                       int *table,
1489
                       CONVERTER convert,
1490
                       void *userData)
1491
{
1492
  int i;
1493
720
  struct unknown_encoding *e = (struct unknown_encoding *)mem;
1494
340560
  for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
1495
169920
    ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
1496
90500
  for (i = 0; i < 128; i++)
1497
76410
    if (latin1_encoding.type[i] != BT_OTHER
1498
86650
        && latin1_encoding.type[i] != BT_NONXML
1499
73260
        && table[i] != i)
1500
10
      return 0;
1501
169800
  for (i = 0; i < 256; i++) {
1502
84590
    int c = table[i];
1503
84590
    if (c == -1) {
1504
3900
      e->normal.type[i] = BT_MALFORM;
1505
      /* This shouldn't really get used. */
1506
3900
      e->utf16[i] = 0xFFFF;
1507
3900
      e->utf8[i][0] = 1;
1508
3900
      e->utf8[i][1] = 0;
1509
3900
    }
1510
80690
    else if (c < 0) {
1511
14100
      if (c < -4)
1512
10
        return 0;
1513
      /* Multi-byte sequences need a converter function */
1514
14090
      if (!convert)
1515
10
        return 0;
1516
14080
      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1517
14080
      e->utf8[i][0] = 0;
1518
14080
      e->utf16[i] = 0;
1519
14080
    }
1520
66590
    else if (c < 0x80) {
1521
76320
      if (latin1_encoding.type[c] != BT_OTHER
1522
86470
          && latin1_encoding.type[c] != BT_NONXML
1523
73170
          && c != i)
1524
10
        return 0;
1525
44800
      e->normal.type[i] = latin1_encoding.type[c];
1526
44800
      e->utf8[i][0] = 1;
1527
44800
      e->utf8[i][1] = (char)c;
1528
44800
      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
1529
44800
    }
1530
21780
    else if (checkCharRefNumber(c) < 0) {
1531
10
      e->normal.type[i] = BT_NONXML;
1532
      /* This shouldn't really get used. */
1533
10
      e->utf16[i] = 0xFFFF;
1534
10
      e->utf8[i][0] = 1;
1535
10
      e->utf8[i][1] = 0;
1536
10
    }
1537
    else {
1538
21770
      if (c > 0xFFFF)
1539
10
        return 0;
1540
21760
      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1541
10540
        e->normal.type[i] = BT_NMSTRT;
1542
11220
      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1543
        e->normal.type[i] = BT_NAME;
1544
      else
1545
        e->normal.type[i] = BT_OTHER;
1546
21760
      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1547
21760
      e->utf16[i] = (unsigned short)c;
1548
    }
1549
84550
  }
1550
310
  e->userData = userData;
1551
310
  e->convert = convert;
1552
310
  if (convert) {
1553
110
    e->normal.isName2 = unknown_isName;
1554
110
    e->normal.isName3 = unknown_isName;
1555
110
    e->normal.isName4 = unknown_isName;
1556
110
    e->normal.isNmstrt2 = unknown_isNmstrt;
1557
110
    e->normal.isNmstrt3 = unknown_isNmstrt;
1558
110
    e->normal.isNmstrt4 = unknown_isNmstrt;
1559
110
    e->normal.isInvalid2 = unknown_isInvalid;
1560
110
    e->normal.isInvalid3 = unknown_isInvalid;
1561
110
    e->normal.isInvalid4 = unknown_isInvalid;
1562
110
  }
1563
310
  e->normal.enc.utf8Convert = unknown_toUtf8;
1564
310
  e->normal.enc.utf16Convert = unknown_toUtf16;
1565
310
  return &(e->normal.enc);
1566
360
}
1567
1568
/* If this enumeration is changed, getEncodingIndex and encodings
1569
must also be changed. */
1570
/* Indices of the built-in encodings.  The values from ISO_8859_1_ENC
   through UTF_16LE_ENC double as indices into the encodingNames array
   of getEncodingIndex() and into the encoding tables used by
   initScan(). */
enum {
1571
  /* returned by getEncodingIndex() when the name matches no entry */
  UNKNOWN_ENC = -1,
1572
  ISO_8859_1_ENC = 0,
1573
  US_ASCII_ENC,
1574
  UTF_8_ENC,
1575
  UTF_16_ENC,
1576
  UTF_16BE_ENC,
1577
  UTF_16LE_ENC,
1578
  /* must match encodingNames up to here */
1579
  /* returned by getEncodingIndex() when no name was given (NULL) */
  NO_ENC
1580
};
1581
1582
/* NUL-terminated, upper-case names of the built-in encodings, spelled
   with the ASCII_* constants.  getEncodingIndex() collects them in its
   encodingNames array and compares them case-insensitively. */
static const char KW_ISO_8859_1[] = {
1583
  ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
1584
  ASCII_MINUS, ASCII_1, '\0'
1585
};
1586
static const char KW_US_ASCII[] = {
1587
  ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
1588
  '\0'
1589
};
1590
static const char KW_UTF_8[] =  {
1591
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
1592
};
1593
static const char KW_UTF_16[] = {
1594
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
1595
};
1596
static const char KW_UTF_16BE[] = {
1597
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
1598
  '\0'
1599
};
1600
static const char KW_UTF_16LE[] = {
1601
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
1602
  '\0'
1603
};
1604
1605
static int FASTCALL
1606
getEncodingIndex(const char *name)
1607
{
1608
  static const char * const encodingNames[] = {
1609
    KW_ISO_8859_1,
1610
    KW_US_ASCII,
1611
    KW_UTF_8,
1612
    KW_UTF_16,
1613
    KW_UTF_16BE,
1614
    KW_UTF_16LE,
1615
  };
1616
  int i;
1617
375234
  if (name == NULL)
1618
135737
    return NO_ENC;
1619
112780
  for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
1620
56000
    if (streqci(name, encodingNames[i]))
1621
51490
      return i;
1622
390
  return UNKNOWN_ENC;
1623
187617
}
1624
1625
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc)         reads that index back as an int.
   SET_INIT_ENC_INDEX(enc, i)  stores index i, narrowed to char.  The
                               argument is parenthesized so that any
                               expression can be passed safely.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1631
1632
/* This is what detects the encoding.  encodingTable maps from
1633
   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
1634
   the external (protocol) specified encoding; state is
1635
   XML_CONTENT_STATE if we're parsing an external text entity, and
1636
   XML_PROLOG_STATE otherwise.
1637
*/
1638
1639
1640
static int
1641
initScan(const ENCODING * const *encodingTable,
1642
         const INIT_ENCODING *enc,
1643
         int state,
1644
         const char *ptr,
1645
         const char *end,
1646
         const char **nextTokPtr)
1647
{
1648
  const ENCODING **encPtr;
1649
1650
164796
  if (ptr >= end)
1651
20
    return XML_TOK_NONE;
1652
82378
  encPtr = enc->encPtr;
1653
82378
  if (ptr + 1 == end) {
1654
    /* only a single byte available for auto-detection */
1655
#ifndef XML_DTD /* FIXME */
1656
    /* a well-formed document entity must have more than one byte */
1657
    if (state != XML_CONTENT_STATE)
1658
      return XML_TOK_PARTIAL;
1659
#endif
1660
    /* so we're parsing an external text entity... */
1661
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
1662
16064
    switch (INIT_ENC_INDEX(enc)) {
1663
    case UTF_16_ENC:
1664
    case UTF_16LE_ENC:
1665
    case UTF_16BE_ENC:
1666
30
      return XML_TOK_PARTIAL;
1667
    }
1668

16154
    switch ((unsigned char)*ptr) {
1669
    case 0xFE:
1670
    case 0xFF:
1671
    case 0xEF: /* possibly first byte of UTF-8 BOM */
1672
120
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1673
120
          && state == XML_CONTENT_STATE)
1674
        break;
1675
      /* fall through */
1676
    case 0x00:
1677
    case 0x3C:
1678
15864
      return XML_TOK_PARTIAL;
1679
    }
1680
  }
1681
  else {
1682

66314
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1683
    case 0xFEFF:
1684
20
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1685
20
          && state == XML_CONTENT_STATE)
1686
        break;
1687
10
      *nextTokPtr = ptr + 2;
1688
10
      *encPtr = encodingTable[UTF_16BE_ENC];
1689
10
      return XML_TOK_BOM;
1690
    /* 00 3C is handled in the default case */
1691
    case 0x3C00:
1692
140
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1693
250
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1694
130
          && state == XML_CONTENT_STATE)
1695
        break;
1696
120
      *encPtr = encodingTable[UTF_16LE_ENC];
1697
120
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1698
    case 0xFFFE:
1699
50
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1700
50
          && state == XML_CONTENT_STATE)
1701
        break;
1702
40
      *nextTokPtr = ptr + 2;
1703
40
      *encPtr = encodingTable[UTF_16LE_ENC];
1704
40
      return XML_TOK_BOM;
1705
    case 0xEFBB:
1706
      /* Maybe a UTF-8 BOM (EF BB BF) */
1707
      /* If there's an explicitly specified (external) encoding
1708
         of ISO-8859-1 or some flavour of UTF-16
1709
         and this is an external text entity,
1710
         don't look for the BOM,
1711
         because it might be a legal data.
1712
      */
1713
100
      if (state == XML_CONTENT_STATE) {
1714
40
        int e = INIT_ENC_INDEX(enc);
1715
120
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
1716
80
            || e == UTF_16LE_ENC || e == UTF_16_ENC)
1717
          break;
1718
40
      }
1719
100
      if (ptr + 2 == end)
1720
50
        return XML_TOK_PARTIAL;
1721
50
      if ((unsigned char)ptr[2] == 0xBF) {
1722
40
        *nextTokPtr = ptr + 3;
1723
40
        *encPtr = encodingTable[UTF_8_ENC];
1724
40
        return XML_TOK_BOM;
1725
      }
1726
      break;
1727
    default:
1728
66014
      if (ptr[0] == '\0') {
1729
        /* 0 isn't a legal data character. Furthermore a document
1730
           entity can only start with ASCII characters.  So the only
1731
           way this can fail to be big-endian UTF-16 if it it's an
1732
           external parsed general entity that's labelled as
1733
           UTF-16LE.
1734
        */
1735

440
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1736
          break;
1737
420
        *encPtr = encodingTable[UTF_16BE_ENC];
1738
420
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1739
      }
1740
131168
      else if (ptr[1] == '\0') {
1741
        /* We could recover here in the case:
1742
            - parsing an external entity
1743
            - second byte is 0
1744
            - no externally specified encoding
1745
            - no encoding declaration
1746
           by assuming UTF-16LE.  But we don't, because this would mean when
1747
           presented just with a single byte, we couldn't reliably determine
1748
           whether we needed further bytes.
1749
        */
1750
65584
        if (state == XML_CONTENT_STATE)
1751
          break;
1752
10
        *encPtr = encodingTable[UTF_16LE_ENC];
1753
10
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1754
      }
1755
      break;
1756
    }
1757
  }
1758
65794
  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1759
65794
  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1760
82398
}
1761
1762
1763
/* First instantiation of xmltok_ns.c: NS() and ns() expand to their
   argument unchanged, so the identifiers generated by the included
   file keep their plain names.  XML_TOK_NS_C is defined only around
   the include, and all three macros are removed again afterwards. */
#define NS(x) x
1764
#define ns(x) x
1765
#define XML_TOK_NS_C
1766
#include "xmltok_ns.c"
1767
#undef XML_TOK_NS_C
1768
#undef NS
1769
#undef ns
1770
1771
#ifdef XML_NS
1772
1773
/* Second instantiation of xmltok_ns.c (XML_NS builds only): NS()
   appends "NS" and ns() appends "_ns" to its argument, so the same
   template produces a separate set of identifiers with those
   suffixes. */
#define NS(x) x ## NS
1774
#define ns(x) x ## _ns
1775
1776
#define XML_TOK_NS_C
1777
#include "xmltok_ns.c"
1778
#undef XML_TOK_NS_C
1779
1780
#undef NS
1781
#undef ns
1782
1783
ENCODING *
1784
XmlInitUnknownEncodingNS(void *mem,
1785
                         int *table,
1786
                         CONVERTER convert,
1787
                         void *userData)
1788
{
1789
20
  ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1790
10
  if (enc)
1791
10
    ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1792
10
  return enc;
1793
}
1794
1795
#endif /* XML_NS */