GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* This file is included! |
||
2 |
__ __ _ |
||
3 |
___\ \/ /_ __ __ _| |_ |
||
4 |
/ _ \\ /| '_ \ / _` | __| |
||
5 |
| __// \| |_) | (_| | |_ |
||
6 |
\___/_/\_\ .__/ \__,_|\__| |
||
7 |
|_| XML parser |
||
8 |
|||
9 |
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
||
10 |
Copyright (c) 2000-2017 Expat development team |
||
11 |
Licensed under the MIT license: |
||
12 |
|||
13 |
Permission is hereby granted, free of charge, to any person obtaining |
||
14 |
a copy of this software and associated documentation files (the |
||
15 |
"Software"), to deal in the Software without restriction, including |
||
16 |
without limitation the rights to use, copy, modify, merge, publish, |
||
17 |
distribute, sublicense, and/or sell copies of the Software, and to permit |
||
18 |
persons to whom the Software is furnished to do so, subject to the |
||
19 |
following conditions: |
||
20 |
|||
21 |
The above copyright notice and this permission notice shall be included |
||
22 |
in all copies or substantial portions of the Software. |
||
23 |
|||
24 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
25 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
26 |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
||
27 |
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
||
28 |
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
29 |
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
30 |
USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
31 |
*/ |
||
32 |
|||
33 |
#ifdef XML_TOK_IMPL_C |
||
34 |
|||
35 |
#ifndef IS_INVALID_CHAR |
||
36 |
#define IS_INVALID_CHAR(enc, ptr, n) (0) |
||
37 |
#endif |
||
38 |
|||
39 |
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ |
||
40 |
case BT_LEAD ## n: \ |
||
41 |
if (end - ptr < n) \ |
||
42 |
return XML_TOK_PARTIAL_CHAR; \ |
||
43 |
if (IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
44 |
*(nextTokPtr) = (ptr); \ |
||
45 |
return XML_TOK_INVALID; \ |
||
46 |
} \ |
||
47 |
ptr += n; \ |
||
48 |
break; |
||
49 |
|||
50 |
#define INVALID_CASES(ptr, nextTokPtr) \ |
||
51 |
INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ |
||
52 |
INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ |
||
53 |
INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ |
||
54 |
case BT_NONXML: \ |
||
55 |
case BT_MALFORM: \ |
||
56 |
case BT_TRAIL: \ |
||
57 |
*(nextTokPtr) = (ptr); \ |
||
58 |
return XML_TOK_INVALID; |
||
59 |
|||
60 |
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ |
||
61 |
case BT_LEAD ## n: \ |
||
62 |
if (end - ptr < n) \ |
||
63 |
return XML_TOK_PARTIAL_CHAR; \ |
||
64 |
if (!IS_NAME_CHAR(enc, ptr, n)) { \ |
||
65 |
*nextTokPtr = ptr; \ |
||
66 |
return XML_TOK_INVALID; \ |
||
67 |
} \ |
||
68 |
ptr += n; \ |
||
69 |
break; |
||
70 |
|||
71 |
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ |
||
72 |
case BT_NONASCII: \ |
||
73 |
if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ |
||
74 |
*nextTokPtr = ptr; \ |
||
75 |
return XML_TOK_INVALID; \ |
||
76 |
} \ |
||
77 |
case BT_NMSTRT: \ |
||
78 |
case BT_HEX: \ |
||
79 |
case BT_DIGIT: \ |
||
80 |
case BT_NAME: \ |
||
81 |
case BT_MINUS: \ |
||
82 |
ptr += MINBPC(enc); \ |
||
83 |
break; \ |
||
84 |
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ |
||
85 |
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ |
||
86 |
CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) |
||
87 |
|||
88 |
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ |
||
89 |
case BT_LEAD ## n: \ |
||
90 |
if (end - ptr < n) \ |
||
91 |
return XML_TOK_PARTIAL_CHAR; \ |
||
92 |
if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
||
93 |
*nextTokPtr = ptr; \ |
||
94 |
return XML_TOK_INVALID; \ |
||
95 |
} \ |
||
96 |
ptr += n; \ |
||
97 |
break; |
||
98 |
|||
99 |
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ |
||
100 |
case BT_NONASCII: \ |
||
101 |
if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ |
||
102 |
*nextTokPtr = ptr; \ |
||
103 |
return XML_TOK_INVALID; \ |
||
104 |
} \ |
||
105 |
case BT_NMSTRT: \ |
||
106 |
case BT_HEX: \ |
||
107 |
ptr += MINBPC(enc); \ |
||
108 |
break; \ |
||
109 |
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ |
||
110 |
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ |
||
111 |
CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) |
||
112 |
|||
113 |
#ifndef PREFIX |
||
114 |
#define PREFIX(ident) ident |
||
115 |
#endif |
||
116 |
|||
117 |
|||
118 |
#define HAS_CHARS(enc, ptr, end, count) \ |
||
119 |
(end - ptr >= count * MINBPC(enc)) |
||
120 |
|||
121 |
#define HAS_CHAR(enc, ptr, end) \ |
||
122 |
HAS_CHARS(enc, ptr, end, 1) |
||
123 |
|||
124 |
#define REQUIRE_CHARS(enc, ptr, end, count) \ |
||
125 |
{ \ |
||
126 |
if (! HAS_CHARS(enc, ptr, end, count)) { \ |
||
127 |
return XML_TOK_PARTIAL; \ |
||
128 |
} \ |
||
129 |
} |
||
130 |
|||
131 |
#define REQUIRE_CHAR(enc, ptr, end) \ |
||
132 |
REQUIRE_CHARS(enc, ptr, end, 1) |
||
133 |
|||
134 |
|||
135 |
/* ptr points to character following "<!-" */ |
||
136 |
|||
137 |
static int PTRCALL |
||
138 |
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
||
139 |
const char *end, const char **nextTokPtr) |
||
140 |
{ |
||
141 |
✓✓✓✓ ✓✓ |
306792 |
if (HAS_CHAR(enc, ptr, end)) { |
142 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
153606 |
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
143 |
*nextTokPtr = ptr; |
||
144 |
return XML_TOK_INVALID; |
||
145 |
} |
||
146 |
153258 |
ptr += MINBPC(enc); |
|
147 |
✓✓✓✓ ✓✓ |
99204120 |
while (HAS_CHAR(enc, ptr, end)) { |
148 |
✗✗✗✗ ✗✗✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✓✓ |
49603812 |
switch (BYTE_TYPE(enc, ptr)) { |
149 |
INVALID_CASES(ptr, nextTokPtr) |
||
150 |
case BT_MINUS: |
||
151 |
1020402 |
ptr += MINBPC(enc); |
|
152 |
✓✓✓✓ ✓✓ |
1020540 |
REQUIRE_CHAR(enc, ptr, end); |
153 |
✓✓✓✗ ✓✗✓✗ ✓✗ |
1020300 |
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
154 |
150264 |
ptr += MINBPC(enc); |
|
155 |
✓✓✓✓ ✓✓ |
150402 |
REQUIRE_CHAR(enc, ptr, end); |
156 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
150138 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
157 |
*nextTokPtr = ptr; |
||
158 |
return XML_TOK_INVALID; |
||
159 |
} |
||
160 |
150126 |
*nextTokPtr = ptr + MINBPC(enc); |
|
161 |
150126 |
return XML_TOK_COMMENT; |
|
162 |
} |
||
163 |
break; |
||
164 |
default: |
||
165 |
48578802 |
ptr += MINBPC(enc); |
|
166 |
48578802 |
break; |
|
167 |
} |
||
168 |
} |
||
169 |
} |
||
170 |
2994 |
return XML_TOK_PARTIAL; |
|
171 |
153396 |
} |
|
172 |
|||
173 |
/* ptr points to character following "<!" */ |
||
174 |
|||
175 |
static int PTRCALL |
||
176 |
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
||
177 |
const char *end, const char **nextTokPtr) |
||
178 |
{ |
||
179 |
✓✓✓✓ ✓✓ |
527175 |
REQUIRE_CHAR(enc, ptr, end); |
180 |
✓✓✗✓ ✗✓✗✓ ✓✗✓✗ ✓✗✓✓ ✗✓✗ |
249342 |
switch (BYTE_TYPE(enc, ptr)) { |
181 |
case BT_MINUS: |
||
182 |
122874 |
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
183 |
case BT_LSQB: |
||
184 |
42 |
*nextTokPtr = ptr + MINBPC(enc); |
|
185 |
42 |
return XML_TOK_COND_SECT_OPEN; |
|
186 |
case BT_NMSTRT: |
||
187 |
case BT_HEX: |
||
188 |
122766 |
ptr += MINBPC(enc); |
|
189 |
break; |
||
190 |
default: |
||
191 |
*nextTokPtr = ptr; |
||
192 |
return XML_TOK_INVALID; |
||
193 |
} |
||
194 |
✓✓✓✓ ✓✓ |
1219302 |
while (HAS_CHAR(enc, ptr, end)) { |
195 |
✗✗✗✓ ✗✓✗✓ ✗✗✗✗ ✓✗✓✗ ✓✗✗✗ ✗✓✗✓ ✗ |
539316 |
switch (BYTE_TYPE(enc, ptr)) { |
196 |
case BT_PERCNT: |
||
197 |
REQUIRE_CHARS(enc, ptr, end, 2); |
||
198 |
/* don't allow <!ENTITY% foo "whatever"> */ |
||
199 |
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
||
200 |
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
||
201 |
*nextTokPtr = ptr; |
||
202 |
return XML_TOK_INVALID; |
||
203 |
} |
||
204 |
/* fall through */ |
||
205 |
case BT_S: case BT_CR: case BT_LF: |
||
206 |
43527 |
*nextTokPtr = ptr; |
|
207 |
43527 |
return XML_TOK_DECL_OPEN; |
|
208 |
case BT_NMSTRT: |
||
209 |
case BT_HEX: |
||
210 |
486885 |
ptr += MINBPC(enc); |
|
211 |
break; |
||
212 |
default: |
||
213 |
*nextTokPtr = ptr; |
||
214 |
return XML_TOK_INVALID; |
||
215 |
} |
||
216 |
} |
||
217 |
79239 |
return XML_TOK_PARTIAL; |
|
218 |
257619 |
} |
|
219 |
|||
220 |
static int PTRCALL |
||
221 |
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
222 |
const char *end, int *tokPtr) |
||
223 |
{ |
||
224 |
int upper = 0; |
||
225 |
425388 |
*tokPtr = XML_TOK_PI; |
|
226 |
✓✓✓✓ ✓✓ |
212694 |
if (end - ptr != MINBPC(enc)*3) |
227 |
67914 |
return 1; |
|
228 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
168480 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
229 |
case ASCII_x: |
||
230 |
break; |
||
231 |
case ASCII_X: |
||
232 |
upper = 1; |
||
233 |
break; |
||
234 |
default: |
||
235 |
132 |
return 1; |
|
236 |
} |
||
237 |
144648 |
ptr += MINBPC(enc); |
|
238 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
168348 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
239 |
case ASCII_m: |
||
240 |
break; |
||
241 |
case ASCII_M: |
||
242 |
upper = 1; |
||
243 |
break; |
||
244 |
default: |
||
245 |
120 |
return 1; |
|
246 |
} |
||
247 |
144528 |
ptr += MINBPC(enc); |
|
248 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
168228 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
249 |
case ASCII_l: |
||
250 |
break; |
||
251 |
case ASCII_L: |
||
252 |
upper = 1; |
||
253 |
break; |
||
254 |
default: |
||
255 |
138 |
return 1; |
|
256 |
} |
||
257 |
✗✓✗✓ ✗✓ |
144390 |
if (upper) |
258 |
return 0; |
||
259 |
144390 |
*tokPtr = XML_TOK_XML_DECL; |
|
260 |
144390 |
return 1; |
|
261 |
212694 |
} |
|
262 |
|||
263 |
/* ptr points to character following "<?" */ |
||
264 |
|||
265 |
static int PTRCALL |
||
266 |
PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
||
267 |
const char *end, const char **nextTokPtr) |
||
268 |
{ |
||
269 |
441420 |
int tok; |
|
270 |
const char *target = ptr; |
||
271 |
✓✓✓✓ ✓✓ |
222732 |
REQUIRE_CHAR(enc, ptr, end); |
272 |
✗✗✓✗ ✗✗✗✓ ✓✓✗✓ ✗✗✗✗ ✓✓✓✗ ✓✗✗✗ ✗ |
244800 |
switch (BYTE_TYPE(enc, ptr)) { |
273 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
218772 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
274 |
default: |
||
275 |
*nextTokPtr = ptr; |
||
276 |
return XML_TOK_INVALID; |
||
277 |
} |
||
278 |
✓✓✓✓ ✓✓ |
2032164 |
while (HAS_CHAR(enc, ptr, end)) { |
279 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗✓✓ ✓✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗✓✓ ✓✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗ |
1083600 |
switch (BYTE_TYPE(enc, ptr)) { |
280 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
797454 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
281 |
case BT_S: case BT_CR: case BT_LF: |
||
282 |
✗✓✗✓ ✗✓ |
212634 |
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
283 |
*nextTokPtr = ptr; |
||
284 |
return XML_TOK_INVALID; |
||
285 |
} |
||
286 |
212634 |
ptr += MINBPC(enc); |
|
287 |
✓✓✓✓ ✓✓ |
105176676 |
while (HAS_CHAR(enc, ptr, end)) { |
288 |
✓✗✗✗ ✗✗✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✓✓ |
52821288 |
switch (BYTE_TYPE(enc, ptr)) { |
289 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
66 |
INVALID_CASES(ptr, nextTokPtr) |
290 |
case BT_QUEST: |
||
291 |
63882 |
ptr += MINBPC(enc); |
|
292 |
✓✓✓✓ ✓✓ |
65868 |
REQUIRE_CHAR(enc, ptr, end); |
293 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
62082 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
294 |
61896 |
*nextTokPtr = ptr + MINBPC(enc); |
|
295 |
61896 |
return tok; |
|
296 |
} |
||
297 |
break; |
||
298 |
default: |
||
299 |
52375686 |
ptr += MINBPC(enc); |
|
300 |
52375686 |
break; |
|
301 |
} |
||
302 |
} |
||
303 |
148746 |
return XML_TOK_PARTIAL; |
|
304 |
case BT_QUEST: |
||
305 |
✗✓✗✓ ✗✓ |
60 |
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
306 |
*nextTokPtr = ptr; |
||
307 |
return XML_TOK_INVALID; |
||
308 |
} |
||
309 |
60 |
ptr += MINBPC(enc); |
|
310 |
✓✓✓✓ ✓✓ |
96 |
REQUIRE_CHAR(enc, ptr, end); |
311 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
36 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
312 |
24 |
*nextTokPtr = ptr + MINBPC(enc); |
|
313 |
24 |
return tok; |
|
314 |
} |
||
315 |
/* fall through */ |
||
316 |
default: |
||
317 |
*nextTokPtr = ptr; |
||
318 |
return XML_TOK_INVALID; |
||
319 |
} |
||
320 |
} |
||
321 |
5994 |
return XML_TOK_PARTIAL; |
|
322 |
220710 |
} |
|
323 |
|||
324 |
static int PTRCALL |
||
325 |
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
326 |
const char *end, const char **nextTokPtr) |
||
327 |
{ |
||
328 |
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
||
329 |
ASCII_T, ASCII_A, ASCII_LSQB }; |
||
330 |
int i; |
||
331 |
/* CDATA[ */ |
||
332 |
✓✓✓✓ ✓✓ |
6780 |
REQUIRE_CHARS(enc, ptr, end, 6); |
333 |
✓✓✓✓ ✓✓ |
2484 |
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
334 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
1512 |
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
335 |
24 |
*nextTokPtr = ptr; |
|
336 |
24 |
return XML_TOK_INVALID; |
|
337 |
} |
||
338 |
} |
||
339 |
162 |
*nextTokPtr = ptr; |
|
340 |
162 |
return XML_TOK_CDATA_SECT_OPEN; |
|
341 |
2322 |
} |
|
342 |
|||
343 |
static int PTRCALL |
||
344 |
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
||
345 |
const char *end, const char **nextTokPtr) |
||
346 |
{ |
||
347 |
✓✓✓✓ ✓✓ |
5400 |
if (ptr >= end) |
348 |
1164 |
return XML_TOK_NONE; |
|
349 |
if (MINBPC(enc) > 1) { |
||
350 |
426 |
size_t n = end - ptr; |
|
351 |
✓✓✓✓ |
426 |
if (n & (MINBPC(enc) - 1)) { |
352 |
216 |
n &= ~(MINBPC(enc) - 1); |
|
353 |
✓✓✓✓ |
216 |
if (n == 0) |
354 |
156 |
return XML_TOK_PARTIAL; |
|
355 |
60 |
end = ptr + n; |
|
356 |
60 |
} |
|
357 |
✓✓✓✓ |
270 |
} |
358 |
✓✗✓✓ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✓✗✗ ✓✓ |
1920 |
switch (BYTE_TYPE(enc, ptr)) { |
359 |
case BT_RSQB: |
||
360 |
288 |
ptr += MINBPC(enc); |
|
361 |
✓✓✓✓ ✓✓ |
396 |
REQUIRE_CHAR(enc, ptr, end); |
362 |
✓✓✓✗ ✓✗✓✗ ✓✗ |
240 |
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
363 |
break; |
||
364 |
162 |
ptr += MINBPC(enc); |
|
365 |
✓✓✓✓ ✓✓ |
252 |
REQUIRE_CHAR(enc, ptr, end); |
366 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
96 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
367 |
6 |
ptr -= MINBPC(enc); |
|
368 |
6 |
break; |
|
369 |
} |
||
370 |
66 |
*nextTokPtr = ptr + MINBPC(enc); |
|
371 |
66 |
return XML_TOK_CDATA_SECT_CLOSE; |
|
372 |
case BT_CR: |
||
373 |
ptr += MINBPC(enc); |
||
374 |
REQUIRE_CHAR(enc, ptr, end); |
||
375 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
||
376 |
ptr += MINBPC(enc); |
||
377 |
*nextTokPtr = ptr; |
||
378 |
return XML_TOK_DATA_NEWLINE; |
||
379 |
case BT_LF: |
||
380 |
6 |
*nextTokPtr = ptr + MINBPC(enc); |
|
381 |
6 |
return XML_TOK_DATA_NEWLINE; |
|
382 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✓ |
240 |
INVALID_CASES(ptr, nextTokPtr) |
383 |
default: |
||
384 |
972 |
ptr += MINBPC(enc); |
|
385 |
972 |
break; |
|
386 |
} |
||
387 |
✓✓✗✓ ✓✓ |
14556 |
while (HAS_CHAR(enc, ptr, end)) { |
388 |
✓✗✗✗ ✗✗✗✗ ✓✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✓✓ |
18744 |
switch (BYTE_TYPE(enc, ptr)) { |
389 |
#define LEAD_CASE(n) \ |
||
390 |
case BT_LEAD ## n: \ |
||
391 |
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
392 |
*nextTokPtr = ptr; \ |
||
393 |
return XML_TOK_DATA_CHARS; \ |
||
394 |
} \ |
||
395 |
ptr += n; \ |
||
396 |
break; |
||
397 |
✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
12 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
398 |
#undef LEAD_CASE |
||
399 |
case BT_NONXML: |
||
400 |
case BT_MALFORM: |
||
401 |
case BT_TRAIL: |
||
402 |
case BT_CR: |
||
403 |
case BT_LF: |
||
404 |
case BT_RSQB: |
||
405 |
12 |
*nextTokPtr = ptr; |
|
406 |
12 |
return XML_TOK_DATA_CHARS; |
|
407 |
default: |
||
408 |
6246 |
ptr += MINBPC(enc); |
|
409 |
6246 |
break; |
|
410 |
} |
||
411 |
} |
||
412 |
1014 |
*nextTokPtr = ptr; |
|
413 |
1014 |
return XML_TOK_DATA_CHARS; |
|
414 |
2700 |
} |
|
415 |
|||
416 |
/* ptr points to character following "</" */ |
||
417 |
|||
418 |
static int PTRCALL |
||
419 |
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
||
420 |
const char *end, const char **nextTokPtr) |
||
421 |
{ |
||
422 |
✓✓✓✓ ✓✓ |
19583652 |
REQUIRE_CHAR(enc, ptr, end); |
423 |
✗✗✓✓ ✗✗✗✓ ✗✗✗✓ ✗✗✗✗ ✓✗✗✗ ✓✗✗✗ ✗ |
9790554 |
switch (BYTE_TYPE(enc, ptr)) { |
424 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
9789876 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
425 |
default: |
||
426 |
*nextTokPtr = ptr; |
||
427 |
return XML_TOK_INVALID; |
||
428 |
} |
||
429 |
✓✓✓✓ ✓✓ |
166693824 |
while (HAS_CHAR(enc, ptr, end)) { |
430 |
✗✗✗✗ ✗✓✓✗ ✗✗✗✓ ✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✓✓ ✗ |
83309160 |
switch (BYTE_TYPE(enc, ptr)) { |
431 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
73556418 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
432 |
case BT_S: case BT_CR: case BT_LF: |
||
433 |
✓✓✗✗ ✗✗ |
24 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
434 |
✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗ |
6 |
switch (BYTE_TYPE(enc, ptr)) { |
435 |
case BT_S: case BT_CR: case BT_LF: |
||
436 |
break; |
||
437 |
case BT_GT: |
||
438 |
6 |
*nextTokPtr = ptr + MINBPC(enc); |
|
439 |
6 |
return XML_TOK_END_TAG; |
|
440 |
default: |
||
441 |
*nextTokPtr = ptr; |
||
442 |
return XML_TOK_INVALID; |
||
443 |
} |
||
444 |
} |
||
445 |
6 |
return XML_TOK_PARTIAL; |
|
446 |
#ifdef XML_NS |
||
447 |
case BT_COLON: |
||
448 |
/* no need to check qname syntax here, |
||
449 |
since end-tag must match exactly */ |
||
450 |
1044 |
ptr += MINBPC(enc); |
|
451 |
1044 |
break; |
|
452 |
#endif |
||
453 |
case BT_GT: |
||
454 |
9751320 |
*nextTokPtr = ptr + MINBPC(enc); |
|
455 |
9751320 |
return XML_TOK_END_TAG; |
|
456 |
default: |
||
457 |
*nextTokPtr = ptr; |
||
458 |
return XML_TOK_INVALID; |
||
459 |
} |
||
460 |
} |
||
461 |
38442 |
return XML_TOK_PARTIAL; |
|
462 |
9791154 |
} |
|
463 |
|||
464 |
/* ptr points to character following "&#X" */ |
||
465 |
|||
466 |
static int PTRCALL |
||
467 |
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
||
468 |
const char *end, const char **nextTokPtr) |
||
469 |
{ |
||
470 |
✓✓✓✓ ✓✓ |
4416 |
if (HAS_CHAR(enc, ptr, end)) { |
471 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
2340 |
switch (BYTE_TYPE(enc, ptr)) { |
472 |
case BT_DIGIT: |
||
473 |
case BT_HEX: |
||
474 |
break; |
||
475 |
default: |
||
476 |
6 |
*nextTokPtr = ptr; |
|
477 |
6 |
return XML_TOK_INVALID; |
|
478 |
} |
||
479 |
✓✓✓✓ ✓✓ |
11664 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
480 |
✓✗✗✓ ✓✗✓✗ ✗✓✓✗ ✓✗✗✓ |
6096 |
switch (BYTE_TYPE(enc, ptr)) { |
481 |
case BT_DIGIT: |
||
482 |
case BT_HEX: |
||
483 |
break; |
||
484 |
case BT_SEMI: |
||
485 |
1398 |
*nextTokPtr = ptr + MINBPC(enc); |
|
486 |
1398 |
return XML_TOK_CHAR_REF; |
|
487 |
default: |
||
488 |
*nextTokPtr = ptr; |
||
489 |
return XML_TOK_INVALID; |
||
490 |
} |
||
491 |
} |
||
492 |
} |
||
493 |
804 |
return XML_TOK_PARTIAL; |
|
494 |
2208 |
} |
|
495 |
|||
496 |
/* ptr points to character following "&#" */ |
||
497 |
|||
498 |
static int PTRCALL |
||
499 |
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
||
500 |
const char *end, const char **nextTokPtr) |
||
501 |
{ |
||
502 |
✓✓✓✓ ✓✓ |
13128 |
if (HAS_CHAR(enc, ptr, end)) { |
503 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
6702 |
if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
504 |
2208 |
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
505 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
4662 |
switch (BYTE_TYPE(enc, ptr)) { |
506 |
case BT_DIGIT: |
||
507 |
break; |
||
508 |
default: |
||
509 |
*nextTokPtr = ptr; |
||
510 |
return XML_TOK_INVALID; |
||
511 |
} |
||
512 |
✓✓✓✓ ✓✓ |
22356 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
513 |
✓✗✓✓ ✗✓✗✓ ✓✗✓✗ ✓ |
12432 |
switch (BYTE_TYPE(enc, ptr)) { |
514 |
case BT_DIGIT: |
||
515 |
break; |
||
516 |
case BT_SEMI: |
||
517 |
3324 |
*nextTokPtr = ptr + MINBPC(enc); |
|
518 |
3324 |
return XML_TOK_CHAR_REF; |
|
519 |
default: |
||
520 |
*nextTokPtr = ptr; |
||
521 |
return XML_TOK_INVALID; |
||
522 |
} |
||
523 |
} |
||
524 |
} |
||
525 |
1032 |
return XML_TOK_PARTIAL; |
|
526 |
6564 |
} |
|
527 |
|||
528 |
/* ptr points to character following "&" */ |
||
529 |
|||
530 |
static int PTRCALL |
||
531 |
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
||
532 |
const char **nextTokPtr) |
||
533 |
{ |
||
534 |
✓✓✓✓ ✓✓ |
6095451 |
REQUIRE_CHAR(enc, ptr, end); |
535 |
✗✗✓✗ ✓✗✓✗ ✓✗✗✗ ✓✗✗✗ ✓✗✓✓ ✓✗✓✗ ✗✗✓✗ |
3045879 |
switch (BYTE_TYPE(enc, ptr)) { |
536 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
3037683 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
537 |
case BT_NUM: |
||
538 |
6564 |
return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
539 |
default: |
||
540 |
*nextTokPtr = ptr; |
||
541 |
return XML_TOK_INVALID; |
||
542 |
} |
||
543 |
✓✓✓✓ ✓✓ |
43880394 |
while (HAS_CHAR(enc, ptr, end)) { |
544 |
✗✗✗✗ ✗✓✗✓ ✗✓✗✓ ✗✗✗✗ ✗✗✓✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✓ ✗ |
21905619 |
switch (BYTE_TYPE(enc, ptr)) { |
545 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
18902550 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
546 |
case BT_SEMI: |
||
547 |
3002397 |
*nextTokPtr = ptr + MINBPC(enc); |
|
548 |
3002397 |
return XML_TOK_ENTITY_REF; |
|
549 |
default: |
||
550 |
*nextTokPtr = ptr; |
||
551 |
return XML_TOK_INVALID; |
||
552 |
} |
||
553 |
} |
||
554 |
35268 |
return XML_TOK_PARTIAL; |
|
555 |
3046560 |
} |
|
556 |
|||
557 |
/* ptr points to character following first character of attribute name */ |
||
558 |
|||
559 |
static int PTRCALL |
||
560 |
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
||
561 |
const char **nextTokPtr) |
||
562 |
{ |
||
563 |
#ifdef XML_NS |
||
564 |
int hadColon = 0; |
||
565 |
#endif |
||
566 |
✓✓✓✓ ✓✓ |
1323025404 |
while (HAS_CHAR(enc, ptr, end)) { |
567 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✓✓✗✓ ✓✓✗✗ ✗✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗✗✗✓ ✗✗✗✓ ✗ |
660578256 |
switch (BYTE_TYPE(enc, ptr)) { |
568 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
654638730 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
569 |
#ifdef XML_NS |
||
570 |
case BT_COLON: |
||
571 |
✓✓✗✓ ✗✓ |
1742502 |
if (hadColon) { |
572 |
6 |
*nextTokPtr = ptr; |
|
573 |
6 |
return XML_TOK_INVALID; |
|
574 |
} |
||
575 |
hadColon = 1; |
||
576 |
1742496 |
ptr += MINBPC(enc); |
|
577 |
✓✓✓✓ ✓✓ |
1744944 |
REQUIRE_CHAR(enc, ptr, end); |
578 |
✗✗✓✗ ✗✗✓✓ ✓✓✗✓ ✗✗✗✗ ✓✗✗✗ ✓✗✗✗ ✗ |
1741170 |
switch (BYTE_TYPE(enc, ptr)) { |
579 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
1740132 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
580 |
default: |
||
581 |
6 |
*nextTokPtr = ptr; |
|
582 |
6 |
return XML_TOK_INVALID; |
|
583 |
} |
||
584 |
516 |
break; |
|
585 |
#endif |
||
586 |
case BT_S: case BT_CR: case BT_LF: |
||
587 |
1380000 |
for (;;) { |
|
588 |
int t; |
||
589 |
|||
590 |
1380000 |
ptr += MINBPC(enc); |
|
591 |
✗✓✗✗ ✗✗ |
1380000 |
REQUIRE_CHAR(enc, ptr, end); |
592 |
✗✗✗✗ |
1380000 |
t = BYTE_TYPE(enc, ptr); |
593 |
✓✓✗✗ ✗✗ |
1380000 |
if (t == BT_EQUALS) |
594 |
1350000 |
break; |
|
595 |
✗✗✗✓ ✗✗✗✗ ✗✗✗✗ |
30000 |
switch (t) { |
596 |
case BT_S: |
||
597 |
case BT_LF: |
||
598 |
case BT_CR: |
||
599 |
break; |
||
600 |
default: |
||
601 |
*nextTokPtr = ptr; |
||
602 |
return XML_TOK_INVALID; |
||
603 |
} |
||
604 |
✓✓✗✗ ✗✗✗✗ ✗ |
30000 |
} |
605 |
/* fall through */ |
||
606 |
case BT_EQUALS: |
||
607 |
{ |
||
608 |
int open; |
||
609 |
#ifdef XML_NS |
||
610 |
hadColon = 0; |
||
611 |
#endif |
||
612 |
5808288 |
for (;;) { |
|
613 |
5808288 |
ptr += MINBPC(enc); |
|
614 |
✓✓✓✓ ✓✓ |
5812368 |
REQUIRE_CHAR(enc, ptr, end); |
615 |
✓✗✓✗ |
5806464 |
open = BYTE_TYPE(enc, ptr); |
616 |
✓✓✗✓ ✗✓ |
5804208 |
if (open == BT_QUOT || open == BT_APOS) |
617 |
break; |
||
618 |
✗✗✗✓ ✗✗✗✗ ✗✗✗✗ |
1620000 |
switch (open) { |
619 |
case BT_S: |
||
620 |
case BT_LF: |
||
621 |
case BT_CR: |
||
622 |
break; |
||
623 |
default: |
||
624 |
*nextTokPtr = ptr; |
||
625 |
return XML_TOK_INVALID; |
||
626 |
} |
||
627 |
} |
||
628 |
4184208 |
ptr += MINBPC(enc); |
|
629 |
/* in attribute value */ |
||
630 |
436903974 |
for (;;) { |
|
631 |
int t; |
||
632 |
✓✓✓✓ ✓✓ |
437463834 |
REQUIRE_CHAR(enc, ptr, end); |
633 |
✓✗✓✗ |
436349106 |
t = BYTE_TYPE(enc, ptr); |
634 |
✓✓✓✓ ✓✓ |
436344114 |
if (t == open) |
635 |
3622632 |
break; |
|
636 |
✓✗✗✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗✓✗✗ ✗✗✗✗ ✓✗✓ |
432721482 |
switch (t) { |
637 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
30 |
INVALID_CASES(ptr, nextTokPtr) |
638 |
case BT_AMP: |
||
639 |
{ |
||
640 |
6510 |
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
|
641 |
✓✓✓✓ ✓✓ |
6510 |
if (tok <= 0) { |
642 |
✗✓✗✓ ✗✓ |
1704 |
if (tok == XML_TOK_INVALID) |
643 |
*nextTokPtr = ptr; |
||
644 |
1704 |
return tok; |
|
645 |
} |
||
646 |
✓✓✓✓ ✓✓ |
4806 |
break; |
647 |
} |
||
648 |
case BT_LT: |
||
649 |
*nextTokPtr = ptr; |
||
650 |
return XML_TOK_INVALID; |
||
651 |
default: |
||
652 |
432714960 |
ptr += MINBPC(enc); |
|
653 |
432714960 |
break; |
|
654 |
} |
||
655 |
✓✓✓✓ ✓✓✓✓ ✓ |
432719766 |
} |
656 |
3622632 |
ptr += MINBPC(enc); |
|
657 |
✓✓✓✓ ✓✓ |
3626700 |
REQUIRE_CHAR(enc, ptr, end); |
658 |
✗✗✗✓ ✓✓✓✗ ✗✗✗✓ ✓✓✓✗ ✗✗✗✗ ✓✓ |
3619236 |
switch (BYTE_TYPE(enc, ptr)) { |
659 |
case BT_S: |
||
660 |
case BT_CR: |
||
661 |
case BT_LF: |
||
662 |
break; |
||
663 |
case BT_SOL: |
||
664 |
goto sol; |
||
665 |
case BT_GT: |
||
666 |
goto gt; |
||
667 |
default: |
||
668 |
*nextTokPtr = ptr; |
||
669 |
return XML_TOK_INVALID; |
||
670 |
} |
||
671 |
/* ptr points to closing quote */ |
||
672 |
1485906 |
for (;;) { |
|
673 |
1668498 |
ptr += MINBPC(enc); |
|
674 |
✓✓✓✓ ✗✗ |
1671516 |
REQUIRE_CHAR(enc, ptr, end); |
675 |
✗✗✓✗ ✗✗✗✗ ✓✓✓✗ ✓✓✓✗ ✓✗✗✗ ✗✗✗✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
1666014 |
switch (BYTE_TYPE(enc, ptr)) { |
676 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
1482792 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
677 |
case BT_S: case BT_CR: case BT_LF: |
||
678 |
182592 |
continue; |
|
679 |
case BT_GT: |
||
680 |
gt: |
||
681 |
2131968 |
*nextTokPtr = ptr + MINBPC(enc); |
|
682 |
2131968 |
return XML_TOK_START_TAG_WITH_ATTS; |
|
683 |
case BT_SOL: |
||
684 |
sol: |
||
685 |
876 |
ptr += MINBPC(enc); |
|
686 |
✓✓✓✓ ✓✓ |
1326 |
REQUIRE_CHAR(enc, ptr, end); |
687 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
450 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
688 |
6 |
*nextTokPtr = ptr; |
|
689 |
6 |
return XML_TOK_INVALID; |
|
690 |
} |
||
691 |
420 |
*nextTokPtr = ptr + MINBPC(enc); |
|
692 |
420 |
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
|
693 |
default: |
||
694 |
*nextTokPtr = ptr; |
||
695 |
return XML_TOK_INVALID; |
||
696 |
} |
||
697 |
break; |
||
698 |
} |
||
699 |
✓✓ | 1482702 |
break; |
700 |
} |
||
701 |
default: |
||
702 |
*nextTokPtr = ptr; |
||
703 |
return XML_TOK_INVALID; |
||
704 |
} |
||
705 |
} |
||
706 |
943338 |
return XML_TOK_PARTIAL; |
|
707 |
3651384 |
} |
|
708 |
|||
709 |
/* ptr points to character following "<" */ |
||
710 |
|||
711 |
static int PTRCALL |
||
712 |
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
||
713 |
const char **nextTokPtr) |
||
714 |
{ |
||
715 |
#ifdef XML_NS |
||
716 |
int hadColon; |
||
717 |
#endif |
||
718 |
✓✓✓✓ ✓✓ |
43479654 |
REQUIRE_CHAR(enc, ptr, end); |
719 |
✗✗✓✓ ✓✗✓✓ ✓✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗ |
21746130 |
switch (BYTE_TYPE(enc, ptr)) { |
720 |
✓✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
11909538 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
721 |
case BT_EXCL: |
||
722 |
33336 |
ptr += MINBPC(enc); |
|
723 |
✓✓✓✓ ✓✓ |
33822 |
REQUIRE_CHAR(enc, ptr, end); |
724 |
✓✓✓✓ ✗✗✓✗ ✓✗✗✓ ✗ |
35514 |
switch (BYTE_TYPE(enc, ptr)) { |
725 |
case BT_MINUS: |
||
726 |
30522 |
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
727 |
case BT_LSQB: |
||
728 |
2322 |
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
|
729 |
end, nextTokPtr); |
||
730 |
} |
||
731 |
6 |
*nextTokPtr = ptr; |
|
732 |
6 |
return XML_TOK_INVALID; |
|
733 |
case BT_QUEST: |
||
734 |
2178 |
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
735 |
case BT_SOL: |
||
736 |
9791154 |
return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
737 |
default: |
||
738 |
*nextTokPtr = ptr; |
||
739 |
return XML_TOK_INVALID; |
||
740 |
} |
||
741 |
#ifdef XML_NS |
||
742 |
hadColon = 0; |
||
743 |
#endif |
||
744 |
/* we have a start-tag */ |
||
745 |
✓✓✓✓ ✓✓ |
1804651014 |
while (HAS_CHAR(enc, ptr, end)) { |
746 |
✗✗✗✗ ✗✓✓✗ ✓✓✗✗ ✓✓✓✗ ✓✗✗✗ ✗✗✗✓ ✗✗✗✓ ✗✗✓✓ ✓✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✓✓✓✗ |
901704402 |
switch (BYTE_TYPE(enc, ptr)) { |
747 |
✓✓✓✓ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
889226511 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
748 |
#ifdef XML_NS |
||
749 |
case BT_COLON: |
||
750 |
✓✓✗✓ ✗✓ |
1191432 |
if (hadColon) { |
751 |
6 |
*nextTokPtr = ptr; |
|
752 |
6 |
return XML_TOK_INVALID; |
|
753 |
} |
||
754 |
hadColon = 1; |
||
755 |
1191426 |
ptr += MINBPC(enc); |
|
756 |
✓✓✓✓ ✓✓ |
1192842 |
REQUIRE_CHAR(enc, ptr, end); |
757 |
✗✗✓✗ ✗✗✓✓ ✗✗✗✓ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗ |
1191846 |
switch (BYTE_TYPE(enc, ptr)) { |
758 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
1190412 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
759 |
default: |
||
760 |
6 |
*nextTokPtr = ptr; |
|
761 |
6 |
return XML_TOK_INVALID; |
|
762 |
} |
||
763 |
714 |
break; |
|
764 |
#endif |
||
765 |
case BT_S: case BT_CR: case BT_LF: |
||
766 |
{ |
||
767 |
3654048 |
ptr += MINBPC(enc); |
|
768 |
✓✓✓✓ ✓✓ |
7736154 |
while (HAS_CHAR(enc, ptr, end)) { |
769 |
✗✗✓✗ ✗✗✗✗ ✓✗✓✓ ✓✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ |
4082466 |
switch (BYTE_TYPE(enc, ptr)) { |
770 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
3651474 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
771 |
case BT_GT: |
||
772 |
goto gt; |
||
773 |
case BT_SOL: |
||
774 |
goto sol; |
||
775 |
case BT_S: case BT_CR: case BT_LF: |
||
776 |
428058 |
ptr += MINBPC(enc); |
|
777 |
428058 |
continue; |
|
778 |
default: |
||
779 |
*nextTokPtr = ptr; |
||
780 |
return XML_TOK_INVALID; |
||
781 |
} |
||
782 |
3651384 |
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
|
783 |
} |
||
784 |
2634 |
return XML_TOK_PARTIAL; |
|
785 |
} |
||
786 |
case BT_GT: |
||
787 |
gt: |
||
788 |
7623849 |
*nextTokPtr = ptr + MINBPC(enc); |
|
789 |
7623849 |
return XML_TOK_START_TAG_NO_ATTS; |
|
790 |
case BT_SOL: |
||
791 |
sol: |
||
792 |
1494 |
ptr += MINBPC(enc); |
|
793 |
✓✓✓✓ ✓✓ |
2166 |
REQUIRE_CHAR(enc, ptr, end); |
794 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
870 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
795 |
*nextTokPtr = ptr; |
||
796 |
return XML_TOK_INVALID; |
||
797 |
} |
||
798 |
822 |
*nextTokPtr = ptr + MINBPC(enc); |
|
799 |
822 |
return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
|
800 |
default: |
||
801 |
*nextTokPtr = ptr; |
||
802 |
return XML_TOK_INVALID; |
||
803 |
} |
||
804 |
} |
||
805 |
628557 |
return XML_TOK_PARTIAL; |
|
806 |
21738588 |
} |
|
807 |
|||
808 |
static int PTRCALL |
||
809 |
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
||
810 |
const char **nextTokPtr) |
||
811 |
{ |
||
812 |
✓✓✓✓ ✓✓ |
153235746 |
if (ptr >= end) |
813 |
16293 |
return XML_TOK_NONE; |
|
814 |
if (MINBPC(enc) > 1) { |
||
815 |
6414 |
size_t n = end - ptr; |
|
816 |
✓✓✓✓ |
6414 |
if (n & (MINBPC(enc) - 1)) { |
817 |
3048 |
n &= ~(MINBPC(enc) - 1); |
|
818 |
✓✓✓✓ |
3048 |
if (n == 0) |
819 |
432 |
return XML_TOK_PARTIAL; |
|
820 |
2616 |
end = ptr + n; |
|
821 |
2616 |
} |
|
822 |
✓✓✓✓ |
5982 |
} |
823 |
✓✓✓✓ ✓✓✓✓ ✗✗✓✓ ✓✓✓✓ ✗✗✓✗ ✗✗✗✗ ✗✓✓✓ ✓✓✗✗ ✗✗✗✗ ✗✗✗✓ |
76613112 |
switch (BYTE_TYPE(enc, ptr)) { |
824 |
case BT_LT: |
||
825 |
21738588 |
return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
826 |
case BT_AMP: |
||
827 |
3039708 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
828 |
case BT_CR: |
||
829 |
60 |
ptr += MINBPC(enc); |
|
830 |
✓✓✗✗ ✗✗ |
60 |
if (! HAS_CHAR(enc, ptr, end)) |
831 |
48 |
return XML_TOK_TRAILING_CR; |
|
832 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
12 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
833 |
6 |
ptr += MINBPC(enc); |
|
834 |
12 |
*nextTokPtr = ptr; |
|
835 |
12 |
return XML_TOK_DATA_NEWLINE; |
|
836 |
case BT_LF: |
||
837 |
21391074 |
*nextTokPtr = ptr + MINBPC(enc); |
|
838 |
21391074 |
return XML_TOK_DATA_NEWLINE; |
|
839 |
case BT_RSQB: |
||
840 |
48 |
ptr += MINBPC(enc); |
|
841 |
✓✓✓✗ ✗✗ |
48 |
if (! HAS_CHAR(enc, ptr, end)) |
842 |
36 |
return XML_TOK_TRAILING_RSQB; |
|
843 |
✗✓✗✗ ✗✗✗✗ ✗✗ |
12 |
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
844 |
break; |
||
845 |
ptr += MINBPC(enc); |
||
846 |
if (! HAS_CHAR(enc, ptr, end)) |
||
847 |
return XML_TOK_TRAILING_RSQB; |
||
848 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
||
849 |
ptr -= MINBPC(enc); |
||
850 |
break; |
||
851 |
} |
||
852 |
*nextTokPtr = ptr; |
||
853 |
return XML_TOK_INVALID; |
||
854 |
✓✓✓✓ ✓✓✓✓ ✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
2670 |
INVALID_CASES(ptr, nextTokPtr) |
855 |
default: |
||
856 |
30430284 |
ptr += MINBPC(enc); |
|
857 |
30430284 |
break; |
|
858 |
} |
||
859 |
✓✓✗✓ ✗✓ |
973174476 |
while (HAS_CHAR(enc, ptr, end)) { |
860 |
✗✗✗✓ ✗✗✗✗ ✗✗✓✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
486576990 |
switch (BYTE_TYPE(enc, ptr)) { |
861 |
#define LEAD_CASE(n) \ |
||
862 |
case BT_LEAD ## n: \ |
||
863 |
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
864 |
*nextTokPtr = ptr; \ |
||
865 |
return XML_TOK_DATA_CHARS; \ |
||
866 |
} \ |
||
867 |
ptr += n; \ |
||
868 |
break; |
||
869 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
||
870 |
#undef LEAD_CASE |
||
871 |
case BT_RSQB: |
||
872 |
✓✗✗✗ ✗✗ |
180000 |
if (HAS_CHARS(enc, ptr, end, 2)) { |
873 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
180000 |
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
874 |
ptr += MINBPC(enc); |
||
875 |
180000 |
break; |
|
876 |
} |
||
877 |
if (HAS_CHARS(enc, ptr, end, 3)) { |
||
878 |
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
||
879 |
ptr += MINBPC(enc); |
||
880 |
break; |
||
881 |
} |
||
882 |
*nextTokPtr = ptr + 2*MINBPC(enc); |
||
883 |
return XML_TOK_INVALID; |
||
884 |
} |
||
885 |
} |
||
886 |
/* fall through */ |
||
887 |
case BT_AMP: |
||
888 |
case BT_LT: |
||
889 |
case BT_NONXML: |
||
890 |
case BT_MALFORM: |
||
891 |
case BT_TRAIL: |
||
892 |
case BT_CR: |
||
893 |
case BT_LF: |
||
894 |
30420096 |
*nextTokPtr = ptr; |
|
895 |
30420096 |
return XML_TOK_DATA_CHARS; |
|
896 |
default: |
||
897 |
455976894 |
ptr += MINBPC(enc); |
|
898 |
455976894 |
break; |
|
899 |
} |
||
900 |
} |
||
901 |
10248 |
*nextTokPtr = ptr; |
|
902 |
10248 |
return XML_TOK_DATA_CHARS; |
|
903 |
76617873 |
} |
|
904 |
|||
905 |
/* ptr points to character following "%" */ |
||
906 |
|||
907 |
static int PTRCALL |
||
908 |
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
||
909 |
const char **nextTokPtr) |
||
910 |
{ |
||
911 |
✓✓✓✓ ✓✓ |
30450 |
REQUIRE_CHAR(enc, ptr, end); |
912 |
✗✗✓✗ ✗✗✗✗ ✗✓✗✓ ✗✗✗✓ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✓✗✗✗ ✗✗✗✓ ✗ |
14118 |
switch (BYTE_TYPE(enc, ptr)) { |
913 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
13374 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
914 |
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
||
915 |
576 |
*nextTokPtr = ptr; |
|
916 |
576 |
return XML_TOK_PERCENT; |
|
917 |
default: |
||
918 |
*nextTokPtr = ptr; |
||
919 |
return XML_TOK_INVALID; |
||
920 |
} |
||
921 |
✓✓✓✓ ✓✓ |
12623736 |
while (HAS_CHAR(enc, ptr, end)) { |
922 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✓ ✗ |
6299100 |
switch (BYTE_TYPE(enc, ptr)) { |
923 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
6298542 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
924 |
case BT_SEMI: |
||
925 |
486 |
*nextTokPtr = ptr + MINBPC(enc); |
|
926 |
486 |
return XML_TOK_PARAM_ENTITY_REF; |
|
927 |
default: |
||
928 |
*nextTokPtr = ptr; |
||
929 |
return XML_TOK_INVALID; |
||
930 |
} |
||
931 |
} |
||
932 |
12858 |
return XML_TOK_PARTIAL; |
|
933 |
14790 |
} |
|
934 |
|||
935 |
static int PTRCALL |
||
936 |
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
||
937 |
const char **nextTokPtr) |
||
938 |
{ |
||
939 |
✓✓✗✗ ✓✓ |
20430 |
REQUIRE_CHAR(enc, ptr, end); |
940 |
✗✗✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗ |
8712 |
switch (BYTE_TYPE(enc, ptr)) { |
941 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
8646 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
942 |
default: |
||
943 |
6 |
*nextTokPtr = ptr; |
|
944 |
6 |
return XML_TOK_INVALID; |
|
945 |
} |
||
946 |
✓✓✗✗ ✓✓ |
77844 |
while (HAS_CHAR(enc, ptr, end)) { |
947 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✓✗ |
31434 |
switch (BYTE_TYPE(enc, ptr)) { |
948 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
30324 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
949 |
case BT_CR: case BT_LF: case BT_S: |
||
950 |
case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
||
951 |
1062 |
*nextTokPtr = ptr; |
|
952 |
1062 |
return XML_TOK_POUND_NAME; |
|
953 |
default: |
||
954 |
*nextTokPtr = ptr; |
||
955 |
return XML_TOK_INVALID; |
||
956 |
} |
||
957 |
} |
||
958 |
7554 |
return -XML_TOK_POUND_NAME; |
|
959 |
9684 |
} |
|
960 |
|||
961 |
static int PTRCALL |
||
962 |
PREFIX(scanLit)(int open, const ENCODING *enc, |
||
963 |
const char *ptr, const char *end, |
||
964 |
const char **nextTokPtr) |
||
965 |
{ |
||
966 |
✓✓✓✓ ✓✓ |
1101006123 |
while (HAS_CHAR(enc, ptr, end)) { |
967 |
✓✗✓✓ |
548802300 |
int t = BYTE_TYPE(enc, ptr); |
968 |
✓✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗✗✗✗ ✓✓✗✗ ✗✗✗✗ ✗✓✓ |
548790228 |
switch (t) { |
969 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
66 |
INVALID_CASES(ptr, nextTokPtr) |
970 |
case BT_QUOT: |
||
971 |
case BT_APOS: |
||
972 |
97122 |
ptr += MINBPC(enc); |
|
973 |
✓✓✓✗ ✓✗ |
97122 |
if (t != open) |
974 |
break; |
||
975 |
✓✓✓✓ ✓✓ |
72648 |
if (! HAS_CHAR(enc, ptr, end)) |
976 |
6330 |
return -XML_TOK_LITERAL; |
|
977 |
66318 |
*nextTokPtr = ptr; |
|
978 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗ |
66450 |
switch (BYTE_TYPE(enc, ptr)) { |
979 |
case BT_S: case BT_CR: case BT_LF: |
||
980 |
case BT_GT: case BT_PERCNT: case BT_LSQB: |
||
981 |
66318 |
return XML_TOK_LITERAL; |
|
982 |
default: |
||
983 |
return XML_TOK_INVALID; |
||
984 |
} |
||
985 |
default: |
||
986 |
548693082 |
ptr += MINBPC(enc); |
|
987 |
548693082 |
break; |
|
988 |
} |
||
989 |
✓✓✓✓ ✓✓ |
548717574 |
} |
990 |
1117671 |
return XML_TOK_PARTIAL; |
|
991 |
1190325 |
} |
|
992 |
|||
993 |
static int PTRCALL |
||
994 |
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
||
995 |
const char **nextTokPtr) |
||
996 |
{ |
||
997 |
int tok; |
||
998 |
✓✓✓✓ ✓✓ |
6104658 |
if (ptr >= end) |
999 |
111591 |
return XML_TOK_NONE; |
|
1000 |
if (MINBPC(enc) > 1) { |
||
1001 |
21228 |
size_t n = end - ptr; |
|
1002 |
✓✓✓✓ |
21228 |
if (n & (MINBPC(enc) - 1)) { |
1003 |
10260 |
n &= ~(MINBPC(enc) - 1); |
|
1004 |
✓✓✓✓ |
10260 |
if (n == 0) |
1005 |
990 |
return XML_TOK_PARTIAL; |
|
1006 |
9270 |
end = ptr + n; |
|
1007 |
9270 |
} |
|
1008 |
✓✓✓✓ |
20238 |
} |
1009 |
✓✓✓✓ ✗✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✗✗✓ ✗✗✗✓ ✓✓✗✗ ✓✓✓✗ ✓✓✗✓ ✓✗✗✗ ✓✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✓✓✗ ✓✓✗✗ ✓✓✗✓ ✓✗✗✗ ✓✓✗✗ ✗✗✓✗ ✗✗✗✓ ✗ |
2980224 |
switch (BYTE_TYPE(enc, ptr)) { |
1010 |
case BT_QUOT: |
||
1011 |
60024 |
return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1012 |
case BT_APOS: |
||
1013 |
1130301 |
return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1014 |
case BT_LT: |
||
1015 |
{ |
||
1016 |
523455 |
ptr += MINBPC(enc); |
|
1017 |
✓✓✓✓ ✓✓ |
534648 |
REQUIRE_CHAR(enc, ptr, end); |
1018 |
✓✓✗✗ ✗✗✗✓ ✓✓✗✓ ✓✗✗✗ ✗✗✓✗ ✓✗✓✓ ✗✗✗✗ ✗✓✗ |
543870 |
switch (BYTE_TYPE(enc, ptr)) { |
1019 |
case BT_EXCL: |
||
1020 |
257619 |
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1021 |
case BT_QUEST: |
||
1022 |
218532 |
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1023 |
case BT_NMSTRT: |
||
1024 |
case BT_HEX: |
||
1025 |
case BT_NONASCII: |
||
1026 |
case BT_LEAD2: |
||
1027 |
case BT_LEAD3: |
||
1028 |
case BT_LEAD4: |
||
1029 |
36099 |
*nextTokPtr = ptr - MINBPC(enc); |
|
1030 |
36099 |
return XML_TOK_INSTANCE_START; |
|
1031 |
} |
||
1032 |
12 |
*nextTokPtr = ptr; |
|
1033 |
12 |
return XML_TOK_INVALID; |
|
1034 |
} |
||
1035 |
case BT_CR: |
||
1036 |
✓✗✓✗ ✗✗ |
18 |
if (ptr + MINBPC(enc) == end) { |
1037 |
18 |
*nextTokPtr = end; |
|
1038 |
/* indicate that this might be part of a CR/LF pair */ |
||
1039 |
18 |
return -XML_TOK_PROLOG_S; |
|
1040 |
} |
||
1041 |
/* fall through */ |
||
1042 |
case BT_S: case BT_LF: |
||
1043 |
1468887 |
for (;;) { |
|
1044 |
1468887 |
ptr += MINBPC(enc); |
|
1045 |
✓✓✓✓ ✗✓ |
1468887 |
if (! HAS_CHAR(enc, ptr, end)) |
1046 |
break; |
||
1047 |
✗✓✗✓ ✓✗✗✓ ✗✗✗✗ ✗✗✗✗ |
1385496 |
switch (BYTE_TYPE(enc, ptr)) { |
1048 |
case BT_S: case BT_LF: |
||
1049 |
break; |
||
1050 |
case BT_CR: |
||
1051 |
/* don't split CR/LF pair */ |
||
1052 |
if (ptr + MINBPC(enc) != end) |
||
1053 |
break; |
||
1054 |
/* fall through */ |
||
1055 |
default: |
||
1056 |
335460 |
*nextTokPtr = ptr; |
|
1057 |
335460 |
return XML_TOK_PROLOG_S; |
|
1058 |
} |
||
1059 |
} |
||
1060 |
83403 |
*nextTokPtr = ptr; |
|
1061 |
83403 |
return XML_TOK_PROLOG_S; |
|
1062 |
case BT_PERCNT: |
||
1063 |
14580 |
return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1064 |
case BT_COMMA: |
||
1065 |
1956 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1066 |
1956 |
return XML_TOK_COMMA; |
|
1067 |
case BT_LSQB: |
||
1068 |
3423 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1069 |
3423 |
return XML_TOK_OPEN_BRACKET; |
|
1070 |
case BT_RSQB: |
||
1071 |
4794 |
ptr += MINBPC(enc); |
|
1072 |
✓✓✓✓ ✓✓ |
4794 |
if (! HAS_CHAR(enc, ptr, end)) |
1073 |
2397 |
return -XML_TOK_CLOSE_BRACKET; |
|
1074 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
2427 |
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1075 |
REQUIRE_CHARS(enc, ptr, end, 2); |
||
1076 |
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
||
1077 |
*nextTokPtr = ptr + 2*MINBPC(enc); |
||
1078 |
return XML_TOK_COND_SECT_CLOSE; |
||
1079 |
} |
||
1080 |
} |
||
1081 |
2397 |
*nextTokPtr = ptr; |
|
1082 |
2397 |
return XML_TOK_CLOSE_BRACKET; |
|
1083 |
case BT_LPAR: |
||
1084 |
2934 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1085 |
2934 |
return XML_TOK_OPEN_PAREN; |
|
1086 |
case BT_RPAR: |
||
1087 |
4284 |
ptr += MINBPC(enc); |
|
1088 |
✓✓✗✗ ✗✗ |
4284 |
if (! HAS_CHAR(enc, ptr, end)) |
1089 |
2136 |
return -XML_TOK_CLOSE_PAREN; |
|
1090 |
✓✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗ |
2148 |
switch (BYTE_TYPE(enc, ptr)) { |
1091 |
case BT_AST: |
||
1092 |
210 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1093 |
210 |
return XML_TOK_CLOSE_PAREN_ASTERISK; |
|
1094 |
case BT_QUEST: |
||
1095 |
*nextTokPtr = ptr + MINBPC(enc); |
||
1096 |
return XML_TOK_CLOSE_PAREN_QUESTION; |
||
1097 |
case BT_PLUS: |
||
1098 |
42 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1099 |
42 |
return XML_TOK_CLOSE_PAREN_PLUS; |
|
1100 |
case BT_CR: case BT_LF: case BT_S: |
||
1101 |
case BT_GT: case BT_COMMA: case BT_VERBAR: |
||
1102 |
case BT_RPAR: |
||
1103 |
1896 |
*nextTokPtr = ptr; |
|
1104 |
1896 |
return XML_TOK_CLOSE_PAREN; |
|
1105 |
} |
||
1106 |
*nextTokPtr = ptr; |
||
1107 |
return XML_TOK_INVALID; |
||
1108 |
case BT_VERBAR: |
||
1109 |
5430 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1110 |
5430 |
return XML_TOK_OR; |
|
1111 |
case BT_GT: |
||
1112 |
39399 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1113 |
39399 |
return XML_TOK_DECL_CLOSE; |
|
1114 |
case BT_NUM: |
||
1115 |
9684 |
return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1116 |
#define LEAD_CASE(n) \ |
||
1117 |
case BT_LEAD ## n: \ |
||
1118 |
if (end - ptr < n) \ |
||
1119 |
return XML_TOK_PARTIAL_CHAR; \ |
||
1120 |
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
||
1121 |
ptr += n; \ |
||
1122 |
tok = XML_TOK_NAME; \ |
||
1123 |
break; \ |
||
1124 |
} \ |
||
1125 |
if (IS_NAME_CHAR(enc, ptr, n)) { \ |
||
1126 |
ptr += n; \ |
||
1127 |
tok = XML_TOK_NMTOKEN; \ |
||
1128 |
break; \ |
||
1129 |
} \ |
||
1130 |
*nextTokPtr = ptr; \ |
||
1131 |
return XML_TOK_INVALID; |
||
1132 |
✓✓✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗ |
180 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1133 |
#undef LEAD_CASE |
||
1134 |
case BT_NMSTRT: |
||
1135 |
case BT_HEX: |
||
1136 |
tok = XML_TOK_NAME; |
||
1137 |
720405 |
ptr += MINBPC(enc); |
|
1138 |
720405 |
break; |
|
1139 |
case BT_DIGIT: |
||
1140 |
case BT_NAME: |
||
1141 |
case BT_MINUS: |
||
1142 |
#ifdef XML_NS |
||
1143 |
case BT_COLON: |
||
1144 |
#endif |
||
1145 |
tok = XML_TOK_NMTOKEN; |
||
1146 |
36 |
ptr += MINBPC(enc); |
|
1147 |
36 |
break; |
|
1148 |
case BT_NONASCII: |
||
1149 |
✗✗✓✓ |
48 |
if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
1150 |
30 |
ptr += MINBPC(enc); |
|
1151 |
tok = XML_TOK_NAME; |
||
1152 |
30 |
break; |
|
1153 |
} |
||
1154 |
✗✗✓✗ |
18 |
if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
1155 |
18 |
ptr += MINBPC(enc); |
|
1156 |
tok = XML_TOK_NMTOKEN; |
||
1157 |
18 |
break; |
|
1158 |
} |
||
1159 |
/* fall through */ |
||
1160 |
default: |
||
1161 |
30 |
*nextTokPtr = ptr; |
|
1162 |
30 |
return XML_TOK_INVALID; |
|
1163 |
} |
||
1164 |
✓✓✓✓ ✓✓ |
515765292 |
while (HAS_CHAR(enc, ptr, end)) { |
1165 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✓✓✓ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗ |
257259369 |
switch (BYTE_TYPE(enc, ptr)) { |
1166 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
257157243 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1167 |
case BT_GT: case BT_RPAR: case BT_COMMA: |
||
1168 |
case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
||
1169 |
case BT_S: case BT_CR: case BT_LF: |
||
1170 |
90174 |
*nextTokPtr = ptr; |
|
1171 |
90174 |
return tok; |
|
1172 |
#ifdef XML_NS |
||
1173 |
case BT_COLON: |
||
1174 |
5640 |
ptr += MINBPC(enc); |
|
1175 |
✓✓✓✗ ✗✗✓✗ ✗ |
5640 |
switch (tok) { |
1176 |
case XML_TOK_NAME: |
||
1177 |
✓✓✗✗ ✓✓ |
6270 |
REQUIRE_CHAR(enc, ptr, end); |
1178 |
tok = XML_TOK_PREFIXED_NAME; |
||
1179 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗✓✗✗ ✗✗ |
4944 |
switch (BYTE_TYPE(enc, ptr)) { |
1180 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
4902 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1181 |
default: |
||
1182 |
tok = XML_TOK_NMTOKEN; |
||
1183 |
6 |
break; |
|
1184 |
} |
||
1185 |
break; |
||
1186 |
case XML_TOK_PREFIXED_NAME: |
||
1187 |
tok = XML_TOK_NMTOKEN; |
||
1188 |
30 |
break; |
|
1189 |
} |
||
1190 |
break; |
||
1191 |
#endif |
||
1192 |
case BT_PLUS: |
||
1193 |
✓✓✗✗ ✗✗ |
174 |
if (tok == XML_TOK_NMTOKEN) { |
1194 |
6 |
*nextTokPtr = ptr; |
|
1195 |
6 |
return XML_TOK_INVALID; |
|
1196 |
} |
||
1197 |
168 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1198 |
168 |
return XML_TOK_NAME_PLUS; |
|
1199 |
case BT_AST: |
||
1200 |
✓✗✗✗ ✗✗ |
6 |
if (tok == XML_TOK_NMTOKEN) { |
1201 |
6 |
*nextTokPtr = ptr; |
|
1202 |
6 |
return XML_TOK_INVALID; |
|
1203 |
} |
||
1204 |
*nextTokPtr = ptr + MINBPC(enc); |
||
1205 |
return XML_TOK_NAME_ASTERISK; |
||
1206 |
case BT_QUEST: |
||
1207 |
✓✓✗✗ ✗✗ |
1944 |
if (tok == XML_TOK_NMTOKEN) { |
1208 |
6 |
*nextTokPtr = ptr; |
|
1209 |
6 |
return XML_TOK_INVALID; |
|
1210 |
} |
||
1211 |
1938 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1212 |
1938 |
return XML_TOK_NAME_QUESTION; |
|
1213 |
default: |
||
1214 |
6 |
*nextTokPtr = ptr; |
|
1215 |
6 |
return XML_TOK_INVALID; |
|
1216 |
} |
||
1217 |
} |
||
1218 |
627507 |
return -tok; |
|
1219 |
3052329 |
} |
|
1220 |
|||
1221 |
static int PTRCALL |
||
1222 |
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
||
1223 |
const char *end, const char **nextTokPtr) |
||
1224 |
{ |
||
1225 |
const char *start; |
||
1226 |
✓✓✓✓ ✓✓ |
7392 |
if (ptr >= end) |
1227 |
534 |
return XML_TOK_NONE; |
|
1228 |
✗✓✗✓ ✗✓ |
3162 |
else if (! HAS_CHAR(enc, ptr, end)) { |
1229 |
/* This line cannot be executed. The incoming data has already |
||
1230 |
* been tokenized once, so incomplete characters like this have |
||
1231 |
* already been eliminated from the input. Retaining the paranoia |
||
1232 |
* check is still valuable, however. |
||
1233 |
*/ |
||
1234 |
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
||
1235 |
} |
||
1236 |
start = ptr; |
||
1237 |
✓✓✓✗ ✓✗ |
475116 |
while (HAS_CHAR(enc, ptr, end)) { |
1238 |
✓✗✗✓ ✓✓✓✓ ✓✓✗✗ ✗✗✓✗ ✗✗✓✗ ✓✗✗✗ ✗✓✗✗ ✗✓✗ |
237192 |
switch (BYTE_TYPE(enc, ptr)) { |
1239 |
#define LEAD_CASE(n) \ |
||
1240 |
case BT_LEAD ## n: ptr += n; break; |
||
1241 |
6 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1242 |
#undef LEAD_CASE |
||
1243 |
case BT_AMP: |
||
1244 |
✓✓✓✗ ✓✗ |
282 |
if (ptr == start) |
1245 |
258 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1246 |
24 |
*nextTokPtr = ptr; |
|
1247 |
24 |
return XML_TOK_DATA_CHARS; |
|
1248 |
case BT_LT: |
||
1249 |
/* this is for inside entity references */ |
||
1250 |
6 |
*nextTokPtr = ptr; |
|
1251 |
6 |
return XML_TOK_INVALID; |
|
1252 |
case BT_LF: |
||
1253 |
✓✗✗✗ ✗✗ |
12 |
if (ptr == start) { |
1254 |
12 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1255 |
12 |
return XML_TOK_DATA_NEWLINE; |
|
1256 |
} |
||
1257 |
*nextTokPtr = ptr; |
||
1258 |
return XML_TOK_DATA_CHARS; |
||
1259 |
case BT_CR: |
||
1260 |
✓✓✗✗ ✗✗ |
18 |
if (ptr == start) { |
1261 |
12 |
ptr += MINBPC(enc); |
|
1262 |
✓✓✗✗ ✗✗ |
12 |
if (! HAS_CHAR(enc, ptr, end)) |
1263 |
6 |
return XML_TOK_TRAILING_CR; |
|
1264 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
6 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
1265 |
6 |
ptr += MINBPC(enc); |
|
1266 |
6 |
*nextTokPtr = ptr; |
|
1267 |
6 |
return XML_TOK_DATA_NEWLINE; |
|
1268 |
} |
||
1269 |
6 |
*nextTokPtr = ptr; |
|
1270 |
6 |
return XML_TOK_DATA_CHARS; |
|
1271 |
case BT_S: |
||
1272 |
✓✓✓✗ ✓✗ |
2406 |
if (ptr == start) { |
1273 |
1440 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1274 |
1440 |
return XML_TOK_ATTRIBUTE_VALUE_S; |
|
1275 |
} |
||
1276 |
966 |
*nextTokPtr = ptr; |
|
1277 |
966 |
return XML_TOK_DATA_CHARS; |
|
1278 |
default: |
||
1279 |
234390 |
ptr += MINBPC(enc); |
|
1280 |
234390 |
break; |
|
1281 |
} |
||
1282 |
} |
||
1283 |
438 |
*nextTokPtr = ptr; |
|
1284 |
438 |
return XML_TOK_DATA_CHARS; |
|
1285 |
3696 |
} |
|
1286 |
|||
1287 |
static int PTRCALL |
||
1288 |
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
||
1289 |
const char *end, const char **nextTokPtr) |
||
1290 |
{ |
||
1291 |
const char *start; |
||
1292 |
✓✓✓✓ ✓✓ |
2988 |
if (ptr >= end) |
1293 |
639 |
return XML_TOK_NONE; |
|
1294 |
✗✓✗✓ ✗✓ |
855 |
else if (! HAS_CHAR(enc, ptr, end)) { |
1295 |
/* This line cannot be executed. The incoming data has already |
||
1296 |
* been tokenized once, so incomplete characters like this have |
||
1297 |
* already been eliminated from the input. Retaining the paranoia |
||
1298 |
* check is still valuable, however. |
||
1299 |
*/ |
||
1300 |
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
||
1301 |
} |
||
1302 |
start = ptr; |
||
1303 |
✓✓✓✓ ✓✓ |
458766 |
while (HAS_CHAR(enc, ptr, end)) { |
1304 |
✗✗✗✓ ✓✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✓✓✗ ✗✗✗✓ ✗✗✗✓ |
229356 |
switch (BYTE_TYPE(enc, ptr)) { |
1305 |
#define LEAD_CASE(n) \ |
||
1306 |
case BT_LEAD ## n: ptr += n; break; |
||
1307 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
||
1308 |
#undef LEAD_CASE |
||
1309 |
case BT_AMP: |
||
1310 |
✓✓✗✗ ✓✗ |
108 |
if (ptr == start) |
1311 |
84 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1312 |
24 |
*nextTokPtr = ptr; |
|
1313 |
24 |
return XML_TOK_DATA_CHARS; |
|
1314 |
case BT_PERCNT: |
||
1315 |
✓✗✗✗ ✗✗ |
210 |
if (ptr == start) { |
1316 |
210 |
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
|
1317 |
end, nextTokPtr); |
||
1318 |
210 |
return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
|
1319 |
} |
||
1320 |
*nextTokPtr = ptr; |
||
1321 |
return XML_TOK_DATA_CHARS; |
||
1322 |
case BT_LF: |
||
1323 |
✓✓✗✗ ✗✗ |
90 |
if (ptr == start) { |
1324 |
72 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1325 |
72 |
return XML_TOK_DATA_NEWLINE; |
|
1326 |
} |
||
1327 |
18 |
*nextTokPtr = ptr; |
|
1328 |
18 |
return XML_TOK_DATA_CHARS; |
|
1329 |
case BT_CR: |
||
1330 |
✓✓✗✗ ✗✗ |
12 |
if (ptr == start) { |
1331 |
6 |
ptr += MINBPC(enc); |
|
1332 |
✓✗✗✗ ✗✗ |
6 |
if (! HAS_CHAR(enc, ptr, end)) |
1333 |
6 |
return XML_TOK_TRAILING_CR; |
|
1334 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
||
1335 |
ptr += MINBPC(enc); |
||
1336 |
*nextTokPtr = ptr; |
||
1337 |
return XML_TOK_DATA_NEWLINE; |
||
1338 |
} |
||
1339 |
6 |
*nextTokPtr = ptr; |
|
1340 |
6 |
return XML_TOK_DATA_CHARS; |
|
1341 |
default: |
||
1342 |
228528 |
ptr += MINBPC(enc); |
|
1343 |
228528 |
break; |
|
1344 |
} |
||
1345 |
} |
||
1346 |
435 |
*nextTokPtr = ptr; |
|
1347 |
435 |
return XML_TOK_DATA_CHARS; |
|
1348 |
1494 |
} |
|
1349 |
|||
1350 |
#ifdef XML_DTD |
||
1351 |
|||
1352 |
static int PTRCALL |
||
1353 |
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, |
||
1354 |
const char *end, const char **nextTokPtr) |
||
1355 |
{ |
||
1356 |
int level = 0; |
||
1357 |
if (MINBPC(enc) > 1) { |
||
1358 |
1272 |
size_t n = end - ptr; |
|
1359 |
✓✓✓✓ |
636 |
if (n & (MINBPC(enc) - 1)) { |
1360 |
312 |
n &= ~(MINBPC(enc) - 1); |
|
1361 |
312 |
end = ptr + n; |
|
1362 |
312 |
} |
|
1363 |
} |
||
1364 |
✓✓✓✓ ✓✓ |
21672 |
while (HAS_CHAR(enc, ptr, end)) { |
1365 |
✓✓✗✗ ✗✓✓✓ ✓✓✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓ |
24876 |
switch (BYTE_TYPE(enc, ptr)) { |
1366 |
✓✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
60 |
INVALID_CASES(ptr, nextTokPtr) |
1367 |
case BT_LT: |
||
1368 |
876 |
ptr += MINBPC(enc); |
|
1369 |
✓✓✓✓ ✓✓ |
918 |
REQUIRE_CHAR(enc, ptr, end); |
1370 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
1422 |
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
1371 |
834 |
ptr += MINBPC(enc); |
|
1372 |
✓✓✓✓ ✓✓ |
876 |
REQUIRE_CHAR(enc, ptr, end); |
1373 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
1356 |
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
1374 |
++level; |
||
1375 |
ptr += MINBPC(enc); |
||
1376 |
} |
||
1377 |
} |
||
1378 |
break; |
||
1379 |
case BT_RSQB: |
||
1380 |
78 |
ptr += MINBPC(enc); |
|
1381 |
✓✓✓✓ ✓✓ |
108 |
REQUIRE_CHAR(enc, ptr, end); |
1382 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
84 |
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1383 |
48 |
ptr += MINBPC(enc); |
|
1384 |
✓✓✓✓ ✓✓ |
78 |
REQUIRE_CHAR(enc, ptr, end); |
1385 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
1386 |
18 |
ptr += MINBPC(enc); |
|
1387 |
✓✗✓✗ ✓✗ |
18 |
if (level == 0) { |
1388 |
18 |
*nextTokPtr = ptr; |
|
1389 |
18 |
return XML_TOK_IGNORE_SECT; |
|
1390 |
} |
||
1391 |
--level; |
||
1392 |
} |
||
1393 |
} |
||
1394 |
break; |
||
1395 |
default: |
||
1396 |
8940 |
ptr += MINBPC(enc); |
|
1397 |
8940 |
break; |
|
1398 |
} |
||
1399 |
} |
||
1400 |
756 |
return XML_TOK_PARTIAL; |
|
1401 |
948 |
} |
|
1402 |
|||
1403 |
#endif /* XML_DTD */ |
||
1404 |
|||
1405 |
static int PTRCALL |
||
1406 |
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
||
1407 |
const char **badPtr) |
||
1408 |
{ |
||
1409 |
60828 |
ptr += MINBPC(enc); |
|
1410 |
30414 |
end -= MINBPC(enc); |
|
1411 |
✓✓✓✓ ✓✓ |
2509464 |
for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
1412 |
✓✗✓✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✓ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✓ |
1888932 |
switch (BYTE_TYPE(enc, ptr)) { |
1413 |
case BT_DIGIT: |
||
1414 |
case BT_HEX: |
||
1415 |
case BT_MINUS: |
||
1416 |
case BT_APOS: |
||
1417 |
case BT_LPAR: |
||
1418 |
case BT_RPAR: |
||
1419 |
case BT_PLUS: |
||
1420 |
case BT_COMMA: |
||
1421 |
case BT_SOL: |
||
1422 |
case BT_EQUALS: |
||
1423 |
case BT_QUEST: |
||
1424 |
case BT_CR: |
||
1425 |
case BT_LF: |
||
1426 |
case BT_SEMI: |
||
1427 |
case BT_EXCL: |
||
1428 |
case BT_AST: |
||
1429 |
case BT_PERCNT: |
||
1430 |
case BT_NUM: |
||
1431 |
#ifdef XML_NS |
||
1432 |
case BT_COLON: |
||
1433 |
#endif |
||
1434 |
break; |
||
1435 |
case BT_S: |
||
1436 |
✗✓✗✗ ✗✗✗✗ ✗✗ |
90000 |
if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
1437 |
*badPtr = ptr; |
||
1438 |
return 0; |
||
1439 |
} |
||
1440 |
break; |
||
1441 |
case BT_NAME: |
||
1442 |
case BT_NMSTRT: |
||
1443 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
574584 |
if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
1444 |
break; |
||
1445 |
default: |
||
1446 |
✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗ |
6 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1447 |
case 0x24: /* $ */ |
||
1448 |
case 0x40: /* @ */ |
||
1449 |
break; |
||
1450 |
default: |
||
1451 |
6 |
*badPtr = ptr; |
|
1452 |
6 |
return 0; |
|
1453 |
} |
||
1454 |
break; |
||
1455 |
} |
||
1456 |
} |
||
1457 |
30408 |
return 1; |
|
1458 |
30414 |
} |
|
1459 |
|||
1460 |
/* This must only be called for a well-formed start-tag or empty |
||
1461 |
element tag. Returns the number of attributes. Pointers to the |
||
1462 |
first attsMax attributes are stored in atts. |
||
1463 |
*/ |
||
1464 |
|||
1465 |
static int PTRCALL |
||
1466 |
PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
||
1467 |
int attsMax, ATTRIBUTE *atts) |
||
1468 |
{ |
||
1469 |
enum { other, inName, inValue } state = inName; |
||
1470 |
int nAtts = 0; |
||
1471 |
int open = 0; /* defined when state == inValue; |
||
1472 |
initialization just to shut up compilers */ |
||
1473 |
|||
1474 |
148945839 |
for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
|
1475 |
✓✗✗✗ ✗✓✓✓ ✓✓✗✓ ✗✓✓✓ ✓✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✓✓ ✓✓✗✗ ✗✗✗✓ ✗✓✓✓ ✗✗✗✓ ✓ |
261504741 |
switch (BYTE_TYPE(enc, ptr)) { |
1476 |
#define START_NAME \ |
||
1477 |
if (state == other) { \ |
||
1478 |
if (nAtts < attsMax) { \ |
||
1479 |
atts[nAtts].name = ptr; \ |
||
1480 |
atts[nAtts].normalized = 1; \ |
||
1481 |
} \ |
||
1482 |
state = inName; \ |
||
1483 |
} |
||
1484 |
#define LEAD_CASE(n) \ |
||
1485 |
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
||
1486 |
✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
72 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1487 |
#undef LEAD_CASE |
||
1488 |
case BT_NONASCII: |
||
1489 |
case BT_NMSTRT: |
||
1490 |
case BT_HEX: |
||
1491 |
✓✓✓✓ ✓✓✓✗ ✓✓✓✗ |
118024887 |
START_NAME |
1492 |
break; |
||
1493 |
#undef START_NAME |
||
1494 |
case BT_QUOT: |
||
1495 |
✓✓✗✗ ✗✗ |
5700012 |
if (state != inValue) { |
1496 |
✓✗✗✗ ✗✗ |
2850006 |
if (nAtts < attsMax) |
1497 |
2850006 |
atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1498 |
state = inValue; |
||
1499 |
open = BT_QUOT; |
||
1500 |
2850006 |
} |
|
1501 |
✓✗✗✗ ✗✗ |
2850006 |
else if (open == BT_QUOT) { |
1502 |
state = other; |
||
1503 |
✓✗✗✗ ✗✗ |
2850006 |
if (nAtts < attsMax) |
1504 |
2850006 |
atts[nAtts].valueEnd = ptr; |
|
1505 |
2850006 |
nAtts++; |
|
1506 |
2850006 |
} |
|
1507 |
break; |
||
1508 |
case BT_APOS: |
||
1509 |
✓✓✓✓ ✓✓ |
6408 |
if (state != inValue) { |
1510 |
✓✓✓✗ ✓✗ |
3204 |
if (nAtts < attsMax) |
1511 |
3180 |
atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1512 |
state = inValue; |
||
1513 |
open = BT_APOS; |
||
1514 |
3204 |
} |
|
1515 |
✓✗✓✗ ✓✗ |
3204 |
else if (open == BT_APOS) { |
1516 |
state = other; |
||
1517 |
✓✓✓✗ ✓✗ |
3204 |
if (nAtts < attsMax) |
1518 |
3180 |
atts[nAtts].valueEnd = ptr; |
|
1519 |
3204 |
nAtts++; |
|
1520 |
3204 |
} |
|
1521 |
break; |
||
1522 |
case BT_AMP: |
||
1523 |
✓✗✓✗ ✓✗ |
258 |
if (nAtts < attsMax) |
1524 |
258 |
atts[nAtts].normalized = 0; |
|
1525 |
break; |
||
1526 |
case BT_S: |
||
1527 |
✓✓✓✓ ✓✓ |
5975112 |
if (state == inName) |
1528 |
3211740 |
state = other; |
|
1529 |
✗✓✗✗ ✗✗ |
2763444 |
else if (state == inValue |
1530 |
✓✓✓✓ ✓✗ |
2763858 |
&& nAtts < attsMax |
1531 |
✓✗✓✗ ✓✗ |
972 |
&& atts[nAtts].normalized |
1532 |
✓✓✗✓ ✗✓ |
600 |
&& (ptr == atts[nAtts].valuePtr |
1533 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
186 |
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
1534 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
144 |
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
1535 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
144 |
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
1536 |
42 |
atts[nAtts].normalized = 0; |
|
1537 |
break; |
||
1538 |
case BT_CR: case BT_LF: |
||
1539 |
/* This case ensures that the first attribute name is counted |
||
1540 |
Apart from that we could just change state on the quote. */ |
||
1541 |
✓✓✗✗ ✗✗ |
390474 |
if (state == inName) |
1542 |
270072 |
state = other; |
|
1543 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ |
120420 |
else if (state == inValue && nAtts < attsMax) |
1544 |
18 |
atts[nAtts].normalized = 0; |
|
1545 |
break; |
||
1546 |
case BT_GT: |
||
1547 |
case BT_SOL: |
||
1548 |
✓✓✓✗ ✓✗ |
10529007 |
if (state != inValue) |
1549 |
9755373 |
return nAtts; |
|
1550 |
break; |
||
1551 |
default: |
||
1552 |
break; |
||
1553 |
} |
||
1554 |
} |
||
1555 |
/* not reached */ |
||
1556 |
} |
||
1557 |
|||
1558 |
static int PTRFASTCALL |
||
1559 |
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) |
||
1560 |
{ |
||
1561 |
int result = 0; |
||
1562 |
/* skip &# */ |
||
1563 |
948 |
ptr += 2*MINBPC(enc); |
|
1564 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
498 |
if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
1565 |
✓✓✓✓ ✓✓ |
1776 |
for (ptr += MINBPC(enc); |
1566 |
✓✗✓✗ |
1008 |
!CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
1567 |
654 |
ptr += MINBPC(enc)) { |
|
1568 |
✓✗✓✗ |
192 |
int c = BYTE_TO_ASCII(enc, ptr); |
1569 |
✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✓ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✓✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✓ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✓ |
1320 |
switch (c) { |
1570 |
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
||
1571 |
case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
||
1572 |
414 |
result <<= 4; |
|
1573 |
414 |
result |= (c - ASCII_0); |
|
1574 |
414 |
break; |
|
1575 |
case ASCII_A: case ASCII_B: case ASCII_C: |
||
1576 |
case ASCII_D: case ASCII_E: case ASCII_F: |
||
1577 |
246 |
result <<= 4; |
|
1578 |
246 |
result += 10 + (c - ASCII_A); |
|
1579 |
246 |
break; |
|
1580 |
case ASCII_a: case ASCII_b: case ASCII_c: |
||
1581 |
case ASCII_d: case ASCII_e: case ASCII_f: |
||
1582 |
result <<= 4; |
||
1583 |
result += 10 + (c - ASCII_a); |
||
1584 |
break; |
||
1585 |
} |
||
1586 |
✓✓✗✓ ✗✓ |
660 |
if (result >= 0x110000) |
1587 |
6 |
return -1; |
|
1588 |
✓✓✓✗ ✓✗ |
654 |
} |
1589 |
} |
||
1590 |
else { |
||
1591 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
1956 |
for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
1592 |
✓✗✓✗ |
108 |
int c = BYTE_TO_ASCII(enc, ptr); |
1593 |
696 |
result *= 10; |
|
1594 |
696 |
result += (c - ASCII_0); |
|
1595 |
✓✓✗✓ ✗✓ |
696 |
if (result >= 0x110000) |
1596 |
6 |
return -1; |
|
1597 |
✓✓✓✗ ✓✗ |
690 |
} |
1598 |
} |
||
1599 |
462 |
return checkCharRefNumber(result); |
|
1600 |
474 |
} |
|
1601 |
|||
1602 |
static int PTRCALL |
||
1603 |
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
1604 |
const char *end) |
||
1605 |
{ |
||
1606 |
✓✓✓✓ ✓✗✗✓ ✓✓✗✓ |
6012699 |
switch ((end - ptr)/MINBPC(enc)) { |
1607 |
case 2: |
||
1608 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
2852997 |
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
1609 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗ |
2850030 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1610 |
case ASCII_l: |
||
1611 |
1590012 |
return ASCII_LT; |
|
1612 |
case ASCII_g: |
||
1613 |
1260012 |
return ASCII_GT; |
|
1614 |
} |
||
1615 |
} |
||
1616 |
break; |
||
1617 |
case 3: |
||
1618 |
✓✓✗✗ ✗✗✓✗ ✗✓ |
150306 |
if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
1619 |
150030 |
ptr += MINBPC(enc); |
|
1620 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
150030 |
if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
1621 |
150030 |
ptr += MINBPC(enc); |
|
1622 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
150030 |
if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
1623 |
150024 |
return ASCII_AMP; |
|
1624 |
} |
||
1625 |
} |
||
1626 |
break; |
||
1627 |
case 4: |
||
1628 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗ |
60 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1629 |
case ASCII_q: |
||
1630 |
18 |
ptr += MINBPC(enc); |
|
1631 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
18 |
if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
1632 |
18 |
ptr += MINBPC(enc); |
|
1633 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
18 |
if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1634 |
12 |
ptr += MINBPC(enc); |
|
1635 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
12 |
if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
1636 |
12 |
return ASCII_QUOT; |
|
1637 |
} |
||
1638 |
} |
||
1639 |
break; |
||
1640 |
case ASCII_a: |
||
1641 |
30 |
ptr += MINBPC(enc); |
|
1642 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
1643 |
30 |
ptr += MINBPC(enc); |
|
1644 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1645 |
30 |
ptr += MINBPC(enc); |
|
1646 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
1647 |
24 |
return ASCII_APOS; |
|
1648 |
} |
||
1649 |
} |
||
1650 |
break; |
||
1651 |
} |
||
1652 |
} |
||
1653 |
4653 |
return 0; |
|
1654 |
3004737 |
} |
|
1655 |
|||
1656 |
static int PTRCALL |
||
1657 |
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, |
||
1658 |
const char *end1, const char *ptr2) |
||
1659 |
{ |
||
1660 |
✓✓✓✓ ✓✓ |
2535114 |
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
1661 |
✗✓✗✓ ✗✓ |
1032699 |
if (end1 - ptr1 < MINBPC(enc)) { |
1662 |
/* This line cannot be executed. THe incoming data has already |
||
1663 |
* been tokenized once, so imcomplete characters like this have |
||
1664 |
* already been eliminated from the input. Retaining the |
||
1665 |
* paranoia check is still valuable, however. |
||
1666 |
*/ |
||
1667 |
return 0; /* LCOV_EXCL_LINE */ |
||
1668 |
} |
||
1669 |
✓✓✓✗ ✓✓✓✓ ✓✓ |
1036401 |
if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
1670 |
39102 |
return 0; |
|
1671 |
} |
||
1672 |
143538 |
return ptr1 == end1; |
|
1673 |
182640 |
} |
|
1674 |
|||
1675 |
static int PTRFASTCALL |
||
1676 |
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
||
1677 |
{ |
||
1678 |
const char *start = ptr; |
||
1679 |
190030551 |
for (;;) { |
|
1680 |
✓✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✗✓✓ ✓✓✗✗ ✗✗✗✗ ✗✗✗✓ ✓ |
167672436 |
switch (BYTE_TYPE(enc, ptr)) { |
1681 |
#define LEAD_CASE(n) \ |
||
1682 |
case BT_LEAD ## n: ptr += n; break; |
||
1683 |
72 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1684 |
#undef LEAD_CASE |
||
1685 |
case BT_NONASCII: |
||
1686 |
case BT_NMSTRT: |
||
1687 |
#ifdef XML_NS |
||
1688 |
case BT_COLON: |
||
1689 |
#endif |
||
1690 |
case BT_HEX: |
||
1691 |
case BT_DIGIT: |
||
1692 |
case BT_NAME: |
||
1693 |
case BT_MINUS: |
||
1694 |
145308873 |
ptr += MINBPC(enc); |
|
1695 |
145308873 |
break; |
|
1696 |
default: |
||
1697 |
22360803 |
return (int)(ptr - start); |
|
1698 |
} |
||
1699 |
} |
||
1700 |
} |
||
1701 |
|||
1702 |
static const char * PTRFASTCALL |
||
1703 |
PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
||
1704 |
{ |
||
1705 |
384 |
for (;;) { |
|
1706 |
✗✗✓✓ ✓✗✗✗ ✗✓✓✗ ✗✗✗✓ |
276 |
switch (BYTE_TYPE(enc, ptr)) { |
1707 |
case BT_LF: |
||
1708 |
case BT_CR: |
||
1709 |
case BT_S: |
||
1710 |
120 |
ptr += MINBPC(enc); |
|
1711 |
break; |
||
1712 |
default: |
||
1713 |
132 |
return ptr; |
|
1714 |
} |
||
1715 |
} |
||
1716 |
} |
||
1717 |
|||
1718 |
static void PTRCALL |
||
1719 |
PREFIX(updatePosition)(const ENCODING *enc, |
||
1720 |
const char *ptr, |
||
1721 |
const char *end, |
||
1722 |
POSITION *pos) |
||
1723 |
{ |
||
1724 |
✓✓✓✓ ✓✓ |
18238203 |
while (HAS_CHAR(enc, ptr, end)) { |
1725 |
✓✓✗✓ ✓✓✓✓ ✗✗✗✓ ✓✓✓✓ ✗✗✓✓ ✗✓ |
2562060 |
switch (BYTE_TYPE(enc, ptr)) { |
1726 |
#define LEAD_CASE(n) \ |
||
1727 |
case BT_LEAD ## n: \ |
||
1728 |
ptr += n; \ |
||
1729 |
break; |
||
1730 |
108 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1731 |
#undef LEAD_CASE |
||
1732 |
case BT_LF: |
||
1733 |
13620 |
pos->columnNumber = (XML_Size)-1; |
|
1734 |
13620 |
pos->lineNumber++; |
|
1735 |
13620 |
ptr += MINBPC(enc); |
|
1736 |
13620 |
break; |
|
1737 |
case BT_CR: |
||
1738 |
36 |
pos->lineNumber++; |
|
1739 |
36 |
ptr += MINBPC(enc); |
|
1740 |
✓✗✓✓ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ |
60 |
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) |
1741 |
12 |
ptr += MINBPC(enc); |
|
1742 |
36 |
pos->columnNumber = (XML_Size)-1; |
|
1743 |
36 |
break; |
|
1744 |
default: |
||
1745 |
2524092 |
ptr += MINBPC(enc); |
|
1746 |
2524092 |
break; |
|
1747 |
} |
||
1748 |
2537856 |
pos->columnNumber++; |
|
1749 |
} |
||
1750 |
4387497 |
} |
|
1751 |
|||
1752 |
#undef DO_LEAD_CASE |
||
1753 |
#undef MULTIBYTE_CASES |
||
1754 |
#undef INVALID_CASES |
||
1755 |
#undef CHECK_NAME_CASE |
||
1756 |
#undef CHECK_NAME_CASES |
||
1757 |
#undef CHECK_NMSTRT_CASE |
||
1758 |
#undef CHECK_NMSTRT_CASES |
||
1759 |
|||
1760 |
#endif /* XML_TOK_IMPL_C */ |
Generated by: GCOVR (Version 3.3) |