GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* This file is included! |
||
2 |
__ __ _ |
||
3 |
___\ \/ /_ __ __ _| |_ |
||
4 |
/ _ \\ /| '_ \ / _` | __| |
||
5 |
| __// \| |_) | (_| | |_ |
||
6 |
\___/_/\_\ .__/ \__,_|\__| |
||
7 |
|_| XML parser |
||
8 |
|||
9 |
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
||
10 |
Copyright (c) 2000-2017 Expat development team |
||
11 |
Licensed under the MIT license: |
||
12 |
|||
13 |
Permission is hereby granted, free of charge, to any person obtaining |
||
14 |
a copy of this software and associated documentation files (the |
||
15 |
"Software"), to deal in the Software without restriction, including |
||
16 |
without limitation the rights to use, copy, modify, merge, publish, |
||
17 |
distribute, sublicense, and/or sell copies of the Software, and to permit |
||
18 |
persons to whom the Software is furnished to do so, subject to the |
||
19 |
following conditions: |
||
20 |
|||
21 |
The above copyright notice and this permission notice shall be included |
||
22 |
in all copies or substantial portions of the Software. |
||
23 |
|||
24 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
25 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
26 |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
||
27 |
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
||
28 |
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
29 |
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
30 |
USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
31 |
*/ |
||
32 |
|||
33 |
#ifdef XML_TOK_IMPL_C |
||
34 |
|||
35 |
#ifndef IS_INVALID_CHAR |
||
36 |
#define IS_INVALID_CHAR(enc, ptr, n) (0) |
||
37 |
#endif |
||
38 |
|||
39 |
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ |
||
40 |
case BT_LEAD ## n: \ |
||
41 |
if (end - ptr < n) \ |
||
42 |
return XML_TOK_PARTIAL_CHAR; \ |
||
43 |
if (IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
44 |
*(nextTokPtr) = (ptr); \ |
||
45 |
return XML_TOK_INVALID; \ |
||
46 |
} \ |
||
47 |
ptr += n; \ |
||
48 |
break; |
||
49 |
|||
50 |
#define INVALID_CASES(ptr, nextTokPtr) \ |
||
51 |
INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ |
||
52 |
INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ |
||
53 |
INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ |
||
54 |
case BT_NONXML: \ |
||
55 |
case BT_MALFORM: \ |
||
56 |
case BT_TRAIL: \ |
||
57 |
*(nextTokPtr) = (ptr); \ |
||
58 |
return XML_TOK_INVALID; |
||
59 |
|||
60 |
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ |
||
61 |
case BT_LEAD ## n: \ |
||
62 |
if (end - ptr < n) \ |
||
63 |
return XML_TOK_PARTIAL_CHAR; \ |
||
64 |
if (!IS_NAME_CHAR(enc, ptr, n)) { \ |
||
65 |
*nextTokPtr = ptr; \ |
||
66 |
return XML_TOK_INVALID; \ |
||
67 |
} \ |
||
68 |
ptr += n; \ |
||
69 |
break; |
||
70 |
|||
71 |
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ |
||
72 |
case BT_NONASCII: \ |
||
73 |
if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ |
||
74 |
*nextTokPtr = ptr; \ |
||
75 |
return XML_TOK_INVALID; \ |
||
76 |
} \ |
||
77 |
case BT_NMSTRT: \ |
||
78 |
case BT_HEX: \ |
||
79 |
case BT_DIGIT: \ |
||
80 |
case BT_NAME: \ |
||
81 |
case BT_MINUS: \ |
||
82 |
ptr += MINBPC(enc); \ |
||
83 |
break; \ |
||
84 |
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ |
||
85 |
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ |
||
86 |
CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) |
||
87 |
|||
88 |
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ |
||
89 |
case BT_LEAD ## n: \ |
||
90 |
if (end - ptr < n) \ |
||
91 |
return XML_TOK_PARTIAL_CHAR; \ |
||
92 |
if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
||
93 |
*nextTokPtr = ptr; \ |
||
94 |
return XML_TOK_INVALID; \ |
||
95 |
} \ |
||
96 |
ptr += n; \ |
||
97 |
break; |
||
98 |
|||
99 |
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ |
||
100 |
case BT_NONASCII: \ |
||
101 |
if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ |
||
102 |
*nextTokPtr = ptr; \ |
||
103 |
return XML_TOK_INVALID; \ |
||
104 |
} \ |
||
105 |
case BT_NMSTRT: \ |
||
106 |
case BT_HEX: \ |
||
107 |
ptr += MINBPC(enc); \ |
||
108 |
break; \ |
||
109 |
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ |
||
110 |
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ |
||
111 |
CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) |
||
112 |
|||
113 |
#ifndef PREFIX |
||
114 |
#define PREFIX(ident) ident |
||
115 |
#endif |
||
116 |
|||
117 |
|||
118 |
#define HAS_CHARS(enc, ptr, end, count) \ |
||
119 |
(end - ptr >= count * MINBPC(enc)) |
||
120 |
|||
121 |
#define HAS_CHAR(enc, ptr, end) \ |
||
122 |
HAS_CHARS(enc, ptr, end, 1) |
||
123 |
|||
124 |
#define REQUIRE_CHARS(enc, ptr, end, count) \ |
||
125 |
{ \ |
||
126 |
if (! HAS_CHARS(enc, ptr, end, count)) { \ |
||
127 |
return XML_TOK_PARTIAL; \ |
||
128 |
} \ |
||
129 |
} |
||
130 |
|||
131 |
#define REQUIRE_CHAR(enc, ptr, end) \ |
||
132 |
REQUIRE_CHARS(enc, ptr, end, 1) |
||
133 |
|||
134 |
|||
135 |
/* ptr points to character following "<!-" */ |
||
136 |
|||
137 |
static int PTRCALL |
||
138 |
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
||
139 |
const char *end, const char **nextTokPtr) |
||
140 |
{ |
||
141 |
✓✓✓✓ ✓✓ |
511320 |
if (HAS_CHAR(enc, ptr, end)) { |
142 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
256010 |
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
143 |
*nextTokPtr = ptr; |
||
144 |
return XML_TOK_INVALID; |
||
145 |
} |
||
146 |
255430 |
ptr += MINBPC(enc); |
|
147 |
✓✓✓✓ ✓✓ |
82925530 |
while (HAS_CHAR(enc, ptr, end)) { |
148 |
✗✗✗✗ ✗✗✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✓✓ |
82673020 |
switch (BYTE_TYPE(enc, ptr)) { |
149 |
INVALID_CASES(ptr, nextTokPtr) |
||
150 |
case BT_MINUS: |
||
151 |
1700670 |
ptr += MINBPC(enc); |
|
152 |
✓✓✓✓ ✓✓ |
1700900 |
REQUIRE_CHAR(enc, ptr, end); |
153 |
✓✓✗✓ ✗✓✗✓ ✗✓ |
1700500 |
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
154 |
250440 |
ptr += MINBPC(enc); |
|
155 |
✓✓✓✓ ✓✓ |
250670 |
REQUIRE_CHAR(enc, ptr, end); |
156 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
250230 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
157 |
*nextTokPtr = ptr; |
||
158 |
return XML_TOK_INVALID; |
||
159 |
} |
||
160 |
250210 |
*nextTokPtr = ptr + MINBPC(enc); |
|
161 |
250210 |
return XML_TOK_COMMENT; |
|
162 |
} |
||
163 |
break; |
||
164 |
default: |
||
165 |
80964670 |
ptr += MINBPC(enc); |
|
166 |
80964670 |
break; |
|
167 |
} |
||
168 |
} |
||
169 |
} |
||
170 |
4990 |
return XML_TOK_PARTIAL; |
|
171 |
255660 |
} |
|
172 |
|||
173 |
/* ptr points to character following "<!" */ |
||
174 |
|||
175 |
static int PTRCALL |
||
176 |
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
||
177 |
const char *end, const char **nextTokPtr) |
||
178 |
{ |
||
179 |
✓✓✓✓ ✓✓ |
878212 |
REQUIRE_CHAR(enc, ptr, end); |
180 |
✓✓✗✓ ✗✓✗✓ ✓✗✓✗ ✓✗✓✓ ✗✓✗ |
415398 |
switch (BYTE_TYPE(enc, ptr)) { |
181 |
case BT_MINUS: |
||
182 |
204790 |
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
183 |
case BT_LSQB: |
||
184 |
70 |
*nextTokPtr = ptr + MINBPC(enc); |
|
185 |
70 |
return XML_TOK_COND_SECT_OPEN; |
|
186 |
case BT_NMSTRT: |
||
187 |
case BT_HEX: |
||
188 |
204438 |
ptr += MINBPC(enc); |
|
189 |
break; |
||
190 |
default: |
||
191 |
*nextTokPtr = ptr; |
||
192 |
return XML_TOK_INVALID; |
||
193 |
} |
||
194 |
✓✓✓✓ ✓✓ |
2030752 |
while (HAS_CHAR(enc, ptr, end)) { |
195 |
✗✗✗✓ ✗✓✗✓ ✗✗✗✗ ✓✗✓✗ ✓✗✗✗ ✗✓✗✓ ✗ |
898300 |
switch (BYTE_TYPE(enc, ptr)) { |
196 |
case BT_PERCNT: |
||
197 |
REQUIRE_CHARS(enc, ptr, end, 2); |
||
198 |
/* don't allow <!ENTITY% foo "whatever"> */ |
||
199 |
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
||
200 |
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
||
201 |
*nextTokPtr = ptr; |
||
202 |
return XML_TOK_INVALID; |
||
203 |
} |
||
204 |
/* fall through */ |
||
205 |
case BT_S: case BT_CR: case BT_LF: |
||
206 |
72522 |
*nextTokPtr = ptr; |
|
207 |
72522 |
return XML_TOK_DECL_OPEN; |
|
208 |
case BT_NMSTRT: |
||
209 |
case BT_HEX: |
||
210 |
810938 |
ptr += MINBPC(enc); |
|
211 |
break; |
||
212 |
default: |
||
213 |
*nextTokPtr = ptr; |
||
214 |
return XML_TOK_INVALID; |
||
215 |
} |
||
216 |
} |
||
217 |
131916 |
return XML_TOK_PARTIAL; |
|
218 |
429170 |
} |
|
219 |
|||
220 |
static int PTRCALL |
||
221 |
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
222 |
const char *end, int *tokPtr) |
||
223 |
{ |
||
224 |
int upper = 0; |
||
225 |
708980 |
*tokPtr = XML_TOK_PI; |
|
226 |
✓✓✓✓ ✓✓ |
354490 |
if (end - ptr != MINBPC(enc)*3) |
227 |
113190 |
return 1; |
|
228 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
280800 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
229 |
case ASCII_x: |
||
230 |
break; |
||
231 |
case ASCII_X: |
||
232 |
upper = 1; |
||
233 |
break; |
||
234 |
default: |
||
235 |
220 |
return 1; |
|
236 |
} |
||
237 |
241080 |
ptr += MINBPC(enc); |
|
238 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
280580 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
239 |
case ASCII_m: |
||
240 |
break; |
||
241 |
case ASCII_M: |
||
242 |
upper = 1; |
||
243 |
break; |
||
244 |
default: |
||
245 |
200 |
return 1; |
|
246 |
} |
||
247 |
240880 |
ptr += MINBPC(enc); |
|
248 |
✗✓✓✓ ✗✗✗✓ ✓✗✗✗ ✓ |
280380 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
249 |
case ASCII_l: |
||
250 |
break; |
||
251 |
case ASCII_L: |
||
252 |
upper = 1; |
||
253 |
break; |
||
254 |
default: |
||
255 |
230 |
return 1; |
|
256 |
} |
||
257 |
✗✓✗✓ ✗✓ |
240650 |
if (upper) |
258 |
return 0; |
||
259 |
240650 |
*tokPtr = XML_TOK_XML_DECL; |
|
260 |
240650 |
return 1; |
|
261 |
354490 |
} |
|
262 |
|||
263 |
/* ptr points to character following "<?" */ |
||
264 |
|||
265 |
static int PTRCALL |
||
266 |
PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
||
267 |
const char *end, const char **nextTokPtr) |
||
268 |
{ |
||
269 |
735700 |
int tok; |
|
270 |
const char *target = ptr; |
||
271 |
✓✓✓✓ ✓✓ |
371220 |
REQUIRE_CHAR(enc, ptr, end); |
272 |
✗✗✓✗ ✗✗✗✓ ✓✓✗✓ ✗✗✗✗ ✓✓✓✗ ✓✗✗✗ ✗ |
408000 |
switch (BYTE_TYPE(enc, ptr)) { |
273 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
364620 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
274 |
default: |
||
275 |
*nextTokPtr = ptr; |
||
276 |
return XML_TOK_INVALID; |
||
277 |
} |
||
278 |
✓✓✓✓ ✓✓ |
1756560 |
while (HAS_CHAR(enc, ptr, end)) { |
279 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗✓✓ ✓✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗✓✓ ✓✗✗✗ ✗✓✗✗ ✗✗✗✓ ✓✗ |
1806000 |
switch (BYTE_TYPE(enc, ptr)) { |
280 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
1329090 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
281 |
case BT_S: case BT_CR: case BT_LF: |
||
282 |
✗✓✗✓ ✗✓ |
354390 |
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
283 |
*nextTokPtr = ptr; |
||
284 |
return XML_TOK_INVALID; |
||
285 |
} |
||
286 |
354390 |
ptr += MINBPC(enc); |
|
287 |
✓✓✓✓ ✓✓ |
88001620 |
while (HAS_CHAR(enc, ptr, end)) { |
288 |
✓✗✗✗ ✗✗✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✓✓ |
88035480 |
switch (BYTE_TYPE(enc, ptr)) { |
289 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
110 |
INVALID_CASES(ptr, nextTokPtr) |
290 |
case BT_QUEST: |
||
291 |
106470 |
ptr += MINBPC(enc); |
|
292 |
✓✓✓✓ ✓✓ |
109780 |
REQUIRE_CHAR(enc, ptr, end); |
293 |
✗✓✗✓ ✗✓✗✓ ✗✓ |
103470 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
294 |
103160 |
*nextTokPtr = ptr + MINBPC(enc); |
|
295 |
103160 |
return tok; |
|
296 |
} |
||
297 |
break; |
||
298 |
default: |
||
299 |
87292810 |
ptr += MINBPC(enc); |
|
300 |
87292810 |
break; |
|
301 |
} |
||
302 |
} |
||
303 |
247910 |
return XML_TOK_PARTIAL; |
|
304 |
case BT_QUEST: |
||
305 |
✗✓✗✓ ✗✓ |
100 |
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
306 |
*nextTokPtr = ptr; |
||
307 |
return XML_TOK_INVALID; |
||
308 |
} |
||
309 |
100 |
ptr += MINBPC(enc); |
|
310 |
✓✓✓✓ ✓✓ |
160 |
REQUIRE_CHAR(enc, ptr, end); |
311 |
✓✗✓✗ ✓✗✓✗ ✓✗ |
60 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
312 |
40 |
*nextTokPtr = ptr + MINBPC(enc); |
|
313 |
40 |
return tok; |
|
314 |
} |
||
315 |
/* fall through */ |
||
316 |
default: |
||
317 |
*nextTokPtr = ptr; |
||
318 |
return XML_TOK_INVALID; |
||
319 |
} |
||
320 |
} |
||
321 |
9990 |
return XML_TOK_PARTIAL; |
|
322 |
367850 |
} |
|
323 |
|||
324 |
static int PTRCALL |
||
325 |
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
326 |
const char *end, const char **nextTokPtr) |
||
327 |
{ |
||
328 |
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
||
329 |
ASCII_T, ASCII_A, ASCII_LSQB }; |
||
330 |
int i; |
||
331 |
/* CDATA[ */ |
||
332 |
✓✓✓✓ ✓✓ |
11300 |
REQUIRE_CHARS(enc, ptr, end, 6); |
333 |
✓✓✓✓ ✓✓ |
4140 |
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
334 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
2520 |
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
335 |
40 |
*nextTokPtr = ptr; |
|
336 |
40 |
return XML_TOK_INVALID; |
|
337 |
} |
||
338 |
} |
||
339 |
270 |
*nextTokPtr = ptr; |
|
340 |
270 |
return XML_TOK_CDATA_SECT_OPEN; |
|
341 |
3870 |
} |
|
342 |
|||
343 |
static int PTRCALL |
||
344 |
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
||
345 |
const char *end, const char **nextTokPtr) |
||
346 |
{ |
||
347 |
✓✓✓✓ ✓✓ |
9000 |
if (ptr >= end) |
348 |
1940 |
return XML_TOK_NONE; |
|
349 |
if (MINBPC(enc) > 1) { |
||
350 |
710 |
size_t n = end - ptr; |
|
351 |
✓✓✓✓ |
710 |
if (n & (MINBPC(enc) - 1)) { |
352 |
360 |
n &= ~(MINBPC(enc) - 1); |
|
353 |
✓✓✓✓ |
360 |
if (n == 0) |
354 |
260 |
return XML_TOK_PARTIAL; |
|
355 |
100 |
end = ptr + n; |
|
356 |
100 |
} |
|
357 |
✓✓✓✓ |
450 |
} |
358 |
✓✗✓✓ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✓✗✗ ✓✓ |
3200 |
switch (BYTE_TYPE(enc, ptr)) { |
359 |
case BT_RSQB: |
||
360 |
480 |
ptr += MINBPC(enc); |
|
361 |
✓✓✓✓ ✓✓ |
660 |
REQUIRE_CHAR(enc, ptr, end); |
362 |
✓✓✓✗ ✓✗✓✗ ✓✗ |
400 |
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
363 |
break; |
||
364 |
270 |
ptr += MINBPC(enc); |
|
365 |
✓✓✓✓ ✓✓ |
420 |
REQUIRE_CHAR(enc, ptr, end); |
366 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
160 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
367 |
10 |
ptr -= MINBPC(enc); |
|
368 |
10 |
break; |
|
369 |
} |
||
370 |
110 |
*nextTokPtr = ptr + MINBPC(enc); |
|
371 |
110 |
return XML_TOK_CDATA_SECT_CLOSE; |
|
372 |
case BT_CR: |
||
373 |
ptr += MINBPC(enc); |
||
374 |
REQUIRE_CHAR(enc, ptr, end); |
||
375 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
||
376 |
ptr += MINBPC(enc); |
||
377 |
*nextTokPtr = ptr; |
||
378 |
return XML_TOK_DATA_NEWLINE; |
||
379 |
case BT_LF: |
||
380 |
10 |
*nextTokPtr = ptr + MINBPC(enc); |
|
381 |
10 |
return XML_TOK_DATA_NEWLINE; |
|
382 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✓ |
400 |
INVALID_CASES(ptr, nextTokPtr) |
383 |
default: |
||
384 |
1620 |
ptr += MINBPC(enc); |
|
385 |
1620 |
break; |
|
386 |
} |
||
387 |
✓✓✗✓ ✓✓ |
12130 |
while (HAS_CHAR(enc, ptr, end)) { |
388 |
✓✗✗✗ ✗✗✗✗ ✓✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✓✓ |
31240 |
switch (BYTE_TYPE(enc, ptr)) { |
389 |
#define LEAD_CASE(n) \ |
||
390 |
case BT_LEAD ## n: \ |
||
391 |
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
392 |
*nextTokPtr = ptr; \ |
||
393 |
return XML_TOK_DATA_CHARS; \ |
||
394 |
} \ |
||
395 |
ptr += n; \ |
||
396 |
break; |
||
397 |
✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
20 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
398 |
#undef LEAD_CASE |
||
399 |
case BT_NONXML: |
||
400 |
case BT_MALFORM: |
||
401 |
case BT_TRAIL: |
||
402 |
case BT_CR: |
||
403 |
case BT_LF: |
||
404 |
case BT_RSQB: |
||
405 |
20 |
*nextTokPtr = ptr; |
|
406 |
20 |
return XML_TOK_DATA_CHARS; |
|
407 |
default: |
||
408 |
10410 |
ptr += MINBPC(enc); |
|
409 |
10410 |
break; |
|
410 |
} |
||
411 |
} |
||
412 |
1690 |
*nextTokPtr = ptr; |
|
413 |
1690 |
return XML_TOK_DATA_CHARS; |
|
414 |
4500 |
} |
|
415 |
|||
416 |
/* ptr points to character following "</" */ |
||
417 |
|||
418 |
static int PTRCALL |
||
419 |
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
||
420 |
const char *end, const char **nextTokPtr) |
||
421 |
{ |
||
422 |
✓✓✓✓ ✓✓ |
32639310 |
REQUIRE_CHAR(enc, ptr, end); |
423 |
✗✗✓✓ ✗✗✗✓ ✗✗✗✓ ✗✗✗✗ ✓✗✗✗ ✓✗✗✗ ✗ |
16317550 |
switch (BYTE_TYPE(enc, ptr)) { |
424 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
16316420 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
425 |
default: |
||
426 |
*nextTokPtr = ptr; |
||
427 |
return XML_TOK_INVALID; |
||
428 |
} |
||
429 |
✓✓✓✓ ✓✓ |
138912050 |
while (HAS_CHAR(enc, ptr, end)) { |
430 |
✗✗✗✗ ✗✓✓✗ ✗✗✗✓ ✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✓✓ ✗ |
138848540 |
switch (BYTE_TYPE(enc, ptr)) { |
431 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
122593980 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
432 |
case BT_S: case BT_CR: case BT_LF: |
||
433 |
✓✓✗✗ ✗✗ |
40 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
434 |
✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗ |
10 |
switch (BYTE_TYPE(enc, ptr)) { |
435 |
case BT_S: case BT_CR: case BT_LF: |
||
436 |
break; |
||
437 |
case BT_GT: |
||
438 |
10 |
*nextTokPtr = ptr + MINBPC(enc); |
|
439 |
10 |
return XML_TOK_END_TAG; |
|
440 |
default: |
||
441 |
*nextTokPtr = ptr; |
||
442 |
return XML_TOK_INVALID; |
||
443 |
} |
||
444 |
} |
||
445 |
10 |
return XML_TOK_PARTIAL; |
|
446 |
#ifdef XML_NS |
||
447 |
case BT_COLON: |
||
448 |
/* no need to check qname syntax here, |
||
449 |
since end-tag must match exactly */ |
||
450 |
1740 |
ptr += MINBPC(enc); |
|
451 |
1740 |
break; |
|
452 |
#endif |
||
453 |
case BT_GT: |
||
454 |
16252190 |
*nextTokPtr = ptr + MINBPC(enc); |
|
455 |
16252190 |
return XML_TOK_END_TAG; |
|
456 |
default: |
||
457 |
*nextTokPtr = ptr; |
||
458 |
return XML_TOK_INVALID; |
||
459 |
} |
||
460 |
} |
||
461 |
64040 |
return XML_TOK_PARTIAL; |
|
462 |
16318540 |
} |
|
463 |
|||
464 |
/* ptr points to character following "&#X" */ |
||
465 |
|||
466 |
static int PTRCALL |
||
467 |
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
||
468 |
const char *end, const char **nextTokPtr) |
||
469 |
{ |
||
470 |
✓✓✓✓ ✓✓ |
7360 |
if (HAS_CHAR(enc, ptr, end)) { |
471 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
3900 |
switch (BYTE_TYPE(enc, ptr)) { |
472 |
case BT_DIGIT: |
||
473 |
case BT_HEX: |
||
474 |
break; |
||
475 |
default: |
||
476 |
10 |
*nextTokPtr = ptr; |
|
477 |
10 |
return XML_TOK_INVALID; |
|
478 |
} |
||
479 |
✓✓✓✓ ✓✓ |
19440 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
480 |
✓✗✗✓ ✓✗✓✗ ✗✓✓✗ ✓✗✗✓ |
10160 |
switch (BYTE_TYPE(enc, ptr)) { |
481 |
case BT_DIGIT: |
||
482 |
case BT_HEX: |
||
483 |
break; |
||
484 |
case BT_SEMI: |
||
485 |
2330 |
*nextTokPtr = ptr + MINBPC(enc); |
|
486 |
2330 |
return XML_TOK_CHAR_REF; |
|
487 |
default: |
||
488 |
*nextTokPtr = ptr; |
||
489 |
return XML_TOK_INVALID; |
||
490 |
} |
||
491 |
} |
||
492 |
} |
||
493 |
1340 |
return XML_TOK_PARTIAL; |
|
494 |
3680 |
} |
|
495 |
|||
496 |
/* ptr points to character following "&#" */ |
||
497 |
|||
498 |
static int PTRCALL |
||
499 |
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
||
500 |
const char *end, const char **nextTokPtr) |
||
501 |
{ |
||
502 |
✓✓✓✓ ✓✓ |
21880 |
if (HAS_CHAR(enc, ptr, end)) { |
503 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
11170 |
if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
504 |
3680 |
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
505 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
7770 |
switch (BYTE_TYPE(enc, ptr)) { |
506 |
case BT_DIGIT: |
||
507 |
break; |
||
508 |
default: |
||
509 |
*nextTokPtr = ptr; |
||
510 |
return XML_TOK_INVALID; |
||
511 |
} |
||
512 |
✓✓✓✓ ✓✓ |
37260 |
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
513 |
✓✗✓✓ ✗✓✗✓ ✓✗✓✗ ✓ |
20720 |
switch (BYTE_TYPE(enc, ptr)) { |
514 |
case BT_DIGIT: |
||
515 |
break; |
||
516 |
case BT_SEMI: |
||
517 |
5540 |
*nextTokPtr = ptr + MINBPC(enc); |
|
518 |
5540 |
return XML_TOK_CHAR_REF; |
|
519 |
default: |
||
520 |
*nextTokPtr = ptr; |
||
521 |
return XML_TOK_INVALID; |
||
522 |
} |
||
523 |
} |
||
524 |
} |
||
525 |
1720 |
return XML_TOK_PARTIAL; |
|
526 |
10940 |
} |
|
527 |
|||
528 |
/* ptr points to character following "&" */ |
||
529 |
|||
530 |
static int PTRCALL |
||
531 |
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
||
532 |
const char **nextTokPtr) |
||
533 |
{ |
||
534 |
✓✓✓✓ ✓✓ |
10158966 |
REQUIRE_CHAR(enc, ptr, end); |
535 |
✗✗✓✗ ✓✗✓✗ ✓✗✗✗ ✓✗✗✗ ✓✗✓✓ ✓✗✓✗ ✗✗✓✗ |
5076422 |
switch (BYTE_TYPE(enc, ptr)) { |
536 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
5062762 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
537 |
case BT_NUM: |
||
538 |
10940 |
return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
539 |
default: |
||
540 |
*nextTokPtr = ptr; |
||
541 |
return XML_TOK_INVALID; |
||
542 |
} |
||
543 |
✓✓✓✓ ✓✓ |
36567770 |
while (HAS_CHAR(enc, ptr, end)) { |
544 |
✗✗✗✗ ✗✓✗✓ ✗✓✗✓ ✗✗✗✗ ✗✗✓✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✓ ✗ |
36509302 |
switch (BYTE_TYPE(enc, ptr)) { |
545 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
31504198 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
546 |
case BT_SEMI: |
||
547 |
5003984 |
*nextTokPtr = ptr + MINBPC(enc); |
|
548 |
5003984 |
return XML_TOK_ENTITY_REF; |
|
549 |
default: |
||
550 |
*nextTokPtr = ptr; |
||
551 |
return XML_TOK_INVALID; |
||
552 |
} |
||
553 |
} |
||
554 |
58748 |
return XML_TOK_PARTIAL; |
|
555 |
5077546 |
} |
|
556 |
|||
557 |
/* ptr points to character following first character of attribute name */ |
||
558 |
|||
559 |
static int PTRCALL |
||
560 |
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
||
561 |
const char **nextTokPtr) |
||
562 |
{ |
||
563 |
#ifdef XML_NS |
||
564 |
int hadColon = 0; |
||
565 |
#endif |
||
566 |
✓✓✓✓ ✓✓ |
1108606810 |
while (HAS_CHAR(enc, ptr, end)) { |
567 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✓✓✗✓ ✓✓✗✗ ✗✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗✗✗✓ ✗✗✗✓ ✗ |
1100963760 |
switch (BYTE_TYPE(enc, ptr)) { |
568 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
1091064550 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
569 |
#ifdef XML_NS |
||
570 |
case BT_COLON: |
||
571 |
✓✓✗✓ ✗✓ |
2904170 |
if (hadColon) { |
572 |
10 |
*nextTokPtr = ptr; |
|
573 |
10 |
return XML_TOK_INVALID; |
|
574 |
} |
||
575 |
hadColon = 1; |
||
576 |
2904160 |
ptr += MINBPC(enc); |
|
577 |
✓✓✓✓ ✓✓ |
2908240 |
REQUIRE_CHAR(enc, ptr, end); |
578 |
✗✗✓✗ ✗✗✓✓ ✓✓✗✓ ✗✗✗✗ ✓✗✗✗ ✓✗✗✗ ✗ |
2901950 |
switch (BYTE_TYPE(enc, ptr)) { |
579 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
2900220 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
580 |
default: |
||
581 |
10 |
*nextTokPtr = ptr; |
|
582 |
10 |
return XML_TOK_INVALID; |
|
583 |
} |
||
584 |
860 |
break; |
|
585 |
#endif |
||
586 |
case BT_S: case BT_CR: case BT_LF: |
||
587 |
for (;;) { |
||
588 |
int t; |
||
589 |
|||
590 |
2300000 |
ptr += MINBPC(enc); |
|
591 |
✗✓✗✗ ✗✗ |
2300000 |
REQUIRE_CHAR(enc, ptr, end); |
592 |
✗✗✗✗ |
2300000 |
t = BYTE_TYPE(enc, ptr); |
593 |
✓✓✗✗ ✗✗ |
2300000 |
if (t == BT_EQUALS) |
594 |
2250000 |
break; |
|
595 |
✗✗✗✓ ✗✗✗✗ ✗✗✗✗ |
50000 |
switch (t) { |
596 |
case BT_S: |
||
597 |
case BT_LF: |
||
598 |
case BT_CR: |
||
599 |
break; |
||
600 |
default: |
||
601 |
*nextTokPtr = ptr; |
||
602 |
return XML_TOK_INVALID; |
||
603 |
} |
||
604 |
✓✓✗✗ ✗✗✗✗ ✗ |
50000 |
} |
605 |
/* fall through */ |
||
606 |
case BT_EQUALS: |
||
607 |
{ |
||
608 |
int open; |
||
609 |
#ifdef XML_NS |
||
610 |
hadColon = 0; |
||
611 |
#endif |
||
612 |
6980480 |
for (;;) { |
|
613 |
9680480 |
ptr += MINBPC(enc); |
|
614 |
✓✓✓✓ ✓✓ |
9687280 |
REQUIRE_CHAR(enc, ptr, end); |
615 |
✓✗✓✗ |
9677440 |
open = BYTE_TYPE(enc, ptr); |
616 |
✓✓✗✓ ✗✓ |
9673680 |
if (open == BT_QUOT || open == BT_APOS) |
617 |
break; |
||
618 |
✗✗✓✗ ✗✗✗✗ ✗✗✗✗ |
2700000 |
switch (open) { |
619 |
case BT_S: |
||
620 |
case BT_LF: |
||
621 |
case BT_CR: |
||
622 |
break; |
||
623 |
default: |
||
624 |
*nextTokPtr = ptr; |
||
625 |
return XML_TOK_INVALID; |
||
626 |
} |
||
627 |
} |
||
628 |
6973680 |
ptr += MINBPC(enc); |
|
629 |
/* in attribute value */ |
||
630 |
6973680 |
for (;;) { |
|
631 |
int t; |
||
632 |
✓✓✓✓ ✓✓ |
729106390 |
REQUIRE_CHAR(enc, ptr, end); |
633 |
✓✗✓✗ |
727248510 |
t = BYTE_TYPE(enc, ptr); |
634 |
✓✓✓✓ ✓✓ |
727240190 |
if (t == open) |
635 |
6037720 |
break; |
|
636 |
✓✗✗✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗✓✗✗ ✗✗✗✗ ✓✗✓ |
721202470 |
switch (t) { |
637 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
50 |
INVALID_CASES(ptr, nextTokPtr) |
638 |
case BT_AMP: |
||
639 |
{ |
||
640 |
10850 |
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
|
641 |
✓✓✓✓ ✓✓ |
10850 |
if (tok <= 0) { |
642 |
✗✓✗✓ ✗✓ |
2840 |
if (tok == XML_TOK_INVALID) |
643 |
*nextTokPtr = ptr; |
||
644 |
2840 |
return tok; |
|
645 |
} |
||
646 |
✓✓✓✓ ✓✓ |
8010 |
break; |
647 |
} |
||
648 |
case BT_LT: |
||
649 |
*nextTokPtr = ptr; |
||
650 |
return XML_TOK_INVALID; |
||
651 |
default: |
||
652 |
721191600 |
ptr += MINBPC(enc); |
|
653 |
721191600 |
break; |
|
654 |
} |
||
655 |
✓✓✓✓ ✓✓✓✓ ✓ |
721199610 |
} |
656 |
6037720 |
ptr += MINBPC(enc); |
|
657 |
✓✓✓✓ ✓✓ |
6044500 |
REQUIRE_CHAR(enc, ptr, end); |
658 |
✗✗✗✓ ✓✓✓✗ ✗✗✗✓ ✓✓✓✗ ✗✗✗✗ ✓✓ |
6032060 |
switch (BYTE_TYPE(enc, ptr)) { |
659 |
case BT_S: |
||
660 |
case BT_CR: |
||
661 |
case BT_LF: |
||
662 |
break; |
||
663 |
case BT_SOL: |
||
664 |
goto sol; |
||
665 |
case BT_GT: |
||
666 |
goto gt; |
||
667 |
default: |
||
668 |
*nextTokPtr = ptr; |
||
669 |
return XML_TOK_INVALID; |
||
670 |
} |
||
671 |
/* ptr points to closing quote */ |
||
672 |
for (;;) { |
||
673 |
2780830 |
ptr += MINBPC(enc); |
|
674 |
✓✓✓✓ ✗✗ |
2785860 |
REQUIRE_CHAR(enc, ptr, end); |
675 |
✗✗✓✗ ✗✓✗✗ ✗✓✓✗ ✓✓✗✗ ✗✓✗✓ ✗✗✗✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
2776690 |
switch (BYTE_TYPE(enc, ptr)) { |
676 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
2471320 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
677 |
case BT_S: case BT_CR: case BT_LF: |
||
678 |
continue; |
||
679 |
case BT_GT: |
||
680 |
gt: |
||
681 |
3553280 |
*nextTokPtr = ptr + MINBPC(enc); |
|
682 |
3553280 |
return XML_TOK_START_TAG_WITH_ATTS; |
|
683 |
case BT_SOL: |
||
684 |
sol: |
||
685 |
1460 |
ptr += MINBPC(enc); |
|
686 |
✓✓✓✓ ✓✓ |
2210 |
REQUIRE_CHAR(enc, ptr, end); |
687 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
750 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
688 |
10 |
*nextTokPtr = ptr; |
|
689 |
10 |
return XML_TOK_INVALID; |
|
690 |
} |
||
691 |
700 |
*nextTokPtr = ptr + MINBPC(enc); |
|
692 |
700 |
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
|
693 |
default: |
||
694 |
*nextTokPtr = ptr; |
||
695 |
return XML_TOK_INVALID; |
||
696 |
} |
||
697 |
break; |
||
698 |
} |
||
699 |
✓✓ | 2471170 |
break; |
700 |
} |
||
701 |
default: |
||
702 |
*nextTokPtr = ptr; |
||
703 |
return XML_TOK_INVALID; |
||
704 |
} |
||
705 |
} |
||
706 |
1572230 |
return XML_TOK_PARTIAL; |
|
707 |
6085640 |
} |
|
708 |
|||
709 |
/* ptr points to character following "<" */ |
||
710 |
|||
711 |
static int PTRCALL |
||
712 |
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
||
713 |
const char **nextTokPtr) |
||
714 |
{ |
||
715 |
#ifdef XML_NS |
||
716 |
int hadColon; |
||
717 |
#endif |
||
718 |
✓✓✓✓ ✓✓ |
72465732 |
REQUIRE_CHAR(enc, ptr, end); |
719 |
✗✗✓✓ ✓✗✓✓ ✓✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗✓✗ ✗✗✓✗ ✗✗✓✗ ✓✗ |
36243386 |
switch (BYTE_TYPE(enc, ptr)) { |
720 |
✓✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
19849116 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
721 |
case BT_EXCL: |
||
722 |
55560 |
ptr += MINBPC(enc); |
|
723 |
✓✓✓✓ ✓✓ |
56370 |
REQUIRE_CHAR(enc, ptr, end); |
724 |
✓✓✓✓ ✗✗✓✗ ✓✗✗✓ ✗ |
59190 |
switch (BYTE_TYPE(enc, ptr)) { |
725 |
case BT_MINUS: |
||
726 |
50870 |
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
727 |
case BT_LSQB: |
||
728 |
3870 |
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
|
729 |
end, nextTokPtr); |
||
730 |
} |
||
731 |
10 |
*nextTokPtr = ptr; |
|
732 |
10 |
return XML_TOK_INVALID; |
|
733 |
case BT_QUEST: |
||
734 |
3630 |
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
735 |
case BT_SOL: |
||
736 |
16318540 |
return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
737 |
default: |
||
738 |
*nextTokPtr = ptr; |
||
739 |
return XML_TOK_INVALID; |
||
740 |
} |
||
741 |
#ifdef XML_NS |
||
742 |
hadColon = 0; |
||
743 |
#endif |
||
744 |
/* we have a start-tag */ |
||
745 |
✓✓✓✓ ✓✓ |
1523724402 |
while (HAS_CHAR(enc, ptr, end)) { |
746 |
✗✗✗✗ ✗✓✓✗ ✓✓✗✗ ✓✓✓✗ ✓✗✗✗ ✗✗✗✓ ✗✗✗✓ ✗✗✓✓ ✓✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✓✓✓✗ |
1502840404 |
switch (BYTE_TYPE(enc, ptr)) { |
747 |
✓✓✓✓ ✗✗✗✗ ✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
1482043950 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
748 |
#ifdef XML_NS |
||
749 |
case BT_COLON: |
||
750 |
✓✓✗✓ ✗✓ |
1985720 |
if (hadColon) { |
751 |
10 |
*nextTokPtr = ptr; |
|
752 |
10 |
return XML_TOK_INVALID; |
|
753 |
} |
||
754 |
hadColon = 1; |
||
755 |
1985710 |
ptr += MINBPC(enc); |
|
756 |
✓✓✓✓ ✓✓ |
1988070 |
REQUIRE_CHAR(enc, ptr, end); |
757 |
✗✗✓✗ ✗✗✓✓ ✗✗✗✓ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗ |
1986410 |
switch (BYTE_TYPE(enc, ptr)) { |
758 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
1984020 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
759 |
default: |
||
760 |
10 |
*nextTokPtr = ptr; |
|
761 |
10 |
return XML_TOK_INVALID; |
|
762 |
} |
||
763 |
1190 |
break; |
|
764 |
#endif |
||
765 |
case BT_S: case BT_CR: case BT_LF: |
||
766 |
{ |
||
767 |
6090080 |
ptr += MINBPC(enc); |
|
768 |
✓✓✓✓ ✓✓ |
12893590 |
while (HAS_CHAR(enc, ptr, end)) { |
769 |
✗✗✓✗ ✗✗✗✗ ✓✗✓✓ ✓✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ |
6804110 |
switch (BYTE_TYPE(enc, ptr)) { |
770 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
6085790 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
771 |
case BT_GT: |
||
772 |
goto gt; |
||
773 |
case BT_SOL: |
||
774 |
goto sol; |
||
775 |
case BT_S: case BT_CR: case BT_LF: |
||
776 |
713430 |
ptr += MINBPC(enc); |
|
777 |
713430 |
continue; |
|
778 |
default: |
||
779 |
*nextTokPtr = ptr; |
||
780 |
return XML_TOK_INVALID; |
||
781 |
} |
||
782 |
6085640 |
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
|
783 |
} |
||
784 |
4390 |
return XML_TOK_PARTIAL; |
|
785 |
} |
||
786 |
case BT_GT: |
||
787 |
gt: |
||
788 |
12706404 |
*nextTokPtr = ptr + MINBPC(enc); |
|
789 |
12706404 |
return XML_TOK_START_TAG_NO_ATTS; |
|
790 |
case BT_SOL: |
||
791 |
sol: |
||
792 |
2470 |
ptr += MINBPC(enc); |
|
793 |
✓✓✓✓ ✓✓ |
3580 |
REQUIRE_CHAR(enc, ptr, end); |
794 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
1440 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
795 |
*nextTokPtr = ptr; |
||
796 |
return XML_TOK_INVALID; |
||
797 |
} |
||
798 |
1360 |
*nextTokPtr = ptr + MINBPC(enc); |
|
799 |
1360 |
return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
|
800 |
default: |
||
801 |
*nextTokPtr = ptr; |
||
802 |
return XML_TOK_INVALID; |
||
803 |
} |
||
804 |
} |
||
805 |
1047512 |
return XML_TOK_PARTIAL; |
|
806 |
36230806 |
} |
|
807 |
|||
808 |
static int PTRCALL |
||
809 |
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
||
810 |
const char **nextTokPtr) |
||
811 |
{ |
||
812 |
✓✓✓✓ ✓✓ |
255392412 |
if (ptr >= end) |
813 |
27134 |
return XML_TOK_NONE; |
|
814 |
if (MINBPC(enc) > 1) { |
||
815 |
10690 |
size_t n = end - ptr; |
|
816 |
✓✓✓✓ |
10690 |
if (n & (MINBPC(enc) - 1)) { |
817 |
5080 |
n &= ~(MINBPC(enc) - 1); |
|
818 |
✓✓✓✓ |
5080 |
if (n == 0) |
819 |
720 |
return XML_TOK_PARTIAL; |
|
820 |
4360 |
end = ptr + n; |
|
821 |
4360 |
} |
|
822 |
✓✓✓✓ |
9970 |
} |
823 |
✓✓✓✓ ✓✓✓✓ ✗✗✓✓ ✓✓✓✓ ✗✗✓✗ ✗✗✗✗ ✗✓✓✓ ✓✓✗✗ ✗✗✗✗ ✗✗✗✓ |
127688292 |
switch (BYTE_TYPE(enc, ptr)) { |
824 |
case BT_LT: |
||
825 |
36230806 |
return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
826 |
case BT_AMP: |
||
827 |
5066126 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
828 |
case BT_CR: |
||
829 |
100 |
ptr += MINBPC(enc); |
|
830 |
✓✓✗✗ ✗✗ |
100 |
if (! HAS_CHAR(enc, ptr, end)) |
831 |
80 |
return XML_TOK_TRAILING_CR; |
|
832 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
20 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
833 |
10 |
ptr += MINBPC(enc); |
|
834 |
20 |
*nextTokPtr = ptr; |
|
835 |
20 |
return XML_TOK_DATA_NEWLINE; |
|
836 |
case BT_LF: |
||
837 |
35651790 |
*nextTokPtr = ptr + MINBPC(enc); |
|
838 |
35651790 |
return XML_TOK_DATA_NEWLINE; |
|
839 |
case BT_RSQB: |
||
840 |
80 |
ptr += MINBPC(enc); |
|
841 |
✓✓✓✗ ✗✗ |
80 |
if (! HAS_CHAR(enc, ptr, end)) |
842 |
60 |
return XML_TOK_TRAILING_RSQB; |
|
843 |
✗✓✗✗ ✗✗✗✗ ✗✗ |
20 |
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
844 |
break; |
||
845 |
ptr += MINBPC(enc); |
||
846 |
if (! HAS_CHAR(enc, ptr, end)) |
||
847 |
return XML_TOK_TRAILING_RSQB; |
||
848 |
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
||
849 |
ptr -= MINBPC(enc); |
||
850 |
break; |
||
851 |
} |
||
852 |
*nextTokPtr = ptr; |
||
853 |
return XML_TOK_INVALID; |
||
854 |
✓✓✓✓ ✓✓✓✓ ✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
4450 |
INVALID_CASES(ptr, nextTokPtr) |
855 |
default: |
||
856 |
50717140 |
ptr += MINBPC(enc); |
|
857 |
50717140 |
break; |
|
858 |
} |
||
859 |
✓✓✗✓ ✗✓ |
810978730 |
while (HAS_CHAR(enc, ptr, end)) { |
860 |
✗✗✗✓ ✗✗✗✗ ✗✗✓✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
810961650 |
switch (BYTE_TYPE(enc, ptr)) { |
861 |
#define LEAD_CASE(n) \ |
||
862 |
case BT_LEAD ## n: \ |
||
863 |
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
||
864 |
*nextTokPtr = ptr; \ |
||
865 |
return XML_TOK_DATA_CHARS; \ |
||
866 |
} \ |
||
867 |
ptr += n; \ |
||
868 |
break; |
||
869 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
||
870 |
#undef LEAD_CASE |
||
871 |
case BT_RSQB: |
||
872 |
✓✗✗✗ ✗✗ |
300000 |
if (HAS_CHARS(enc, ptr, end, 2)) { |
873 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
300000 |
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
874 |
ptr += MINBPC(enc); |
||
875 |
300000 |
break; |
|
876 |
} |
||
877 |
if (HAS_CHARS(enc, ptr, end, 3)) { |
||
878 |
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
||
879 |
ptr += MINBPC(enc); |
||
880 |
break; |
||
881 |
} |
||
882 |
*nextTokPtr = ptr + 2*MINBPC(enc); |
||
883 |
return XML_TOK_INVALID; |
||
884 |
} |
||
885 |
} |
||
886 |
/* fall through */ |
||
887 |
case BT_AMP: |
||
888 |
case BT_LT: |
||
889 |
case BT_NONXML: |
||
890 |
case BT_MALFORM: |
||
891 |
case BT_TRAIL: |
||
892 |
case BT_CR: |
||
893 |
case BT_LF: |
||
894 |
50700160 |
*nextTokPtr = ptr; |
|
895 |
50700160 |
return XML_TOK_DATA_CHARS; |
|
896 |
default: |
||
897 |
759961490 |
ptr += MINBPC(enc); |
|
898 |
759961490 |
break; |
|
899 |
} |
||
900 |
} |
||
901 |
17080 |
*nextTokPtr = ptr; |
|
902 |
17080 |
return XML_TOK_DATA_CHARS; |
|
903 |
127696206 |
} |
|
904 |
|||
905 |
/* ptr points to character following "%" */ |
||
906 |
|||
907 |
static int PTRCALL |
||
908 |
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
||
909 |
const char **nextTokPtr) |
||
910 |
{ |
||
911 |
✓✓✓✓ ✓✓ |
50750 |
REQUIRE_CHAR(enc, ptr, end); |
912 |
✗✗✓✗ ✗✗✗✗ ✗✓✗✓ ✗✗✗✓ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✓✗✗✗ ✗✗✗✓ ✗ |
23530 |
switch (BYTE_TYPE(enc, ptr)) { |
913 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
22290 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
914 |
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
||
915 |
960 |
*nextTokPtr = ptr; |
|
916 |
960 |
return XML_TOK_PERCENT; |
|
917 |
default: |
||
918 |
*nextTokPtr = ptr; |
||
919 |
return XML_TOK_INVALID; |
||
920 |
} |
||
921 |
✓✓✓✓ ✓✓ |
10519920 |
while (HAS_CHAR(enc, ptr, end)) { |
922 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✓ ✗ |
10498500 |
switch (BYTE_TYPE(enc, ptr)) { |
923 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
10497570 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
924 |
case BT_SEMI: |
||
925 |
810 |
*nextTokPtr = ptr + MINBPC(enc); |
|
926 |
810 |
return XML_TOK_PARAM_ENTITY_REF; |
|
927 |
default: |
||
928 |
*nextTokPtr = ptr; |
||
929 |
return XML_TOK_INVALID; |
||
930 |
} |
||
931 |
} |
||
932 |
21430 |
return XML_TOK_PARTIAL; |
|
933 |
24650 |
} |
|
934 |
|||
935 |
static int PTRCALL |
||
936 |
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
||
937 |
const char **nextTokPtr) |
||
938 |
{ |
||
939 |
✓✓✗✗ ✓✓ |
34050 |
REQUIRE_CHAR(enc, ptr, end); |
940 |
✗✗✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✓✓✗ ✓✗✗✗ ✗ |
14520 |
switch (BYTE_TYPE(enc, ptr)) { |
941 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
14410 |
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
942 |
default: |
||
943 |
10 |
*nextTokPtr = ptr; |
|
944 |
10 |
return XML_TOK_INVALID; |
|
945 |
} |
||
946 |
✓✓✗✗ ✓✓ |
64950 |
while (HAS_CHAR(enc, ptr, end)) { |
947 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✓✗ |
52390 |
switch (BYTE_TYPE(enc, ptr)) { |
948 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
50540 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
949 |
case BT_CR: case BT_LF: case BT_S: |
||
950 |
case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
||
951 |
1770 |
*nextTokPtr = ptr; |
|
952 |
1770 |
return XML_TOK_POUND_NAME; |
|
953 |
default: |
||
954 |
*nextTokPtr = ptr; |
||
955 |
return XML_TOK_INVALID; |
||
956 |
} |
||
957 |
} |
||
958 |
12590 |
return -XML_TOK_POUND_NAME; |
|
959 |
16140 |
} |
|
960 |
|||
961 |
static int PTRCALL |
||
962 |
PREFIX(scanLit)(int open, const ENCODING *enc, |
||
963 |
const char *ptr, const char *end, |
||
964 |
const char **nextTokPtr) |
||
965 |
{ |
||
966 |
✓✓✓✓ ✓✓ |
919949086 |
while (HAS_CHAR(enc, ptr, end)) { |
967 |
✓✗✓✓ |
914142516 |
int t = BYTE_TYPE(enc, ptr); |
968 |
✓✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗✗✗✗ ✓✓✗✗ ✗✗✗✗ ✗✓✓ |
914122396 |
switch (t) { |
969 |
✓✓✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
110 |
INVALID_CASES(ptr, nextTokPtr) |
970 |
case BT_QUOT: |
||
971 |
case BT_APOS: |
||
972 |
161806 |
ptr += MINBPC(enc); |
|
973 |
✓✓✓✗ ✓✗ |
161806 |
if (t != open) |
974 |
break; |
||
975 |
✓✓✓✓ ✓✓ |
121016 |
if (! HAS_CHAR(enc, ptr, end)) |
976 |
10518 |
return -XML_TOK_LITERAL; |
|
977 |
110498 |
*nextTokPtr = ptr; |
|
978 |
✗✗✗✗ ✗✓✗✓ ✗✗✗✗ ✗✗✓✗ ✓✗✗✗ ✗✗✗✓ ✗ |
110718 |
switch (BYTE_TYPE(enc, ptr)) { |
979 |
case BT_S: case BT_CR: case BT_LF: |
||
980 |
case BT_GT: case BT_PERCNT: case BT_LSQB: |
||
981 |
110498 |
return XML_TOK_LITERAL; |
|
982 |
default: |
||
983 |
return XML_TOK_INVALID; |
||
984 |
} |
||
985 |
default: |
||
986 |
913960550 |
ptr += MINBPC(enc); |
|
987 |
913960550 |
break; |
|
988 |
} |
||
989 |
✓✓✓✓ ✓✓ |
914001370 |
} |
990 |
1861546 |
return XML_TOK_PARTIAL; |
|
991 |
1982572 |
} |
|
992 |
|||
993 |
static int PTRCALL |
||
994 |
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
||
995 |
const char **nextTokPtr) |
||
996 |
{ |
||
997 |
int tok; |
||
998 |
✓✓✓✓ ✓✓ |
10170272 |
if (ptr >= end) |
999 |
185806 |
return XML_TOK_NONE; |
|
1000 |
if (MINBPC(enc) > 1) { |
||
1001 |
35380 |
size_t n = end - ptr; |
|
1002 |
✓✓✓✓ |
35380 |
if (n & (MINBPC(enc) - 1)) { |
1003 |
17100 |
n &= ~(MINBPC(enc) - 1); |
|
1004 |
✓✓✓✓ |
17100 |
if (n == 0) |
1005 |
1650 |
return XML_TOK_PARTIAL; |
|
1006 |
15450 |
end = ptr + n; |
|
1007 |
15450 |
} |
|
1008 |
✓✓✓✓ |
33730 |
} |
1009 |
✓✓✓✓ ✗✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✗✗✓ ✗✗✗✓ ✓✓✗✗ ✓✓✓✗ ✓✓✗✓ ✓✗✗✗ ✓✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✓✓✗ ✓✓✗✗ ✓✓✗✓ ✓✗✗✗ ✓✓✗✗ ✗✗✓✗ ✗✗✗✓ ✗ |
4965140 |
switch (BYTE_TYPE(enc, ptr)) { |
1010 |
case BT_QUOT: |
||
1011 |
100040 |
return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1012 |
case BT_APOS: |
||
1013 |
1882532 |
return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1014 |
case BT_LT: |
||
1015 |
{ |
||
1016 |
872186 |
ptr += MINBPC(enc); |
|
1017 |
✓✓✓✓ ✓✓ |
890818 |
REQUIRE_CHAR(enc, ptr, end); |
1018 |
✓✓✗✗ ✗✗✗✓ ✓✓✗✓ ✓✗✗✗ ✗✗✓✗ ✓✗✓✓ ✗✗✗✗ ✗✓✗ |
906234 |
switch (BYTE_TYPE(enc, ptr)) { |
1019 |
case BT_EXCL: |
||
1020 |
429170 |
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1021 |
case BT_QUEST: |
||
1022 |
364220 |
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1023 |
case BT_NMSTRT: |
||
1024 |
case BT_HEX: |
||
1025 |
case BT_NONASCII: |
||
1026 |
case BT_LEAD2: |
||
1027 |
case BT_LEAD3: |
||
1028 |
case BT_LEAD4: |
||
1029 |
60144 |
*nextTokPtr = ptr - MINBPC(enc); |
|
1030 |
60144 |
return XML_TOK_INSTANCE_START; |
|
1031 |
} |
||
1032 |
20 |
*nextTokPtr = ptr; |
|
1033 |
20 |
return XML_TOK_INVALID; |
|
1034 |
} |
||
1035 |
case BT_CR: |
||
1036 |
✓✗✓✗ ✗✗ |
30 |
if (ptr + MINBPC(enc) == end) { |
1037 |
30 |
*nextTokPtr = end; |
|
1038 |
/* indicate that this might be part of a CR/LF pair */ |
||
1039 |
30 |
return -XML_TOK_PROLOG_S; |
|
1040 |
} |
||
1041 |
/* fall through */ |
||
1042 |
case BT_S: case BT_LF: |
||
1043 |
for (;;) { |
||
1044 |
2448010 |
ptr += MINBPC(enc); |
|
1045 |
✓✓✓✓ ✗✓ |
2448010 |
if (! HAS_CHAR(enc, ptr, end)) |
1046 |
break; |
||
1047 |
✗✓✗✓ ✓✗✗✗ ✗✓✗✗ ✗✗✗✗ |
2309160 |
switch (BYTE_TYPE(enc, ptr)) { |
1048 |
case BT_S: case BT_LF: |
||
1049 |
break; |
||
1050 |
case BT_CR: |
||
1051 |
/* don't split CR/LF pair */ |
||
1052 |
if (ptr + MINBPC(enc) != end) |
||
1053 |
break; |
||
1054 |
/* fall through */ |
||
1055 |
default: |
||
1056 |
559100 |
*nextTokPtr = ptr; |
|
1057 |
559100 |
return XML_TOK_PROLOG_S; |
|
1058 |
} |
||
1059 |
} |
||
1060 |
138870 |
*nextTokPtr = ptr; |
|
1061 |
138870 |
return XML_TOK_PROLOG_S; |
|
1062 |
case BT_PERCNT: |
||
1063 |
24300 |
return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1064 |
case BT_COMMA: |
||
1065 |
3260 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1066 |
3260 |
return XML_TOK_COMMA; |
|
1067 |
case BT_LSQB: |
||
1068 |
5694 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1069 |
5694 |
return XML_TOK_OPEN_BRACKET; |
|
1070 |
case BT_RSQB: |
||
1071 |
7968 |
ptr += MINBPC(enc); |
|
1072 |
✓✓✓✓ ✓✓ |
7968 |
if (! HAS_CHAR(enc, ptr, end)) |
1073 |
3984 |
return -XML_TOK_CLOSE_BRACKET; |
|
1074 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
4034 |
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1075 |
REQUIRE_CHARS(enc, ptr, end, 2); |
||
1076 |
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
||
1077 |
*nextTokPtr = ptr + 2*MINBPC(enc); |
||
1078 |
return XML_TOK_COND_SECT_CLOSE; |
||
1079 |
} |
||
1080 |
} |
||
1081 |
3984 |
*nextTokPtr = ptr; |
|
1082 |
3984 |
return XML_TOK_CLOSE_BRACKET; |
|
1083 |
case BT_LPAR: |
||
1084 |
4890 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1085 |
4890 |
return XML_TOK_OPEN_PAREN; |
|
1086 |
case BT_RPAR: |
||
1087 |
7140 |
ptr += MINBPC(enc); |
|
1088 |
✓✓✗✗ ✗✗ |
7140 |
if (! HAS_CHAR(enc, ptr, end)) |
1089 |
3560 |
return -XML_TOK_CLOSE_PAREN; |
|
1090 |
✓✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗ |
3580 |
switch (BYTE_TYPE(enc, ptr)) { |
1091 |
case BT_AST: |
||
1092 |
350 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1093 |
350 |
return XML_TOK_CLOSE_PAREN_ASTERISK; |
|
1094 |
case BT_QUEST: |
||
1095 |
*nextTokPtr = ptr + MINBPC(enc); |
||
1096 |
return XML_TOK_CLOSE_PAREN_QUESTION; |
||
1097 |
case BT_PLUS: |
||
1098 |
70 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1099 |
70 |
return XML_TOK_CLOSE_PAREN_PLUS; |
|
1100 |
case BT_CR: case BT_LF: case BT_S: |
||
1101 |
case BT_GT: case BT_COMMA: case BT_VERBAR: |
||
1102 |
case BT_RPAR: |
||
1103 |
3160 |
*nextTokPtr = ptr; |
|
1104 |
3160 |
return XML_TOK_CLOSE_PAREN; |
|
1105 |
} |
||
1106 |
*nextTokPtr = ptr; |
||
1107 |
return XML_TOK_INVALID; |
||
1108 |
case BT_VERBAR: |
||
1109 |
9050 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1110 |
9050 |
return XML_TOK_OR; |
|
1111 |
case BT_GT: |
||
1112 |
65642 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1113 |
65642 |
return XML_TOK_DECL_CLOSE; |
|
1114 |
case BT_NUM: |
||
1115 |
16140 |
return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1116 |
#define LEAD_CASE(n) \ |
||
1117 |
case BT_LEAD ## n: \ |
||
1118 |
if (end - ptr < n) \ |
||
1119 |
return XML_TOK_PARTIAL_CHAR; \ |
||
1120 |
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
||
1121 |
ptr += n; \ |
||
1122 |
tok = XML_TOK_NAME; \ |
||
1123 |
break; \ |
||
1124 |
} \ |
||
1125 |
if (IS_NAME_CHAR(enc, ptr, n)) { \ |
||
1126 |
ptr += n; \ |
||
1127 |
tok = XML_TOK_NMTOKEN; \ |
||
1128 |
break; \ |
||
1129 |
} \ |
||
1130 |
*nextTokPtr = ptr; \ |
||
1131 |
return XML_TOK_INVALID; |
||
1132 |
✓✓✓✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗ |
300 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1133 |
#undef LEAD_CASE |
||
1134 |
case BT_NMSTRT: |
||
1135 |
case BT_HEX: |
||
1136 |
tok = XML_TOK_NAME; |
||
1137 |
1200508 |
ptr += MINBPC(enc); |
|
1138 |
1200508 |
break; |
|
1139 |
case BT_DIGIT: |
||
1140 |
case BT_NAME: |
||
1141 |
case BT_MINUS: |
||
1142 |
#ifdef XML_NS |
||
1143 |
case BT_COLON: |
||
1144 |
#endif |
||
1145 |
tok = XML_TOK_NMTOKEN; |
||
1146 |
60 |
ptr += MINBPC(enc); |
|
1147 |
60 |
break; |
|
1148 |
case BT_NONASCII: |
||
1149 |
✗✗✓✓ |
80 |
if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
1150 |
50 |
ptr += MINBPC(enc); |
|
1151 |
tok = XML_TOK_NAME; |
||
1152 |
50 |
break; |
|
1153 |
} |
||
1154 |
✗✗✓✗ |
30 |
if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
1155 |
30 |
ptr += MINBPC(enc); |
|
1156 |
tok = XML_TOK_NMTOKEN; |
||
1157 |
30 |
break; |
|
1158 |
} |
||
1159 |
/* fall through */ |
||
1160 |
default: |
||
1161 |
50 |
*nextTokPtr = ptr; |
|
1162 |
50 |
return XML_TOK_INVALID; |
|
1163 |
} |
||
1164 |
✓✓✓✓ ✓✓ |
429803914 |
while (HAS_CHAR(enc, ptr, end)) { |
1165 |
✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✓✓✓ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✓✓✓✗ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗ |
428765252 |
switch (BYTE_TYPE(enc, ptr)) { |
1166 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
428595076 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1167 |
case BT_GT: case BT_RPAR: case BT_COMMA: |
||
1168 |
case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
||
1169 |
case BT_S: case BT_CR: case BT_LF: |
||
1170 |
150256 |
*nextTokPtr = ptr; |
|
1171 |
150256 |
return tok; |
|
1172 |
#ifdef XML_NS |
||
1173 |
case BT_COLON: |
||
1174 |
9400 |
ptr += MINBPC(enc); |
|
1175 |
✓✓✓✗ ✗✗✗✓ ✗ |
9400 |
switch (tok) { |
1176 |
case XML_TOK_NAME: |
||
1177 |
✓✓✗✗ ✓✓ |
10450 |
REQUIRE_CHAR(enc, ptr, end); |
1178 |
tok = XML_TOK_PREFIXED_NAME; |
||
1179 |
✗✗✗✗ ✗✓✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✗✓✗✗ ✗✗ |
8240 |
switch (BYTE_TYPE(enc, ptr)) { |
1180 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ |
8170 |
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1181 |
default: |
||
1182 |
tok = XML_TOK_NMTOKEN; |
||
1183 |
10 |
break; |
|
1184 |
} |
||
1185 |
break; |
||
1186 |
case XML_TOK_PREFIXED_NAME: |
||
1187 |
tok = XML_TOK_NMTOKEN; |
||
1188 |
50 |
break; |
|
1189 |
} |
||
1190 |
break; |
||
1191 |
#endif |
||
1192 |
case BT_PLUS: |
||
1193 |
✓✓✗✗ ✗✗ |
290 |
if (tok == XML_TOK_NMTOKEN) { |
1194 |
10 |
*nextTokPtr = ptr; |
|
1195 |
10 |
return XML_TOK_INVALID; |
|
1196 |
} |
||
1197 |
280 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1198 |
280 |
return XML_TOK_NAME_PLUS; |
|
1199 |
case BT_AST: |
||
1200 |
✓✗✗✗ ✗✗ |
10 |
if (tok == XML_TOK_NMTOKEN) { |
1201 |
10 |
*nextTokPtr = ptr; |
|
1202 |
10 |
return XML_TOK_INVALID; |
|
1203 |
} |
||
1204 |
*nextTokPtr = ptr + MINBPC(enc); |
||
1205 |
return XML_TOK_NAME_ASTERISK; |
||
1206 |
case BT_QUEST: |
||
1207 |
✓✓✗✗ ✗✗ |
3240 |
if (tok == XML_TOK_NMTOKEN) { |
1208 |
10 |
*nextTokPtr = ptr; |
|
1209 |
10 |
return XML_TOK_INVALID; |
|
1210 |
} |
||
1211 |
3230 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1212 |
3230 |
return XML_TOK_NAME_QUESTION; |
|
1213 |
default: |
||
1214 |
10 |
*nextTokPtr = ptr; |
|
1215 |
10 |
return XML_TOK_INVALID; |
|
1216 |
} |
||
1217 |
} |
||
1218 |
1045712 |
return -tok; |
|
1219 |
5085136 |
} |
|
1220 |
|||
1221 |
static int PTRCALL |
||
1222 |
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
||
1223 |
const char *end, const char **nextTokPtr) |
||
1224 |
{ |
||
1225 |
const char *start; |
||
1226 |
✓✓✓✓ ✓✓ |
12320 |
if (ptr >= end) |
1227 |
890 |
return XML_TOK_NONE; |
|
1228 |
✗✓✗✓ ✗✓ |
5270 |
else if (! HAS_CHAR(enc, ptr, end)) { |
1229 |
/* This line cannot be executed. The incoming data has already |
||
1230 |
* been tokenized once, so incomplete characters like this have |
||
1231 |
* already been eliminated from the input. Retaining the paranoia |
||
1232 |
* check is still valuable, however. |
||
1233 |
*/ |
||
1234 |
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
||
1235 |
} |
||
1236 |
start = ptr; |
||
1237 |
✓✓✓✗ ✓✗ |
401200 |
while (HAS_CHAR(enc, ptr, end)) { |
1238 |
✓✗✗✓ ✓✓✓✓ ✓✓✗✗ ✗✗✓✗ ✗✗✓✗ ✓✗✗✗ ✗✓✗✗ ✗✓✗ |
395320 |
switch (BYTE_TYPE(enc, ptr)) { |
1239 |
#define LEAD_CASE(n) \ |
||
1240 |
case BT_LEAD ## n: ptr += n; break; |
||
1241 |
10 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1242 |
#undef LEAD_CASE |
||
1243 |
case BT_AMP: |
||
1244 |
✓✓✓✗ ✓✗ |
470 |
if (ptr == start) |
1245 |
430 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1246 |
40 |
*nextTokPtr = ptr; |
|
1247 |
40 |
return XML_TOK_DATA_CHARS; |
|
1248 |
case BT_LT: |
||
1249 |
/* this is for inside entity references */ |
||
1250 |
10 |
*nextTokPtr = ptr; |
|
1251 |
10 |
return XML_TOK_INVALID; |
|
1252 |
case BT_LF: |
||
1253 |
✓✗✗✗ ✗✗ |
20 |
if (ptr == start) { |
1254 |
20 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1255 |
20 |
return XML_TOK_DATA_NEWLINE; |
|
1256 |
} |
||
1257 |
*nextTokPtr = ptr; |
||
1258 |
return XML_TOK_DATA_CHARS; |
||
1259 |
case BT_CR: |
||
1260 |
✓✓✗✗ ✗✗ |
30 |
if (ptr == start) { |
1261 |
20 |
ptr += MINBPC(enc); |
|
1262 |
✓✓✗✗ ✗✗ |
20 |
if (! HAS_CHAR(enc, ptr, end)) |
1263 |
10 |
return XML_TOK_TRAILING_CR; |
|
1264 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
10 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
1265 |
10 |
ptr += MINBPC(enc); |
|
1266 |
10 |
*nextTokPtr = ptr; |
|
1267 |
10 |
return XML_TOK_DATA_NEWLINE; |
|
1268 |
} |
||
1269 |
10 |
*nextTokPtr = ptr; |
|
1270 |
10 |
return XML_TOK_DATA_CHARS; |
|
1271 |
case BT_S: |
||
1272 |
✓✓✓✗ ✓✗ |
4010 |
if (ptr == start) { |
1273 |
2400 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1274 |
2400 |
return XML_TOK_ATTRIBUTE_VALUE_S; |
|
1275 |
} |
||
1276 |
1610 |
*nextTokPtr = ptr; |
|
1277 |
1610 |
return XML_TOK_DATA_CHARS; |
|
1278 |
default: |
||
1279 |
390650 |
ptr += MINBPC(enc); |
|
1280 |
390650 |
break; |
|
1281 |
} |
||
1282 |
} |
||
1283 |
730 |
*nextTokPtr = ptr; |
|
1284 |
730 |
return XML_TOK_DATA_CHARS; |
|
1285 |
6160 |
} |
|
1286 |
|||
1287 |
static int PTRCALL |
||
1288 |
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
||
1289 |
const char *end, const char **nextTokPtr) |
||
1290 |
{ |
||
1291 |
const char *start; |
||
1292 |
✓✓✓✓ ✓✓ |
4936 |
if (ptr >= end) |
1293 |
1054 |
return XML_TOK_NONE; |
|
1294 |
✗✓✗✓ ✗✓ |
1414 |
else if (! HAS_CHAR(enc, ptr, end)) { |
1295 |
/* This line cannot be executed. The incoming data has already |
||
1296 |
* been tokenized once, so incomplete characters like this have |
||
1297 |
* already been eliminated from the input. Retaining the paranoia |
||
1298 |
* check is still valuable, however. |
||
1299 |
*/ |
||
1300 |
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
||
1301 |
} |
||
1302 |
start = ptr; |
||
1303 |
✓✓✓✓ ✓✓ |
382654 |
while (HAS_CHAR(enc, ptr, end)) { |
1304 |
✗✗✗✓ ✓✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✓✓✗ ✗✗✗✓ ✗✗✗✓ |
381206 |
switch (BYTE_TYPE(enc, ptr)) { |
1305 |
#define LEAD_CASE(n) \ |
||
1306 |
case BT_LEAD ## n: ptr += n; break; |
||
1307 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
||
1308 |
#undef LEAD_CASE |
||
1309 |
case BT_AMP: |
||
1310 |
✓✓✗✗ ✓✗ |
180 |
if (ptr == start) |
1311 |
140 |
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1312 |
40 |
*nextTokPtr = ptr; |
|
1313 |
40 |
return XML_TOK_DATA_CHARS; |
|
1314 |
case BT_PERCNT: |
||
1315 |
✓✗✗✗ ✗✗ |
350 |
if (ptr == start) { |
1316 |
350 |
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
|
1317 |
end, nextTokPtr); |
||
1318 |
350 |
return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
|
1319 |
} |
||
1320 |
*nextTokPtr = ptr; |
||
1321 |
return XML_TOK_DATA_CHARS; |
||
1322 |
case BT_LF: |
||
1323 |
✓✓✗✗ ✗✗ |
150 |
if (ptr == start) { |
1324 |
120 |
*nextTokPtr = ptr + MINBPC(enc); |
|
1325 |
120 |
return XML_TOK_DATA_NEWLINE; |
|
1326 |
} |
||
1327 |
30 |
*nextTokPtr = ptr; |
|
1328 |
30 |
return XML_TOK_DATA_CHARS; |
|
1329 |
case BT_CR: |
||
1330 |
✓✓✗✗ ✗✗ |
20 |
if (ptr == start) { |
1331 |
10 |
ptr += MINBPC(enc); |
|
1332 |
✓✗✗✗ ✗✗ |
10 |
if (! HAS_CHAR(enc, ptr, end)) |
1333 |
10 |
return XML_TOK_TRAILING_CR; |
|
1334 |
if (BYTE_TYPE(enc, ptr) == BT_LF) |
||
1335 |
ptr += MINBPC(enc); |
||
1336 |
*nextTokPtr = ptr; |
||
1337 |
return XML_TOK_DATA_NEWLINE; |
||
1338 |
} |
||
1339 |
10 |
*nextTokPtr = ptr; |
|
1340 |
10 |
return XML_TOK_DATA_CHARS; |
|
1341 |
default: |
||
1342 |
379826 |
ptr += MINBPC(enc); |
|
1343 |
379826 |
break; |
|
1344 |
} |
||
1345 |
} |
||
1346 |
714 |
*nextTokPtr = ptr; |
|
1347 |
714 |
return XML_TOK_DATA_CHARS; |
|
1348 |
2468 |
} |
|
1349 |
|||
1350 |
#ifdef XML_DTD |
||
1351 |
|||
1352 |
static int PTRCALL |
||
1353 |
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, |
||
1354 |
const char *end, const char **nextTokPtr) |
||
1355 |
{ |
||
1356 |
int level = 0; |
||
1357 |
if (MINBPC(enc) > 1) { |
||
1358 |
2120 |
size_t n = end - ptr; |
|
1359 |
✓✓✓✓ |
1060 |
if (n & (MINBPC(enc) - 1)) { |
1360 |
520 |
n &= ~(MINBPC(enc) - 1); |
|
1361 |
520 |
end = ptr + n; |
|
1362 |
520 |
} |
|
1363 |
} |
||
1364 |
✓✓✓✓ ✓✓ |
19900 |
while (HAS_CHAR(enc, ptr, end)) { |
1365 |
✓✓✗✗ ✗✓✓✓ ✓✓✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✓ |
41460 |
switch (BYTE_TYPE(enc, ptr)) { |
1366 |
✓✓✓✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
100 |
INVALID_CASES(ptr, nextTokPtr) |
1367 |
case BT_LT: |
||
1368 |
1460 |
ptr += MINBPC(enc); |
|
1369 |
✓✓✓✓ ✓✓ |
1530 |
REQUIRE_CHAR(enc, ptr, end); |
1370 |
✗✓✗✓ ✗✓✗✓ ✗✓ |
2370 |
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
1371 |
1390 |
ptr += MINBPC(enc); |
|
1372 |
✓✓✓✓ ✓✓ |
1460 |
REQUIRE_CHAR(enc, ptr, end); |
1373 |
✓✗✗✓ ✓✗✗✓ ✓✗ |
2260 |
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
1374 |
++level; |
||
1375 |
ptr += MINBPC(enc); |
||
1376 |
} |
||
1377 |
} |
||
1378 |
break; |
||
1379 |
case BT_RSQB: |
||
1380 |
130 |
ptr += MINBPC(enc); |
|
1381 |
✓✓✓✓ ✓✓ |
180 |
REQUIRE_CHAR(enc, ptr, end); |
1382 |
✗✓✗✓ ✗✓✗✓ ✗✓ |
140 |
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1383 |
80 |
ptr += MINBPC(enc); |
|
1384 |
✓✓✓✓ ✓✓ |
130 |
REQUIRE_CHAR(enc, ptr, end); |
1385 |
✗✓✗✓ ✗✓✗✓ ✗✓ |
50 |
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
1386 |
30 |
ptr += MINBPC(enc); |
|
1387 |
✓✗✓✗ ✓✗ |
30 |
if (level == 0) { |
1388 |
30 |
*nextTokPtr = ptr; |
|
1389 |
30 |
return XML_TOK_IGNORE_SECT; |
|
1390 |
} |
||
1391 |
--level; |
||
1392 |
} |
||
1393 |
} |
||
1394 |
break; |
||
1395 |
default: |
||
1396 |
14900 |
ptr += MINBPC(enc); |
|
1397 |
14900 |
break; |
|
1398 |
} |
||
1399 |
} |
||
1400 |
1260 |
return XML_TOK_PARTIAL; |
|
1401 |
1580 |
} |
|
1402 |
|||
1403 |
#endif /* XML_DTD */ |
||
1404 |
|||
1405 |
static int PTRCALL |
||
1406 |
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
||
1407 |
const char **badPtr) |
||
1408 |
{ |
||
1409 |
101360 |
ptr += MINBPC(enc); |
|
1410 |
50680 |
end -= MINBPC(enc); |
|
1411 |
✓✓✓✓ ✓✓ |
4182280 |
for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
1412 |
✓✗✓✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✓ ✗✗✗✓ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✓ ✓✗✗✗ ✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✓ |
3148110 |
switch (BYTE_TYPE(enc, ptr)) { |
1413 |
case BT_DIGIT: |
||
1414 |
case BT_HEX: |
||
1415 |
case BT_MINUS: |
||
1416 |
case BT_APOS: |
||
1417 |
case BT_LPAR: |
||
1418 |
case BT_RPAR: |
||
1419 |
case BT_PLUS: |
||
1420 |
case BT_COMMA: |
||
1421 |
case BT_SOL: |
||
1422 |
case BT_EQUALS: |
||
1423 |
case BT_QUEST: |
||
1424 |
case BT_CR: |
||
1425 |
case BT_LF: |
||
1426 |
case BT_SEMI: |
||
1427 |
case BT_EXCL: |
||
1428 |
case BT_AST: |
||
1429 |
case BT_PERCNT: |
||
1430 |
case BT_NUM: |
||
1431 |
#ifdef XML_NS |
||
1432 |
case BT_COLON: |
||
1433 |
#endif |
||
1434 |
break; |
||
1435 |
case BT_S: |
||
1436 |
✗✓✗✗ ✗✗✗✗ ✗✗ |
150000 |
if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
1437 |
*badPtr = ptr; |
||
1438 |
return 0; |
||
1439 |
} |
||
1440 |
break; |
||
1441 |
case BT_NAME: |
||
1442 |
case BT_NMSTRT: |
||
1443 |
✗✓✓✗ ✗✓✓✗ ✗✓ |
957600 |
if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
1444 |
break; |
||
1445 |
default: |
||
1446 |
✓✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗ |
10 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1447 |
case 0x24: /* $ */ |
||
1448 |
case 0x40: /* @ */ |
||
1449 |
break; |
||
1450 |
default: |
||
1451 |
10 |
*badPtr = ptr; |
|
1452 |
10 |
return 0; |
|
1453 |
} |
||
1454 |
break; |
||
1455 |
} |
||
1456 |
} |
||
1457 |
50670 |
return 1; |
|
1458 |
50680 |
} |
|
1459 |
|||
1460 |
/* This must only be called for a well-formed start-tag or empty |
||
1461 |
element tag. Returns the number of attributes. Pointers to the |
||
1462 |
first attsMax attributes are stored in atts. |
||
1463 |
*/ |
||
1464 |
|||
1465 |
static int PTRCALL |
||
1466 |
PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
||
1467 |
int attsMax, ATTRIBUTE *atts) |
||
1468 |
{ |
||
1469 |
enum { other, inName, inValue } state = inName; |
||
1470 |
int nAtts = 0; |
||
1471 |
int open = 0; /* defined when state == inValue; |
||
1472 |
initialization just to shut up compilers */ |
||
1473 |
|||
1474 |
248242940 |
for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
|
1475 |
✓✗✗✗ ✗✓✓✓ ✓✓✗✓ ✗✓✓✓ ✓✗✗✗ ✗✗✓✗ ✓✓✓✗ ✗✗✓✓ ✓✓✗✗ ✗✗✗✓ ✗✓✓✓ ✗✗✗✓ ✓ |
435841048 |
switch (BYTE_TYPE(enc, ptr)) { |
1476 |
#define START_NAME \ |
||
1477 |
if (state == other) { \ |
||
1478 |
if (nAtts < attsMax) { \ |
||
1479 |
atts[nAtts].name = ptr; \ |
||
1480 |
atts[nAtts].normalized = 1; \ |
||
1481 |
} \ |
||
1482 |
state = inName; \ |
||
1483 |
} |
||
1484 |
#define LEAD_CASE(n) \ |
||
1485 |
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
||
1486 |
✗✓✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗ |
120 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1487 |
#undef LEAD_CASE |
||
1488 |
case BT_NONASCII: |
||
1489 |
case BT_NMSTRT: |
||
1490 |
case BT_HEX: |
||
1491 |
✓✓✓✓ ✓✓✓✗ ✓✓✓✗ |
196708062 |
START_NAME |
1492 |
break; |
||
1493 |
#undef START_NAME |
||
1494 |
case BT_QUOT: |
||
1495 |
✓✓✗✗ ✗✗ |
9500020 |
if (state != inValue) { |
1496 |
✓✗✗✗ ✗✗ |
4750010 |
if (nAtts < attsMax) |
1497 |
4750010 |
atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1498 |
state = inValue; |
||
1499 |
open = BT_QUOT; |
||
1500 |
4750010 |
} |
|
1501 |
✓✗✗✗ ✗✗ |
4750010 |
else if (open == BT_QUOT) { |
1502 |
state = other; |
||
1503 |
✓✗✗✗ ✗✗ |
4750010 |
if (nAtts < attsMax) |
1504 |
4750010 |
atts[nAtts].valueEnd = ptr; |
|
1505 |
4750010 |
nAtts++; |
|
1506 |
4750010 |
} |
|
1507 |
break; |
||
1508 |
case BT_APOS: |
||
1509 |
✓✓✓✓ ✓✓ |
10680 |
if (state != inValue) { |
1510 |
✓✓✓✗ ✓✗ |
5340 |
if (nAtts < attsMax) |
1511 |
5300 |
atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1512 |
state = inValue; |
||
1513 |
open = BT_APOS; |
||
1514 |
5340 |
} |
|
1515 |
✓✗✓✗ ✓✗ |
5340 |
else if (open == BT_APOS) { |
1516 |
state = other; |
||
1517 |
✓✓✓✗ ✓✗ |
5340 |
if (nAtts < attsMax) |
1518 |
5300 |
atts[nAtts].valueEnd = ptr; |
|
1519 |
5340 |
nAtts++; |
|
1520 |
5340 |
} |
|
1521 |
break; |
||
1522 |
case BT_AMP: |
||
1523 |
✓✗✓✗ ✓✗ |
430 |
if (nAtts < attsMax) |
1524 |
430 |
atts[nAtts].normalized = 0; |
|
1525 |
break; |
||
1526 |
case BT_S: |
||
1527 |
✓✓✓✓ ✓✓ |
9958520 |
if (state == inName) |
1528 |
5352900 |
state = other; |
|
1529 |
✗✓✗✗ ✗✗ |
4605740 |
else if (state == inValue |
1530 |
✓✓✓✓ ✓✗ |
4606430 |
&& nAtts < attsMax |
1531 |
✓✗✓✗ ✓✗ |
1620 |
&& atts[nAtts].normalized |
1532 |
✓✓✗✓ ✗✓ |
1000 |
&& (ptr == atts[nAtts].valuePtr |
1533 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
310 |
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
1534 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
240 |
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
1535 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
240 |
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
1536 |
70 |
atts[nAtts].normalized = 0; |
|
1537 |
break; |
||
1538 |
case BT_CR: case BT_LF: |
||
1539 |
/* This case ensures that the first attribute name is counted. |
||
1540 |
Apart from that we could just change state on the quote. */ |
||
1541 |
✓✓✗✗ ✗✗ |
650790 |
if (state == inName) |
1542 |
450120 |
state = other; |
|
1543 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ |
200700 |
else if (state == inValue && nAtts < attsMax) |
1544 |
30 |
atts[nAtts].normalized = 0; |
|
1545 |
break; |
||
1546 |
case BT_GT: |
||
1547 |
case BT_SOL: |
||
1548 |
✓✓✓✗ ✓✗ |
17548324 |
if (state != inValue) |
1549 |
16258934 |
return nAtts; |
|
1550 |
break; |
||
1551 |
default: |
||
1552 |
break; |
||
1553 |
} |
||
1554 |
} |
||
1555 |
/* not reached */ |
||
1556 |
} |
||
1557 |
|||
1558 |
static int PTRFASTCALL |
||
1559 |
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) |
||
1560 |
{ |
||
1561 |
int result = 0; |
||
1562 |
/* skip &# */ |
||
1563 |
1580 |
ptr += 2*MINBPC(enc); |
|
1564 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
830 |
if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
1565 |
✓✓✓✓ ✓✓ |
2960 |
for (ptr += MINBPC(enc); |
1566 |
✓✗✓✗ |
1680 |
!CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
1567 |
1090 |
ptr += MINBPC(enc)) { |
|
1568 |
✓✗✓✗ |
320 |
int c = BYTE_TO_ASCII(enc, ptr); |
1569 |
✗✗✗✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✓ ✗✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✗✗✗ ✓✗✗✗ ✗✗✓✗ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✗✗✗✓ ✗✗✗✗ ✗✓✗✗ ✗✗✗✗ ✓ |
2200 |
switch (c) { |
1570 |
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
||
1571 |
case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
||
1572 |
690 |
result <<= 4; |
|
1573 |
690 |
result |= (c - ASCII_0); |
|
1574 |
690 |
break; |
|
1575 |
case ASCII_A: case ASCII_B: case ASCII_C: |
||
1576 |
case ASCII_D: case ASCII_E: case ASCII_F: |
||
1577 |
410 |
result <<= 4; |
|
1578 |
410 |
result += 10 + (c - ASCII_A); |
|
1579 |
410 |
break; |
|
1580 |
case ASCII_a: case ASCII_b: case ASCII_c: |
||
1581 |
case ASCII_d: case ASCII_e: case ASCII_f: |
||
1582 |
result <<= 4; |
||
1583 |
result += 10 + (c - ASCII_a); |
||
1584 |
break; |
||
1585 |
} |
||
1586 |
✓✓✗✓ ✗✓ |
1100 |
if (result >= 0x110000) |
1587 |
10 |
return -1; |
|
1588 |
✓✓✓✗ ✓✗ |
1090 |
} |
1589 |
} |
||
1590 |
else { |
||
1591 |
✓✓✓✗ ✓✓✓✗ ✓✓ |
2860 |
for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
1592 |
✓✗✓✗ |
180 |
int c = BYTE_TO_ASCII(enc, ptr); |
1593 |
1160 |
result *= 10; |
|
1594 |
1160 |
result += (c - ASCII_0); |
|
1595 |
✓✓✗✓ ✗✓ |
1160 |
if (result >= 0x110000) |
1596 |
10 |
return -1; |
|
1597 |
✓✓✓✗ ✓✗ |
1150 |
} |
1598 |
} |
||
1599 |
770 |
return checkCharRefNumber(result); |
|
1600 |
790 |
} |
|
1601 |
|||
1602 |
static int PTRCALL |
||
1603 |
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, |
||
1604 |
const char *end) |
||
1605 |
{ |
||
1606 |
✓✓✓✓ ✓✗✗✓ ✓✓✗✓ |
10021096 |
switch ((end - ptr)/MINBPC(enc)) { |
1607 |
case 2: |
||
1608 |
✓✓✓✗ ✗✓✓✗ ✗✓ |
4754992 |
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
1609 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗ |
4750050 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1610 |
case ASCII_l: |
||
1611 |
2650020 |
return ASCII_LT; |
|
1612 |
case ASCII_g: |
||
1613 |
2100020 |
return ASCII_GT; |
|
1614 |
} |
||
1615 |
} |
||
1616 |
break; |
||
1617 |
case 3: |
||
1618 |
✓✓✗✗ ✗✗✓✗ ✗✓ |
250490 |
if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
1619 |
250050 |
ptr += MINBPC(enc); |
|
1620 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
250050 |
if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
1621 |
250050 |
ptr += MINBPC(enc); |
|
1622 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
250050 |
if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
1623 |
250040 |
return ASCII_AMP; |
|
1624 |
} |
||
1625 |
} |
||
1626 |
break; |
||
1627 |
case 4: |
||
1628 |
✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗ |
100 |
switch (BYTE_TO_ASCII(enc, ptr)) { |
1629 |
case ASCII_q: |
||
1630 |
30 |
ptr += MINBPC(enc); |
|
1631 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
1632 |
30 |
ptr += MINBPC(enc); |
|
1633 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
30 |
if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1634 |
20 |
ptr += MINBPC(enc); |
|
1635 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
20 |
if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
1636 |
20 |
return ASCII_QUOT; |
|
1637 |
} |
||
1638 |
} |
||
1639 |
break; |
||
1640 |
case ASCII_a: |
||
1641 |
50 |
ptr += MINBPC(enc); |
|
1642 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
50 |
if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
1643 |
50 |
ptr += MINBPC(enc); |
|
1644 |
✓✗✗✗ ✗✗✗✗ ✗✗ |
50 |
if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1645 |
50 |
ptr += MINBPC(enc); |
|
1646 |
✓✓✗✗ ✗✗✗✗ ✗✗ |
50 |
if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
1647 |
40 |
return ASCII_APOS; |
|
1648 |
} |
||
1649 |
} |
||
1650 |
break; |
||
1651 |
} |
||
1652 |
} |
||
1653 |
7732 |
return 0; |
|
1654 |
5007872 |
} |
|
1655 |
|||
1656 |
/* This function does not appear to be called from anywhere within the |
||
1657 |
* library code. It is used via the macro XmlSameName(), which is |
||
1658 |
* defined but never used. Since it appears in the encoding function |
||
1659 |
* table, removing it is not a thing to be undertaken lightly. For |
||
1660 |
* the moment, we simply exclude it from coverage tests. |
||
1661 |
* |
||
1662 |
* LCOV_EXCL_START |
||
1663 |
*/ |
||
1664 |
static int PTRCALL |
||
1665 |
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
||
1666 |
{ |
||
1667 |
for (;;) { |
||
1668 |
switch (BYTE_TYPE(enc, ptr1)) { |
||
1669 |
#define LEAD_CASE(n) \ |
||
1670 |
case BT_LEAD ## n: \ |
||
1671 |
if (*ptr1++ != *ptr2++) \ |
||
1672 |
return 0; |
||
1673 |
LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
||
1674 |
#undef LEAD_CASE |
||
1675 |
/* fall through */ |
||
1676 |
if (*ptr1++ != *ptr2++) |
||
1677 |
return 0; |
||
1678 |
break; |
||
1679 |
case BT_NONASCII: |
||
1680 |
case BT_NMSTRT: |
||
1681 |
#ifdef XML_NS |
||
1682 |
case BT_COLON: |
||
1683 |
#endif |
||
1684 |
case BT_HEX: |
||
1685 |
case BT_DIGIT: |
||
1686 |
case BT_NAME: |
||
1687 |
case BT_MINUS: |
||
1688 |
if (*ptr2++ != *ptr1++) |
||
1689 |
return 0; |
||
1690 |
if (MINBPC(enc) > 1) { |
||
1691 |
if (*ptr2++ != *ptr1++) |
||
1692 |
return 0; |
||
1693 |
if (MINBPC(enc) > 2) { |
||
1694 |
if (*ptr2++ != *ptr1++) |
||
1695 |
return 0; |
||
1696 |
if (MINBPC(enc) > 3) { |
||
1697 |
if (*ptr2++ != *ptr1++) |
||
1698 |
return 0; |
||
1699 |
} |
||
1700 |
} |
||
1701 |
} |
||
1702 |
break; |
||
1703 |
default: |
||
1704 |
if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
||
1705 |
return 1; |
||
1706 |
switch (BYTE_TYPE(enc, ptr2)) { |
||
1707 |
case BT_LEAD2: |
||
1708 |
case BT_LEAD3: |
||
1709 |
case BT_LEAD4: |
||
1710 |
case BT_NONASCII: |
||
1711 |
case BT_NMSTRT: |
||
1712 |
#ifdef XML_NS |
||
1713 |
case BT_COLON: |
||
1714 |
#endif |
||
1715 |
case BT_HEX: |
||
1716 |
case BT_DIGIT: |
||
1717 |
case BT_NAME: |
||
1718 |
case BT_MINUS: |
||
1719 |
return 0; |
||
1720 |
default: |
||
1721 |
return 1; |
||
1722 |
} |
||
1723 |
} |
||
1724 |
} |
||
1725 |
/* not reached */ |
||
1726 |
} |
||
1727 |
/* LCOV_EXCL_STOP */ |
||
1728 |
|||
1729 |
static int PTRCALL |
||
1730 |
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, |
||
1731 |
const char *end1, const char *ptr2) |
||
1732 |
{ |
||
1733 |
✓✓✓✓ ✓✓ |
4224628 |
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
1734 |
✗✓✗✓ ✗✓ |
1720940 |
if (end1 - ptr1 < MINBPC(enc)) { |
1735 |
/* This line cannot be executed. The incoming data has already |
||
1736 |
* been tokenized once, so incomplete characters like this have |
||
1737 |
* already been eliminated from the input. Retaining the |
||
1738 |
* paranoia check is still valuable, however. |
||
1739 |
*/ |
||
1740 |
return 0; /* LCOV_EXCL_LINE */ |
||
1741 |
} |
||
1742 |
✓✓✓✗ ✓✓✓✓ ✓✓ |
1727110 |
if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
1743 |
65160 |
return 0; |
|
1744 |
} |
||
1745 |
239196 |
return ptr1 == end1; |
|
1746 |
304356 |
} |
|
1747 |
|||
1748 |
static int PTRFASTCALL |
||
1749 |
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
||
1750 |
{ |
||
1751 |
const char *start = ptr; |
||
1752 |
74535948 |
for (;;) { |
|
1753 |
✓✗✗✗ ✗✗✗✗ ✗✓✓✓ ✓✗✗✗ ✗✗✗✗ ✗✗✓✓ ✓✓✗✗ ✗✗✗✗ ✗✗✗✓ ✓ |
279453916 |
switch (BYTE_TYPE(enc, ptr)) { |
1754 |
#define LEAD_CASE(n) \ |
||
1755 |
case BT_LEAD ## n: ptr += n; break; |
||
1756 |
120 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1757 |
#undef LEAD_CASE |
||
1758 |
case BT_NONASCII: |
||
1759 |
case BT_NMSTRT: |
||
1760 |
#ifdef XML_NS |
||
1761 |
case BT_COLON: |
||
1762 |
#endif |
||
1763 |
case BT_HEX: |
||
1764 |
case BT_DIGIT: |
||
1765 |
case BT_NAME: |
||
1766 |
case BT_MINUS: |
||
1767 |
242181342 |
ptr += MINBPC(enc); |
|
1768 |
242181342 |
break; |
|
1769 |
default: |
||
1770 |
37267974 |
return (int)(ptr - start); |
|
1771 |
} |
||
1772 |
} |
||
1773 |
} |
||
1774 |
|||
1775 |
static const char * PTRFASTCALL |
||
1776 |
PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
||
1777 |
{ |
||
1778 |
640 |
for (;;) { |
|
1779 |
✗✗✓✓ ✓✗✗✗ ✗✓✓✗ ✗✗✗✓ |
460 |
switch (BYTE_TYPE(enc, ptr)) { |
1780 |
case BT_LF: |
||
1781 |
case BT_CR: |
||
1782 |
case BT_S: |
||
1783 |
200 |
ptr += MINBPC(enc); |
|
1784 |
break; |
||
1785 |
default: |
||
1786 |
220 |
return ptr; |
|
1787 |
} |
||
1788 |
} |
||
1789 |
} |
||
1790 |
|||
1791 |
static void PTRCALL |
||
1792 |
PREFIX(updatePosition)(const ENCODING *enc, |
||
1793 |
const char *ptr, |
||
1794 |
const char *end, |
||
1795 |
POSITION *pos) |
||
1796 |
{ |
||
1797 |
✓✓✓✓ ✓✓ |
30387366 |
while (HAS_CHAR(enc, ptr, end)) { |
1798 |
✓✓✗✓ ✓✓✓✓ ✗✗✗✓ ✓✓✓✓ ✗✗✓✓ ✗✓ |
4268276 |
switch (BYTE_TYPE(enc, ptr)) { |
1799 |
#define LEAD_CASE(n) \ |
||
1800 |
case BT_LEAD ## n: \ |
||
1801 |
ptr += n; \ |
||
1802 |
break; |
||
1803 |
180 |
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1804 |
#undef LEAD_CASE |
||
1805 |
case BT_LF: |
||
1806 |
22666 |
pos->columnNumber = (XML_Size)-1; |
|
1807 |
22666 |
pos->lineNumber++; |
|
1808 |
22666 |
ptr += MINBPC(enc); |
|
1809 |
22666 |
break; |
|
1810 |
case BT_CR: |
||
1811 |
60 |
pos->lineNumber++; |
|
1812 |
60 |
ptr += MINBPC(enc); |
|
1813 |
✓✗✓✓ ✗✓✗✗ ✗✗✗✗ ✗✗✗✗ |
100 |
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) |
1814 |
20 |
ptr += MINBPC(enc); |
|
1815 |
60 |
pos->columnNumber = (XML_Size)-1; |
|
1816 |
60 |
break; |
|
1817 |
default: |
||
1818 |
4205030 |
ptr += MINBPC(enc); |
|
1819 |
4205030 |
break; |
|
1820 |
} |
||
1821 |
4227936 |
pos->columnNumber++; |
|
1822 |
} |
||
1823 |
7310498 |
} |
|
1824 |
|||
1825 |
#undef DO_LEAD_CASE |
||
1826 |
#undef MULTIBYTE_CASES |
||
1827 |
#undef INVALID_CASES |
||
1828 |
#undef CHECK_NAME_CASE |
||
1829 |
#undef CHECK_NAME_CASES |
||
1830 |
#undef CHECK_NMSTRT_CASE |
||
1831 |
#undef CHECK_NMSTRT_CASES |
||
1832 |
|||
1833 |
#endif /* XML_TOK_IMPL_C */ |
Generated by: GCOVR (Version 3.3) |