1 |
|
|
/* $OpenBSD: a_utf8.c,v 1.8 2014/07/11 08:44:47 jsing Exp $ */ |
2 |
|
|
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 |
|
|
* All rights reserved. |
4 |
|
|
* |
5 |
|
|
* This package is an SSL implementation written |
6 |
|
|
* by Eric Young (eay@cryptsoft.com). |
7 |
|
|
* The implementation was written so as to conform with Netscapes SSL. |
8 |
|
|
* |
9 |
|
|
* This library is free for commercial and non-commercial use as long as |
10 |
|
|
* the following conditions are aheared to. The following conditions |
11 |
|
|
* apply to all code found in this distribution, be it the RC4, RSA, |
12 |
|
|
* lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 |
|
|
* included with this distribution is covered by the same copyright terms |
14 |
|
|
* except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 |
|
|
* |
16 |
|
|
* Copyright remains Eric Young's, and as such any Copyright notices in |
17 |
|
|
* the code are not to be removed. |
18 |
|
|
* If this package is used in a product, Eric Young should be given attribution |
19 |
|
|
* as the author of the parts of the library used. |
20 |
|
|
* This can be in the form of a textual message at program startup or |
21 |
|
|
* in documentation (online or textual) provided with the package. |
22 |
|
|
* |
23 |
|
|
* Redistribution and use in source and binary forms, with or without |
24 |
|
|
* modification, are permitted provided that the following conditions |
25 |
|
|
* are met: |
26 |
|
|
* 1. Redistributions of source code must retain the copyright |
27 |
|
|
* notice, this list of conditions and the following disclaimer. |
28 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
29 |
|
|
* notice, this list of conditions and the following disclaimer in the |
30 |
|
|
* documentation and/or other materials provided with the distribution. |
31 |
|
|
* 3. All advertising materials mentioning features or use of this software |
32 |
|
|
* must display the following acknowledgement: |
33 |
|
|
* "This product includes cryptographic software written by |
34 |
|
|
* Eric Young (eay@cryptsoft.com)" |
35 |
|
|
* The word 'cryptographic' can be left out if the rouines from the library |
36 |
|
|
* being used are not cryptographic related :-). |
37 |
|
|
* 4. If you include any Windows specific code (or a derivative thereof) from |
38 |
|
|
* the apps directory (application code) you must include an acknowledgement: |
39 |
|
|
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 |
|
|
* |
41 |
|
|
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 |
|
|
* SUCH DAMAGE. |
52 |
|
|
* |
53 |
|
|
* The licence and distribution terms for any publically available version or |
54 |
|
|
* derivative of this code cannot be changed. i.e. this code cannot simply be |
55 |
|
|
* copied and put under another distribution licence |
56 |
|
|
* [including the GNU Public Licence.] |
57 |
|
|
*/ |
58 |
|
|
|
59 |
|
|
#include <stdio.h> |
60 |
|
|
|
61 |
|
|
#include <openssl/asn1.h> |
62 |
|
|
|
63 |
|
|
#include "asn1_locl.h" |
64 |
|
|
|
65 |
|
|
/* UTF8 utilities */ |
66 |
|
|
|
67 |
|
|
/* |
68 |
|
|
* This parses a UTF8 string one character at a time. It is passed a pointer |
69 |
|
|
* to the string and the length of the string. It sets 'value' to the value of |
70 |
|
|
* the current character. It returns the number of characters read or a |
71 |
|
|
* negative error code: |
72 |
|
|
* -1 = string too short |
73 |
|
|
* -2 = illegal character |
74 |
|
|
* -3 = subsequent characters not of the form 10xxxxxx |
75 |
|
|
* -4 = character encoded incorrectly (not minimal length). |
76 |
|
|
*/ |
77 |
|
|
|
78 |
|
|
int |
79 |
|
|
UTF8_getc(const unsigned char *str, int len, unsigned long *val) |
80 |
|
|
{ |
81 |
|
|
const unsigned char *p; |
82 |
|
|
unsigned long value; |
83 |
|
|
int ret; |
84 |
✓✓ |
1027752708 |
if (len <= 0) |
85 |
|
3 |
return 0; |
86 |
|
|
p = str; |
87 |
|
|
|
88 |
|
|
/* Check syntax and work out the encoded value (if correct) */ |
89 |
✓✓ |
513876351 |
if ((*p & 0x80) == 0) { |
90 |
|
882168 |
value = *p++ & 0x7f; |
91 |
|
|
ret = 1; |
92 |
✓✓ |
513876351 |
} else if ((*p & 0xe0) == 0xc0) { |
93 |
✓✓ |
55974 |
if (*p < 0xc2) |
94 |
|
6 |
return -2; |
95 |
✓✓ |
55968 |
if (len < 2) |
96 |
|
23040 |
return -1; |
97 |
✓✓ |
32928 |
if ((p[1] & 0xc0) != 0x80) |
98 |
|
17280 |
return -3; |
99 |
|
15648 |
value = (*p++ & 0x1f) << 6; |
100 |
|
15648 |
value |= *p++ & 0x3f; |
101 |
✗✓ |
15648 |
if (value < 0x80) |
102 |
|
|
return -4; |
103 |
|
|
ret = 2; |
104 |
✓✓ |
512953857 |
} else if ((*p & 0xf0) == 0xe0) { |
105 |
✓✓ |
6475776 |
if (len < 3) |
106 |
|
3145728 |
return -1; |
107 |
✓✓✓✓
|
4300800 |
if (((p[1] & 0xc0) != 0x80) || |
108 |
|
970752 |
((p[2] & 0xc0) != 0x80)) |
109 |
|
2949120 |
return -3; |
110 |
|
380928 |
value = (*p++ & 0xf) << 12; |
111 |
|
380928 |
value |= (*p++ & 0x3f) << 6; |
112 |
|
380928 |
value |= *p++ & 0x3f; |
113 |
✓✓ |
380928 |
if (value < 0x800) |
114 |
|
6144 |
return -4; |
115 |
|
|
/* surrogate pair code points are not valid */ |
116 |
✓✓ |
374784 |
if (value >= 0xd800 && value < 0xe000) |
117 |
|
6144 |
return -2; |
118 |
|
|
ret = 3; |
119 |
✓✓✓✓
|
1013293290 |
} else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { |
120 |
✓✓ |
506462208 |
if (len < 4) |
121 |
|
251658240 |
return -1; |
122 |
✓✓✓✓
|
273678336 |
if (((p[1] & 0xc0) != 0x80) || |
123 |
✓✓ |
66060288 |
((p[2] & 0xc0) != 0x80) || |
124 |
|
18874368 |
((p[3] & 0xc0) != 0x80)) |
125 |
|
247726080 |
return -3; |
126 |
|
7077888 |
value = ((unsigned long)(*p++ & 0x7)) << 18; |
127 |
|
7077888 |
value |= (*p++ & 0x3f) << 12; |
128 |
|
7077888 |
value |= (*p++ & 0x3f) << 6; |
129 |
|
7077888 |
value |= *p++ & 0x3f; |
130 |
✓✓ |
7077888 |
if (value < 0x10000) |
131 |
|
196608 |
return -4; |
132 |
✓✓ |
6881280 |
if (value > UNICODE_MAX) |
133 |
|
589824 |
return -2; |
134 |
|
|
ret = 4; |
135 |
|
|
} else |
136 |
|
225 |
return -2; |
137 |
|
7557912 |
*val = value; |
138 |
|
7557912 |
return ret; |
139 |
|
513876354 |
} |
140 |
|
|
|
141 |
|
|
/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form |
142 |
|
|
* in 'str' where 'str' is a buffer of at least length 'len'. If 'str' |
143 |
|
|
* is NULL, then nothing is written and just the return code is determined. |
144 |
|
|
|
145 |
|
|
* Returns less than zero on error: |
146 |
|
|
* -1 if 'str' is not NULL and 'len' is too small |
147 |
|
|
* -2 if 'value' is an invalid character (surrogate or out-of-range) |
148 |
|
|
* |
149 |
|
|
* Otherwise, returns the number of bytes in 'value's encoded form |
150 |
|
|
* (i.e., the number of bytes written to 'str' when it's not NULL). |
151 |
|
|
* |
152 |
|
|
* It will need at most 4 characters. |
153 |
|
|
*/ |
154 |
|
|
|
155 |
|
|
int |
156 |
|
|
UTF8_putc(unsigned char *str, int len, unsigned long value) |
157 |
|
|
{ |
158 |
✓✓ |
25065142 |
if (value < 0x80) { |
159 |
✓✓ |
2517470 |
if (str != NULL) { |
160 |
✓✓ |
1258943 |
if (len < 1) |
161 |
|
384 |
return -1; |
162 |
|
1258559 |
str[0] = (unsigned char)value; |
163 |
|
1258559 |
} |
164 |
|
2517086 |
return 1; |
165 |
|
|
} |
166 |
✓✓ |
10015101 |
if (value < 0x800) { |
167 |
✓✓ |
17280 |
if (str != NULL) { |
168 |
✓✓ |
11520 |
if (len < 2) |
169 |
|
5760 |
return -1; |
170 |
|
5760 |
str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); |
171 |
|
5760 |
str[1] = (unsigned char)((value & 0x3f) | 0x80); |
172 |
|
5760 |
} |
173 |
|
11520 |
return 2; |
174 |
|
|
} |
175 |
✓✓ |
9997821 |
if (value < 0x10000) { |
176 |
✓✓ |
559104 |
if (UNICODE_IS_SURROGATE(value)) |
177 |
|
6144 |
return -2; |
178 |
✓✓ |
552960 |
if (str != NULL) { |
179 |
✓✓ |
368640 |
if (len < 3) |
180 |
|
184320 |
return -1; |
181 |
|
184320 |
str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0); |
182 |
|
184320 |
str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); |
183 |
|
184320 |
str[2] = (unsigned char)((value & 0x3f) | 0x80); |
184 |
|
184320 |
} |
185 |
|
368640 |
return 3; |
186 |
|
|
} |
187 |
✓✓ |
9438717 |
if (value <= UNICODE_MAX) { |
188 |
✓✓ |
9437184 |
if (str != NULL) { |
189 |
✓✓ |
6291456 |
if (len < 4) |
190 |
|
3145728 |
return -1; |
191 |
|
3145728 |
str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0); |
192 |
|
3145728 |
str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80); |
193 |
|
3145728 |
str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); |
194 |
|
3145728 |
str[3] = (unsigned char)((value & 0x3f) | 0x80); |
195 |
|
3145728 |
} |
196 |
|
6291456 |
return 4; |
197 |
|
|
} |
198 |
|
1533 |
return -2; |
199 |
|
12532571 |
} |