1 |
|
|
/* $OpenBSD: a_utf8.c,v 1.8 2014/07/11 08:44:47 jsing Exp $ */ |
2 |
|
|
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 |
|
|
* All rights reserved. |
4 |
|
|
* |
5 |
|
|
* This package is an SSL implementation written |
6 |
|
|
* by Eric Young (eay@cryptsoft.com). |
7 |
|
|
* The implementation was written so as to conform with Netscapes SSL. |
8 |
|
|
* |
9 |
|
|
* This library is free for commercial and non-commercial use as long as |
10 |
|
|
* the following conditions are aheared to. The following conditions |
11 |
|
|
* apply to all code found in this distribution, be it the RC4, RSA, |
12 |
|
|
* lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 |
|
|
* included with this distribution is covered by the same copyright terms |
14 |
|
|
* except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 |
|
|
* |
16 |
|
|
* Copyright remains Eric Young's, and as such any Copyright notices in |
17 |
|
|
* the code are not to be removed. |
18 |
|
|
* If this package is used in a product, Eric Young should be given attribution |
19 |
|
|
* as the author of the parts of the library used. |
20 |
|
|
* This can be in the form of a textual message at program startup or |
21 |
|
|
* in documentation (online or textual) provided with the package. |
22 |
|
|
* |
23 |
|
|
* Redistribution and use in source and binary forms, with or without |
24 |
|
|
* modification, are permitted provided that the following conditions |
25 |
|
|
* are met: |
26 |
|
|
* 1. Redistributions of source code must retain the copyright |
27 |
|
|
* notice, this list of conditions and the following disclaimer. |
28 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
29 |
|
|
* notice, this list of conditions and the following disclaimer in the |
30 |
|
|
* documentation and/or other materials provided with the distribution. |
31 |
|
|
* 3. All advertising materials mentioning features or use of this software |
32 |
|
|
* must display the following acknowledgement: |
33 |
|
|
* "This product includes cryptographic software written by |
34 |
|
|
* Eric Young (eay@cryptsoft.com)" |
35 |
|
|
* The word 'cryptographic' can be left out if the rouines from the library |
36 |
|
|
* being used are not cryptographic related :-). |
37 |
|
|
* 4. If you include any Windows specific code (or a derivative thereof) from |
38 |
|
|
* the apps directory (application code) you must include an acknowledgement: |
39 |
|
|
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 |
|
|
* |
41 |
|
|
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 |
|
|
* SUCH DAMAGE. |
52 |
|
|
* |
53 |
|
|
* The licence and distribution terms for any publically available version or |
54 |
|
|
* derivative of this code cannot be changed. i.e. this code cannot simply be |
55 |
|
|
* copied and put under another distribution licence |
56 |
|
|
* [including the GNU Public Licence.] |
57 |
|
|
*/ |
58 |
|
|
|
59 |
|
|
#include <stdio.h> |
60 |
|
|
|
61 |
|
|
#include <openssl/asn1.h> |
62 |
|
|
|
63 |
|
|
#include "asn1_locl.h" |
64 |
|
|
|
65 |
|
|
/* UTF8 utilities */ |
66 |
|
|
|
67 |
|
|
/* |
68 |
|
|
* This parses a UTF8 string one character at a time. It is passed a pointer |
69 |
|
|
* to the string and the length of the string. It sets 'value' to the value of |
70 |
|
|
* the current character. It returns the number of characters read or a |
71 |
|
|
* negative error code: |
72 |
|
|
* -1 = string too short |
73 |
|
|
* -2 = illegal character |
74 |
|
|
* -3 = subsequent characters not of the form 10xxxxxx |
75 |
|
|
* -4 = character encoded incorrectly (not minimal length). |
76 |
|
|
*/ |
77 |
|
|
|
78 |
|
|
int |
79 |
|
|
UTF8_getc(const unsigned char *str, int len, unsigned long *val) |
80 |
|
|
{ |
81 |
|
|
const unsigned char *p; |
82 |
|
|
unsigned long value; |
83 |
|
|
int ret; |
84 |
✓✓ |
2053843964 |
if (len <= 0) |
85 |
|
6 |
return 0; |
86 |
|
|
p = str; |
87 |
|
|
|
88 |
|
|
/* Check syntax and work out the encoded value (if correct) */ |
89 |
✓✓ |
1026921976 |
if ((*p & 0x80) == 0) { |
90 |
|
936682 |
value = *p++ & 0x7f; |
91 |
|
|
ret = 1; |
92 |
✓✓ |
1026921976 |
} else if ((*p & 0xe0) == 0xc0) { |
93 |
✓✓ |
108876 |
if (*p < 0xc2) |
94 |
|
12 |
return -2; |
95 |
✓✓ |
108864 |
if (len < 2) |
96 |
|
46080 |
return -1; |
97 |
✓✓ |
62784 |
if ((p[1] & 0xc0) != 0x80) |
98 |
|
34560 |
return -3; |
99 |
|
28224 |
value = (*p++ & 0x1f) << 6; |
100 |
|
28224 |
value |= *p++ & 0x3f; |
101 |
✗✓ |
28224 |
if (value < 0x80) |
102 |
|
|
return -4; |
103 |
|
|
ret = 2; |
104 |
✓✓ |
1025904642 |
} else if ((*p & 0xf0) == 0xe0) { |
105 |
✓✓ |
12951552 |
if (len < 3) |
106 |
|
6291456 |
return -1; |
107 |
✓✓✓✓
|
8601600 |
if (((p[1] & 0xc0) != 0x80) || |
108 |
|
1941504 |
((p[2] & 0xc0) != 0x80)) |
109 |
|
5898240 |
return -3; |
110 |
|
761856 |
value = (*p++ & 0xf) << 12; |
111 |
|
761856 |
value |= (*p++ & 0x3f) << 6; |
112 |
|
761856 |
value |= *p++ & 0x3f; |
113 |
✓✓ |
761856 |
if (value < 0x800) |
114 |
|
12288 |
return -4; |
115 |
|
|
/* surrogate pair code points are not valid */ |
116 |
✓✓ |
749568 |
if (value >= 0xd800 && value < 0xe000) |
117 |
|
12288 |
return -2; |
118 |
|
|
ret = 3; |
119 |
✓✓✓✓
|
2026586580 |
} else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { |
120 |
✓✓ |
1012924416 |
if (len < 4) |
121 |
|
503316480 |
return -1; |
122 |
✓✓✓✓
|
547356672 |
if (((p[1] & 0xc0) != 0x80) || |
123 |
✓✓ |
132120576 |
((p[2] & 0xc0) != 0x80) || |
124 |
|
37748736 |
((p[3] & 0xc0) != 0x80)) |
125 |
|
495452160 |
return -3; |
126 |
|
14155776 |
value = ((unsigned long)(*p++ & 0x7)) << 18; |
127 |
|
14155776 |
value |= (*p++ & 0x3f) << 12; |
128 |
|
14155776 |
value |= (*p++ & 0x3f) << 6; |
129 |
|
14155776 |
value |= *p++ & 0x3f; |
130 |
✓✓ |
14155776 |
if (value < 0x10000) |
131 |
|
393216 |
return -4; |
132 |
✓✓ |
13762560 |
if (value > UNICODE_MAX) |
133 |
|
1179648 |
return -2; |
134 |
|
|
ret = 4; |
135 |
|
|
} else |
136 |
|
450 |
return -2; |
137 |
|
14285098 |
*val = value; |
138 |
|
14285098 |
return ret; |
139 |
|
1026921982 |
} |
140 |
|
|
|
141 |
|
|
/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form |
142 |
|
|
* in 'str' where 'str' is a buffer of at least length 'len'. If 'str' |
143 |
|
|
* is NULL, then nothing is written and just the return code is determined. |
144 |
|
|
|
145 |
|
|
* Returns less than zero on error: |
146 |
|
|
* -1 if 'str' is not NULL and 'len' is too small |
147 |
|
|
* -2 if 'value' is an invalid character (surrogate or out-of-range) |
148 |
|
|
* |
149 |
|
|
* Otherwise, returns the number of bytes in 'value's encoded form |
150 |
|
|
* (i.e., the number of bytes written to 'str' when it's not NULL). |
151 |
|
|
* |
152 |
|
|
* It will need at most 4 characters. |
153 |
|
|
*/ |
154 |
|
|
|
155 |
|
|
int |
156 |
|
|
UTF8_putc(unsigned char *str, int len, unsigned long value) |
157 |
|
|
{ |
158 |
✓✓ |
46371220 |
if (value < 0x80) { |
159 |
✓✓ |
3155408 |
if (str != NULL) { |
160 |
✓✓ |
1578104 |
if (len < 1) |
161 |
|
768 |
return -1; |
162 |
|
1577336 |
str[0] = (unsigned char)value; |
163 |
|
1577336 |
} |
164 |
|
3154640 |
return 1; |
165 |
|
|
} |
166 |
✓✓ |
20030202 |
if (value < 0x800) { |
167 |
✓✓ |
34560 |
if (str != NULL) { |
168 |
✓✓ |
23040 |
if (len < 2) |
169 |
|
11520 |
return -1; |
170 |
|
11520 |
str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); |
171 |
|
11520 |
str[1] = (unsigned char)((value & 0x3f) | 0x80); |
172 |
|
11520 |
} |
173 |
|
23040 |
return 2; |
174 |
|
|
} |
175 |
✓✓ |
19995642 |
if (value < 0x10000) { |
176 |
✓✓ |
1118208 |
if (UNICODE_IS_SURROGATE(value)) |
177 |
|
12288 |
return -2; |
178 |
✓✓ |
1105920 |
if (str != NULL) { |
179 |
✓✓ |
737280 |
if (len < 3) |
180 |
|
368640 |
return -1; |
181 |
|
368640 |
str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0); |
182 |
|
368640 |
str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); |
183 |
|
368640 |
str[2] = (unsigned char)((value & 0x3f) | 0x80); |
184 |
|
368640 |
} |
185 |
|
737280 |
return 3; |
186 |
|
|
} |
187 |
✓✓ |
18877434 |
if (value <= UNICODE_MAX) { |
188 |
✓✓ |
18874368 |
if (str != NULL) { |
189 |
✓✓ |
12582912 |
if (len < 4) |
190 |
|
6291456 |
return -1; |
191 |
|
6291456 |
str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0); |
192 |
|
6291456 |
str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80); |
193 |
|
6291456 |
str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); |
194 |
|
6291456 |
str[3] = (unsigned char)((value & 0x3f) | 0x80); |
195 |
|
6291456 |
} |
196 |
|
12582912 |
return 4; |
197 |
|
|
} |
198 |
|
3066 |
return -2; |
199 |
|
23185610 |
} |