1  | 
     | 
     | 
    /* $OpenBSD: a_utf8.c,v 1.8 2014/07/11 08:44:47 jsing Exp $ */  | 
    
    
    2  | 
     | 
     | 
    /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)  | 
    
    
    3  | 
     | 
     | 
     * All rights reserved.  | 
    
    
    4  | 
     | 
     | 
     *  | 
    
    
    5  | 
     | 
     | 
     * This package is an SSL implementation written  | 
    
    
    6  | 
     | 
     | 
     * by Eric Young (eay@cryptsoft.com).  | 
    
    
    7  | 
     | 
     | 
     * The implementation was written so as to conform with Netscapes SSL.  | 
    
    
    8  | 
     | 
     | 
     *  | 
    
    
    9  | 
     | 
     | 
     * This library is free for commercial and non-commercial use as long as  | 
    
    
    10  | 
     | 
     | 
     * the following conditions are aheared to.  The following conditions  | 
    
    
    11  | 
     | 
     | 
     * apply to all code found in this distribution, be it the RC4, RSA,  | 
    
    
    12  | 
     | 
     | 
     * lhash, DES, etc., code; not just the SSL code.  The SSL documentation  | 
    
    
    13  | 
     | 
     | 
     * included with this distribution is covered by the same copyright terms  | 
    
    
    14  | 
     | 
     | 
     * except that the holder is Tim Hudson (tjh@cryptsoft.com).  | 
    
    
    15  | 
     | 
     | 
     *  | 
    
    
    16  | 
     | 
     | 
     * Copyright remains Eric Young's, and as such any Copyright notices in  | 
    
    
    17  | 
     | 
     | 
     * the code are not to be removed.  | 
    
    
    18  | 
     | 
     | 
     * If this package is used in a product, Eric Young should be given attribution  | 
    
    
    19  | 
     | 
     | 
     * as the author of the parts of the library used.  | 
    
    
    20  | 
     | 
     | 
     * This can be in the form of a textual message at program startup or  | 
    
    
    21  | 
     | 
     | 
     * in documentation (online or textual) provided with the package.  | 
    
    
    22  | 
     | 
     | 
     *  | 
    
    
    23  | 
     | 
     | 
     * Redistribution and use in source and binary forms, with or without  | 
    
    
    24  | 
     | 
     | 
     * modification, are permitted provided that the following conditions  | 
    
    
    25  | 
     | 
     | 
     * are met:  | 
    
    
    26  | 
     | 
     | 
     * 1. Redistributions of source code must retain the copyright  | 
    
    
    27  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer.  | 
    
    
    28  | 
     | 
     | 
     * 2. Redistributions in binary form must reproduce the above copyright  | 
    
    
    29  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer in the  | 
    
    
    30  | 
     | 
     | 
     *    documentation and/or other materials provided with the distribution.  | 
    
    
    31  | 
     | 
     | 
     * 3. All advertising materials mentioning features or use of this software  | 
    
    
    32  | 
     | 
     | 
     *    must display the following acknowledgement:  | 
    
    
    33  | 
     | 
     | 
     *    "This product includes cryptographic software written by  | 
    
    
    34  | 
     | 
     | 
     *     Eric Young (eay@cryptsoft.com)"  | 
    
    
    35  | 
     | 
     | 
     *    The word 'cryptographic' can be left out if the rouines from the library  | 
    
    
    36  | 
     | 
     | 
     *    being used are not cryptographic related :-).  | 
    
    
    37  | 
     | 
     | 
     * 4. If you include any Windows specific code (or a derivative thereof) from  | 
    
    
    38  | 
     | 
     | 
     *    the apps directory (application code) you must include an acknowledgement:  | 
    
    
    39  | 
     | 
     | 
     *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"  | 
    
    
    40  | 
     | 
     | 
     *  | 
    
    
    41  | 
     | 
     | 
     * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND  | 
    
    
    42  | 
     | 
     | 
     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
    
    
    43  | 
     | 
     | 
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
    
    
    44  | 
     | 
     | 
     * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE  | 
    
    
    45  | 
     | 
     | 
     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  | 
    
    
    46  | 
     | 
     | 
     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  | 
    
    
    47  | 
     | 
     | 
     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  | 
    
    
    48  | 
     | 
     | 
     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  | 
    
    
    49  | 
     | 
     | 
     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  | 
    
    
    50  | 
     | 
     | 
     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  | 
    
    
    51  | 
     | 
     | 
     * SUCH DAMAGE.  | 
    
    
    52  | 
     | 
     | 
     *  | 
    
    
    53  | 
     | 
     | 
     * The licence and distribution terms for any publically available version or  | 
    
    
    54  | 
     | 
     | 
     * derivative of this code cannot be changed.  i.e. this code cannot simply be  | 
    
    
    55  | 
     | 
     | 
     * copied and put under another distribution licence  | 
    
    
    56  | 
     | 
     | 
     * [including the GNU Public Licence.]  | 
    
    
    57  | 
     | 
     | 
     */  | 
    
    
    58  | 
     | 
     | 
     | 
    
    
    59  | 
     | 
     | 
    #include <stdio.h>  | 
    
    
    60  | 
     | 
     | 
     | 
    
    
    61  | 
     | 
     | 
    #include <openssl/asn1.h>  | 
    
    
    62  | 
     | 
     | 
     | 
    
    
    63  | 
     | 
     | 
    #include "asn1_locl.h"  | 
    
    
    64  | 
     | 
     | 
     | 
    
    
    65  | 
     | 
     | 
    /* UTF8 utilities */  | 
    
    
    66  | 
     | 
     | 
     | 
    
    
    67  | 
     | 
     | 
    /*  | 
    
    
    68  | 
     | 
     | 
     * This parses a UTF8 string one character at a time. It is passed a pointer  | 
    
    
    69  | 
     | 
     | 
     * to the string and the length of the string. It sets 'value' to the value of  | 
    
    
    70  | 
     | 
     | 
     * the current character. It returns the number of characters read or a  | 
    
    
    71  | 
     | 
     | 
     * negative error code:  | 
    
    
    72  | 
     | 
     | 
     * -1 = string too short  | 
    
    
    73  | 
     | 
     | 
     * -2 = illegal character  | 
    
    
    74  | 
     | 
     | 
     * -3 = subsequent characters not of the form 10xxxxxx  | 
    
    
    75  | 
     | 
     | 
     * -4 = character encoded incorrectly (not minimal length).  | 
    
    
    76  | 
     | 
     | 
     */  | 
    
    
    77  | 
     | 
     | 
     | 
    
    
    78  | 
     | 
     | 
    int  | 
    
    
    79  | 
     | 
     | 
    UTF8_getc(const unsigned char *str, int len, unsigned long *val)  | 
    
    
    80  | 
     | 
     | 
    { | 
    
    
    81  | 
     | 
     | 
    	const unsigned char *p;  | 
    
    
    82  | 
     | 
     | 
    	unsigned long value;  | 
    
    
    83  | 
     | 
     | 
    	int ret;  | 
    
    
    84  | 
    ✓✓ | 
    2053843964  | 
    	if (len <= 0)  | 
    
    
    85  | 
     | 
    6  | 
    		return 0;  | 
    
    
    86  | 
     | 
     | 
    	p = str;  | 
    
    
    87  | 
     | 
     | 
     | 
    
    
    88  | 
     | 
     | 
    	/* Check syntax and work out the encoded value (if correct) */  | 
    
    
    89  | 
    ✓✓ | 
    1026921976  | 
    	if ((*p & 0x80) == 0) { | 
    
    
    90  | 
     | 
    936682  | 
    		value = *p++ & 0x7f;  | 
    
    
    91  | 
     | 
     | 
    		ret = 1;  | 
    
    
    92  | 
    ✓✓ | 
    1026921976  | 
    	} else if ((*p & 0xe0) == 0xc0) { | 
    
    
    93  | 
    ✓✓ | 
    108876  | 
    		if (*p < 0xc2)  | 
    
    
    94  | 
     | 
    12  | 
    			return -2;  | 
    
    
    95  | 
    ✓✓ | 
    108864  | 
    		if (len < 2)  | 
    
    
    96  | 
     | 
    46080  | 
    			return -1;  | 
    
    
    97  | 
    ✓✓ | 
    62784  | 
    		if ((p[1] & 0xc0) != 0x80)  | 
    
    
    98  | 
     | 
    34560  | 
    			return -3;  | 
    
    
    99  | 
     | 
    28224  | 
    		value = (*p++ & 0x1f) << 6;  | 
    
    
    100  | 
     | 
    28224  | 
    		value |= *p++ & 0x3f;  | 
    
    
    101  | 
    ✗✓ | 
    28224  | 
    		if (value < 0x80)  | 
    
    
    102  | 
     | 
     | 
    			return -4;  | 
    
    
    103  | 
     | 
     | 
    		ret = 2;  | 
    
    
    104  | 
    ✓✓ | 
    1025904642  | 
    	} else if ((*p & 0xf0) == 0xe0) { | 
    
    
    105  | 
    ✓✓ | 
    12951552  | 
    		if (len < 3)  | 
    
    
    106  | 
     | 
    6291456  | 
    			return -1;  | 
    
    
    107  | 
    ✓✓✓✓
  | 
    8601600  | 
    		if (((p[1] & 0xc0) != 0x80) ||  | 
    
    
    108  | 
     | 
    1941504  | 
    		    ((p[2] & 0xc0) != 0x80))  | 
    
    
    109  | 
     | 
    5898240  | 
    			return -3;  | 
    
    
    110  | 
     | 
    761856  | 
    		value = (*p++ & 0xf) << 12;  | 
    
    
    111  | 
     | 
    761856  | 
    		value |= (*p++ & 0x3f) << 6;  | 
    
    
    112  | 
     | 
    761856  | 
    		value |= *p++ & 0x3f;  | 
    
    
    113  | 
    ✓✓ | 
    761856  | 
    		if (value < 0x800)  | 
    
    
    114  | 
     | 
    12288  | 
    			return -4;  | 
    
    
    115  | 
     | 
     | 
    		/* surrogate pair code points are not valid */  | 
    
    
    116  | 
    ✓✓ | 
    749568  | 
    		if (value >= 0xd800 && value < 0xe000)  | 
    
    
    117  | 
     | 
    12288  | 
    			return -2;  | 
    
    
    118  | 
     | 
     | 
    		ret = 3;  | 
    
    
    119  | 
    ✓✓✓✓
  | 
    2026586580  | 
    	} else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { | 
    
    
    120  | 
    ✓✓ | 
    1012924416  | 
    		if (len < 4)  | 
    
    
    121  | 
     | 
    503316480  | 
    			return -1;  | 
    
    
    122  | 
    ✓✓✓✓
  | 
    547356672  | 
    		if (((p[1] & 0xc0) != 0x80) ||  | 
    
    
    123  | 
    ✓✓ | 
    132120576  | 
    		    ((p[2] & 0xc0) != 0x80) ||  | 
    
    
    124  | 
     | 
    37748736  | 
    		    ((p[3] & 0xc0) != 0x80))  | 
    
    
    125  | 
     | 
    495452160  | 
    			return -3;  | 
    
    
    126  | 
     | 
    14155776  | 
    		value = ((unsigned long)(*p++ & 0x7)) << 18;  | 
    
    
    127  | 
     | 
    14155776  | 
    		value |= (*p++ & 0x3f) << 12;  | 
    
    
    128  | 
     | 
    14155776  | 
    		value |= (*p++ & 0x3f) << 6;  | 
    
    
    129  | 
     | 
    14155776  | 
    		value |= *p++ & 0x3f;  | 
    
    
    130  | 
    ✓✓ | 
    14155776  | 
    		if (value < 0x10000)  | 
    
    
    131  | 
     | 
    393216  | 
    			return -4;  | 
    
    
    132  | 
    ✓✓ | 
    13762560  | 
    		if (value > UNICODE_MAX)  | 
    
    
    133  | 
     | 
    1179648  | 
    			return -2;  | 
    
    
    134  | 
     | 
     | 
    		ret = 4;  | 
    
    
    135  | 
     | 
     | 
    	} else  | 
    
    
    136  | 
     | 
    450  | 
    		return -2;  | 
    
    
    137  | 
     | 
    14285098  | 
    	*val = value;  | 
    
    
    138  | 
     | 
    14285098  | 
    	return ret;  | 
    
    
    139  | 
     | 
    1026921982  | 
    }  | 
    
    
    140  | 
     | 
     | 
     | 
    
    
    141  | 
     | 
     | 
    /* This takes a Unicode code point 'value' and writes its UTF-8 encoded form  | 
    
    
    142  | 
     | 
     | 
     * in 'str' where 'str' is a buffer of at least length 'len'.  If 'str'  | 
    
    
    143  | 
     | 
     | 
     * is NULL, then nothing is written and just the return code is determined.  | 
    
    
    144  | 
     | 
     | 
     | 
    
    
    145  | 
     | 
     | 
     * Returns less than zero on error:  | 
    
    
    146  | 
     | 
     | 
     *  -1 if 'str' is not NULL and 'len' is too small  | 
    
    
    147  | 
     | 
     | 
     *  -2 if 'value' is an invalid character (surrogate or out-of-range)  | 
    
    
    148  | 
     | 
     | 
     *  | 
    
    
    149  | 
     | 
     | 
     * Otherwise, returns the number of bytes in 'value's encoded form  | 
    
    
    150  | 
     | 
     | 
     * (i.e., the number of bytes written to 'str' when it's not NULL).  | 
    
    
    151  | 
     | 
     | 
     *  | 
    
    
    152  | 
     | 
     | 
     * It will need at most 4 characters.  | 
    
    
    153  | 
     | 
     | 
     */  | 
    
    
    154  | 
     | 
     | 
     | 
    
    
    155  | 
     | 
     | 
    int  | 
    
    
    156  | 
     | 
     | 
    UTF8_putc(unsigned char *str, int len, unsigned long value)  | 
    
    
    157  | 
     | 
     | 
    { | 
    
    
    158  | 
    ✓✓ | 
    46371220  | 
    	if (value < 0x80) { | 
    
    
    159  | 
    ✓✓ | 
    3155408  | 
    		if (str != NULL) { | 
    
    
    160  | 
    ✓✓ | 
    1578104  | 
    			if (len < 1)  | 
    
    
    161  | 
     | 
    768  | 
    				return -1;  | 
    
    
    162  | 
     | 
    1577336  | 
    			str[0] = (unsigned char)value;  | 
    
    
    163  | 
     | 
    1577336  | 
    		}  | 
    
    
    164  | 
     | 
    3154640  | 
    		return 1;  | 
    
    
    165  | 
     | 
     | 
    	}  | 
    
    
    166  | 
    ✓✓ | 
    20030202  | 
    	if (value < 0x800) { | 
    
    
    167  | 
    ✓✓ | 
    34560  | 
    		if (str != NULL) { | 
    
    
    168  | 
    ✓✓ | 
    23040  | 
    			if (len < 2)  | 
    
    
    169  | 
     | 
    11520  | 
    				return -1;  | 
    
    
    170  | 
     | 
    11520  | 
    			str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);  | 
    
    
    171  | 
     | 
    11520  | 
    			str[1] = (unsigned char)((value & 0x3f) | 0x80);  | 
    
    
    172  | 
     | 
    11520  | 
    		}  | 
    
    
    173  | 
     | 
    23040  | 
    		return 2;  | 
    
    
    174  | 
     | 
     | 
    	}  | 
    
    
    175  | 
    ✓✓ | 
    19995642  | 
    	if (value < 0x10000) { | 
    
    
    176  | 
    ✓✓ | 
    1118208  | 
    		if (UNICODE_IS_SURROGATE(value))  | 
    
    
    177  | 
     | 
    12288  | 
    			return -2;  | 
    
    
    178  | 
    ✓✓ | 
    1105920  | 
    		if (str != NULL) { | 
    
    
    179  | 
    ✓✓ | 
    737280  | 
    			if (len < 3)  | 
    
    
    180  | 
     | 
    368640  | 
    				return -1;  | 
    
    
    181  | 
     | 
    368640  | 
    			str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);  | 
    
    
    182  | 
     | 
    368640  | 
    			str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);  | 
    
    
    183  | 
     | 
    368640  | 
    			str[2] = (unsigned char)((value & 0x3f) | 0x80);  | 
    
    
    184  | 
     | 
    368640  | 
    		}  | 
    
    
    185  | 
     | 
    737280  | 
    		return 3;  | 
    
    
    186  | 
     | 
     | 
    	}  | 
    
    
    187  | 
    ✓✓ | 
    18877434  | 
    	if (value <= UNICODE_MAX) { | 
    
    
    188  | 
    ✓✓ | 
    18874368  | 
    		if (str != NULL) { | 
    
    
    189  | 
    ✓✓ | 
    12582912  | 
    			if (len < 4)  | 
    
    
    190  | 
     | 
    6291456  | 
    				return -1;  | 
    
    
    191  | 
     | 
    6291456  | 
    			str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);  | 
    
    
    192  | 
     | 
    6291456  | 
    			str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);  | 
    
    
    193  | 
     | 
    6291456  | 
    			str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);  | 
    
    
    194  | 
     | 
    6291456  | 
    			str[3] = (unsigned char)((value & 0x3f) | 0x80);  | 
    
    
    195  | 
     | 
    6291456  | 
    		}  | 
    
    
    196  | 
     | 
    12582912  | 
    		return 4;  | 
    
    
    197  | 
     | 
     | 
    	}  | 
    
    
    198  | 
     | 
    3066  | 
    	return -2;  | 
    
    
    199  | 
     | 
    23185610  | 
    }  |