@@ -1052,31 +1052,18 @@ static const char * const hangul_syllables[][3] = {
10521052 { 0 , 0 , "H" }
10531053};
10541054
1055- /* These ranges need to match makeunicodedata.py:cjk_ranges. */
10561055static int
1057- is_cjk_unified_ideograph (Py_UCS4 code )
1058- {
1059- return
1060- (0x3400 <= code && code <= 0x4DBF ) || /* CJK Ideograph Extension A */
1061- (0x4E00 <= code && code <= 0x9FFF ) || /* CJK Ideograph */
1062- (0x20000 <= code && code <= 0x2A6DF ) || /* CJK Ideograph Extension B */
1063- (0x2A700 <= code && code <= 0x2B73F ) || /* CJK Ideograph Extension C */
1064- (0x2B740 <= code && code <= 0x2B81D ) || /* CJK Ideograph Extension D */
1065- (0x2B820 <= code && code <= 0x2CEAD ) || /* CJK Ideograph Extension E */
1066- (0x2CEB0 <= code && code <= 0x2EBE0 ) || /* CJK Ideograph Extension F */
1067- (0x2EBF0 <= code && code <= 0x2EE5D ) || /* CJK Ideograph Extension I */
1068- (0x30000 <= code && code <= 0x3134A ) || /* CJK Ideograph Extension G */
1069- (0x31350 <= code && code <= 0x323AF ) || /* CJK Ideograph Extension H */
1070- (0x323B0 <= code && code <= 0x33479 ); /* CJK Ideograph Extension J */
1071- }
1072-
1073- /* These ranges need to match makeunicodedata.py:tangut_ranges. */
1074- static int
1075- is_tangut_ideograph (Py_UCS4 code )
1056+ find_prefix_id (Py_UCS4 code )
10761057{
1077- return
1078- (0x17000 <= code && code <= 0x187FF ) || /* Tangut */
1079- (0x18D00 <= code && code <= 0x18D1E ); /* Tangut Supplement */
1058+ for (int i = 0 ; i < (int )Py_ARRAY_LENGTH (derived_name_ranges ); i ++ ) {
1059+ if (code < derived_name_ranges [i ].first ) {
1060+ return -1 ;
1061+ }
1062+ if (code <= derived_name_ranges [i ].last ) {
1063+ return derived_name_ranges [i ].prefixid ;
1064+ }
1065+ }
1066+ return -1 ;
10801067}
10811068
10821069/* macros used to determine if the given code point is in the PUA range that
@@ -1354,7 +1341,9 @@ _getucname(PyObject *self,
13541341 }
13551342 }
13561343
1357- if (SBase <= code && code < SBase + SCount ) {
1344+ int prefixid = find_prefix_id (code );
1345+ if (prefixid == 0 ) {
1346+ assert (SBase <= code && code < SBase + SCount );
13581347 /* Hangul syllable. */
13591348 int SIndex = code - SBase ;
13601349 int L = SIndex / NCount ;
@@ -1376,19 +1365,11 @@ _getucname(PyObject *self,
13761365 return 1 ;
13771366 }
13781367
1379- if (is_cjk_unified_ideograph (code )) {
1380- if (buflen < 28 )
1381- /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
1382- return 0 ;
1383- sprintf (buffer , "CJK UNIFIED IDEOGRAPH-%X" , code );
1384- return 1 ;
1385- }
1386-
1387- if (is_tangut_ideograph (code )) {
1388- if (buflen < 23 )
1389- /* Worst case: TANGUT IDEOGRAPH-18D08 */
1368+ if (prefixid > 0 ) {
1369+ const char * prefix = derived_name_prefixes [prefixid ];
1370+ if (snprintf (buffer , buflen , "%s%04X" , prefix , code ) >= buflen ) {
13901371 return 0 ;
1391- sprintf ( buffer , "TANGUT IDEOGRAPH-%X" , code );
1372+ }
13921373 return 1 ;
13931374 }
13941375
@@ -1482,8 +1463,19 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
14821463 * Named aliases are not resolved, they are returned as a code point in the
14831464 * PUA */
14841465
1485- /* Check for hangul syllables. */
1486- if (PyOS_strnicmp (name , "HANGUL SYLLABLE " , 16 ) == 0 ) {
1466+ int i = 0 ;
1467+ size_t prefixlen ;
1468+ for (; i < (int )Py_ARRAY_LENGTH (derived_name_prefixes ); i ++ ) {
1469+ const char * prefix = derived_name_prefixes [i ];
1470+ prefixlen = strlen (derived_name_prefixes [i ]);
1471+ if (PyOS_strnicmp (name , prefix , prefixlen ) == 0 ) {
1472+ break ;
1473+ }
1474+ }
1475+
1476+ if (i == 0 ) {
1477+ /* Hangul syllables. */
1478+ assert (PyOS_strnicmp (name , "HANGUL SYLLABLE " , 16 ) == 0 );
14871479 int len , L = -1 , V = -1 , T = -1 ;
14881480 const char * pos = name + 16 ;
14891481 find_syllable (pos , & len , & L , LCount , 0 );
@@ -1500,22 +1492,9 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
15001492 return 0 ;
15011493 }
15021494
1503- /* Check for CJK unified ideographs. */
1504- if (PyOS_strnicmp (name , "CJK UNIFIED IDEOGRAPH-" , 22 ) == 0 ) {
1505- /* Four or five hexdigits must follow. */
1506- Py_UCS4 v = parse_hex_code (name + 22 , namelen - 22 );
1507- if (!is_cjk_unified_ideograph (v )) {
1508- return 0 ;
1509- }
1510- * code = v ;
1511- return 1 ;
1512- }
1513-
1514- /* Check for Tangut ideographs. */
1515- if (PyOS_strnicmp (name , "TANGUT IDEOGRAPH-" , 17 ) == 0 ) {
1516- /* Five hexdigits must follow. */
1517- Py_UCS4 v = parse_hex_code (name + 17 , namelen - 17 );
1518- if (!is_tangut_ideograph (v )) {
1495+ if (i < (int )Py_ARRAY_LENGTH (derived_name_prefixes )) {
1496+ Py_UCS4 v = parse_hex_code (name + prefixlen , namelen - prefixlen );
1497+ if (find_prefix_id (v ) != i ) {
15191498 return 0 ;
15201499 }
15211500 * code = v ;
0 commit comments