Skip to content

Commit 1100c07

Browse files
minor #454 Conform to IDNA version 15.1.0 revision 31 (TRowbotham)
This PR was merged into the 1.x branch. Discussion ---------- Conform to IDNA version 15.1.0 revision 31 This adds the necessary changes to conform to [IDNA version 15.1.0 revision 31](https://www.unicode.org/reports/tr46/tr46-31.html#Modifications). Notable Changes * Transitional processing (the default in PHP) is now deprecated. No deprecation notices were added, as PHP does not yet report a deprecation notice in this case. * An error is no longer recorded for characters with a status of disallowed. * When performing code point mapping with transitional processing enabled, the code point U+1E9E capital sharp s (ẞ) is replaced by the string “ss”. * A new internal option "IgnoreInvalidPunycode" was added, which is supposed to allow for an all-ASCII fast-path; however, the official tests do not test this configuration option. Commits ------- 385d1d5 Conform to IDNA version 15.1.0 revision 31
2 parents 6d5a7aa + 385d1d5 commit 1100c07

File tree

2 files changed

+64
-5
lines changed

2 files changed

+64
-5
lines changed

src/Intl/Idn/Idn.php

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,6 @@ private static function mapCodePoints($input, array $options, Info $info)
280280

281281
switch ($data['status']) {
282282
case 'disallowed':
283-
$info->errors |= self::ERROR_DISALLOWED;
284-
285-
// no break.
286-
287283
case 'valid':
288284
$str .= mb_chr($codePoint, 'utf-8');
289285

@@ -294,7 +290,7 @@ private static function mapCodePoints($input, array $options, Info $info)
294290
break;
295291

296292
case 'mapped':
297-
$str .= $data['mapping'];
293+
$str .= $transitional && 0x1E9E === $codePoint ? 'ss' : $data['mapping'];
298294

299295
break;
300296

@@ -346,6 +342,18 @@ private static function process($domain, array $options, Info $info)
346342
$validationOptions = $options;
347343

348344
if ('xn--' === substr($label, 0, 4)) {
345+
// Step 4.1. If the label contains any non-ASCII code point (i.e., a code point greater than U+007F),
346+
// record that there was an error, and continue with the next label.
347+
if (preg_match('/[^\x00-\x7F]/', $label)) {
348+
$info->errors |= self::ERROR_PUNYCODE;
349+
350+
continue;
351+
}
352+
353+
// Step 4.2. Attempt to convert the rest of the label to Unicode according to Punycode [RFC3492]. If
354+
// that conversion fails, record that there was an error, and continue
355+
// with the next label. Otherwise replace the original label in the string by the results of the
356+
// conversion.
349357
try {
350358
$label = self::punycodeDecode(substr($label, 4));
351359
} catch (\Exception $e) {
@@ -516,6 +524,8 @@ private static function validateLabel($label, Info $info, array $options, $canBe
516524
if ('-' === substr($label, -1, 1)) {
517525
$info->errors |= self::ERROR_TRAILING_HYPHEN;
518526
}
527+
} elseif ('xn--' === substr($label, 0, 4)) {
528+
$info->errors |= self::ERROR_PUNYCODE;
519529
}
520530

521531
// Step 4. The label must not contain a U+002E (.) FULL STOP.

tests/Intl/Idn/IdnTest.php

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,55 @@ public function testEncodePhp53($decoded, $encoded)
284284
$this->assertSame($encoded, $result);
285285
}
286286

287+
/**
288+
* IDNA 15.1.0 revision 31
289+
*
290+
* This tests the change in "Section 4 Processing step 1. Map" which conditionally maps U+1E9E capital sharp s to
291+
* "ss" if Transitional_Processing is used.
292+
*
293+
* @dataProvider captialSharpSProvider
294+
*/
295+
public function testCapitalSharpSProcessing($input, $expected, $flags)
296+
{
297+
idn_to_utf8($input, $flags, \INTL_IDNA_VARIANT_UTS46, $info);
298+
$this->assertSame($expected, $info['result']);
299+
}
300+
301+
/**
302+
* IDNA 15.1.0 revision 31
303+
*
304+
* This tests the additional validity check in "Section 4.1 Validity Criteria Processing step 4", which is used to
305+
* disallow labels that do not round trip.
306+
*/
307+
public function testLabelsThatDoNotRoundTripAreDisallowed()
308+
{
309+
idn_to_utf8('xn--xn---epa.', \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $info1);
310+
idn_to_ascii($info1['result'], \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $info2);
311+
$this->assertSame(\IDNA_ERROR_PUNYCODE, \IDNA_ERROR_PUNYCODE & $info2['errors']);
312+
}
313+
314+
/**
315+
* IDNA 15.1.0 revision 31
316+
*
317+
* This tests the additional condition in "Section 4 Processing step 4.1" where a label that starts with "xn--"
318+
* and contains a non-ASCII codepoint records an error and the processing steps continue with the next label.
319+
*/
320+
public function testLabelStartingWithPunycodePrefixWithNonAsciiCharacterRecordsErrorAndIsSkipped()
321+
{
322+
\idn_to_utf8('xn--🌈.xn--fa-hia.de', \IDNA_DEFAULT, \INTL_IDNA_VARIANT_UTS46, $info);
323+
$this->assertSame(\IDNA_ERROR_PUNYCODE, \IDNA_ERROR_PUNYCODE & $info['errors']);
324+
$this->assertSame('xn--🌈.faß.de', $info['result']);
325+
}
326+
327+
public static function captialSharpSProvider()
328+
{
329+
return [
330+
['Faß.de', 'fass.de', \IDNA_DEFAULT],
331+
['Faß.de', 'faß.de', \IDNA_NONTRANSITIONAL_TO_UNICODE],
332+
];
333+
}
334+
335+
287336
public static function domainNamesProvider()
288337
{
289338
return [

0 commit comments

Comments
 (0)