Skip to content

Commit ef9a15e

Browse files
authored
Add namereplace error handler (#1140)
* Add namereplace error handler * Fix test failure * Add internal method to initialize
1 parent 2fef598 commit ef9a15e

File tree

4 files changed

+125
-20
lines changed

4 files changed

+125
-20
lines changed

Src/IronPython/Modules/unicodedata.cs

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System;
99
using System.Collections;
1010
using System.Collections.Generic;
11+
using System.Diagnostics.CodeAnalysis;
1112
using System.IO;
1213
using System.IO.Compression;
1314
using System.Linq;
@@ -18,8 +19,9 @@
1819
using IronPython.Runtime;
1920
using IronPython.Runtime.Operations;
2021

21-
[assembly: PythonModule("unicodedata", typeof(IronPython.Modules.unicodedata))]
22+
using NotNullAttribute = Microsoft.Scripting.Runtime.NotNullAttribute;
2223

24+
[assembly: PythonModule("unicodedata", typeof(IronPython.Modules.unicodedata))]
2325
namespace IronPython.Modules {
2426
public static class unicodedata {
2527
private static UCD ucd_5_2_0 = null;
@@ -41,6 +43,14 @@ public static UCD ucd_3_2_0 {
4143

4244
[SpecialName]
4345
public static void PerformModuleReload(PythonContext/*!*/ context, IDictionary/*!*/ dict) {
46+
EnsureInitialized();
47+
}
48+
49+
/// <summary>
50+
/// Ensures that the module is initialized so that static methods don't throw.
51+
/// </summary>
52+
[MemberNotNull(nameof(ucd_5_2_0))]
53+
internal static void EnsureInitialized() {
4454
if (ucd_5_2_0 == null) {
4555
// This is a lie. The version of Unicode depends on the .NET version as well as the OS. The
4656
// version of the database stored internally is 5.2, so just say that.
@@ -52,9 +62,18 @@ public static string lookup(string name) {
5262
return ucd_5_2_0.lookup(name);
5363
}
5464

55-
public static string name(char unichr, string @default = null) {
56-
return ucd_5_2_0.name(unichr, @default);
57-
}
65+
#nullable enable
66+
67+
public static string name([NotNull] string unichr)
68+
=> ucd_5_2_0.name(unichr);
69+
70+
public static object? name([NotNull] string unichr, object? @default)
71+
=> ucd_5_2_0.name(unichr, @default);
72+
73+
internal static bool TryGetName(int rune, [NotNullWhen(true)] out string? name)
74+
=> ucd_5_2_0.TryGetName(rune, out name);
75+
76+
#nullable restore
5877

5978
public static int @decimal(char unichr, int @default) {
6079
return ucd_5_2_0.@decimal(unichr, @default);
@@ -140,20 +159,34 @@ public string lookup(string name) {
140159
return char.ConvertFromUtf32(nameLookup[name]);
141160
}
142161

143-
public string name(char unichr, string @default) {
144-
if (TryGetInfo(unichr, out CharInfo info)) {
145-
return info.Name;
162+
#nullable enable
163+
164+
public string name([NotNull] string unichr)
165+
=> TryGetName(GetRune(unichr), out var name) ? name : throw PythonOps.ValueError("no such name");
166+
167+
public object? name([NotNull] string unichr, object? @default)
168+
=> TryGetName(GetRune(unichr), out var name) ? name : @default;
169+
170+
internal bool TryGetName(int rune, [NotNullWhen(true)] out string? name) {
171+
if (TryGetInfo(rune, out CharInfo info, excludeRanges: true)) {
172+
name = info.Name;
173+
return true;
146174
}
147-
return @default;
175+
name = null;
176+
return false;
148177
}
149178

150-
public string name(char unichr) {
151-
if (TryGetInfo(unichr, out CharInfo info)) {
152-
return info.Name;
179+
private int GetRune(string unichr) {
180+
if (unichr.Length == 1) {
181+
return unichr[0];
182+
} else if (unichr.Length == 2 && char.IsSurrogatePair(unichr, 0)) {
183+
return char.ConvertToUtf32(unichr, 0);
153184
}
154-
throw PythonOps.ValueError("no such name");
185+
throw PythonOps.TypeError("argument 1 must be a unicode character, not str");
155186
}
156187

188+
#nullable restore
189+
157190
public int @decimal(char unichr, int @default) {
158191
if (TryGetInfo(unichr, out CharInfo info)) {
159192
var d = info.Numeric_Value_Decimal;
@@ -338,12 +371,14 @@ private void BuildNameLookup() {
338371
nameLookup = database.Where(c => !c.Value.Name.StartsWith("<")).ToDictionary(c => c.Value.Name, c => c.Key, StringComparer.OrdinalIgnoreCase);
339372
}
340373

341-
private bool TryGetInfo(char unichr, out CharInfo charInfo) {
374+
internal bool TryGetInfo(int unichr, out CharInfo charInfo, bool excludeRanges = false) {
342375
if (database.TryGetValue(unichr, out charInfo)) return true;
343-
foreach (var range in ranges) {
344-
if (range.First <= unichr && unichr <= range.Last) {
345-
charInfo = range;
346-
return true;
376+
if (!excludeRanges) {
377+
foreach (var range in ranges) {
378+
if (range.First <= unichr && unichr <= range.Last) {
379+
charInfo = range;
380+
return true;
381+
}
347382
}
348383
}
349384
return false;

Src/IronPython/Runtime/LiteralParser.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ private static string DoParseString<T>(ReadOnlySpan<T> data, bool isRaw, bool is
109109
}
110110
continue;
111111
case 'N': {
112-
IronPython.Modules.unicodedata.PerformModuleReload(null, null);
112+
Modules.unicodedata.EnsureInitialized();
113113
StringBuilder namebuf = new StringBuilder();
114114
bool namestarted = false;
115115
bool namecomplete = false;

Src/IronPython/Runtime/Operations/StringOps.cs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1839,6 +1839,7 @@ Encoding setFallback(Encoding enc, DecoderFallback fb) {
18391839
case null:
18401840
case "backslashreplace":
18411841
case "xmlcharrefreplace":
1842+
case "namereplace":
18421843
case "strict": e = setFallback(e, new ExceptionFallback(e is UTF8Encoding)); break;
18431844
case "replace": e = setFallback(e, ReplacementFallback); break;
18441845
case "ignore": e = setFallback(e, new DecoderReplacementFallback(string.Empty)); break;
@@ -2093,6 +2094,11 @@ internal static ConcurrentDictionary<string, object> MakeErrorHandlersDict() {
20932094
ReflectionUtils.GetMethodInfos(typeof(StringOps).GetMember(nameof(BackslashReplaceErrors), BindingFlags.Static | BindingFlags.NonPublic)),
20942095
typeof(StringOps));
20952096

2097+
d["namereplace"] = BuiltinFunction.MakeFunction(
2098+
"namereplace_errors",
2099+
ReflectionUtils.GetMethodInfos(typeof(StringOps).GetMember(nameof(NameReplaceErrors), BindingFlags.Static | BindingFlags.NonPublic)),
2100+
typeof(StringOps));
2101+
20962102
d["surrogateescape"] = BuiltinFunction.MakeFunction(
20972103
"surrogateescape_errors",
20982104
ReflectionUtils.GetMethodInfos(typeof(StringOps).GetMember(nameof(SurrogateEscapeErrors), BindingFlags.Static | BindingFlags.NonPublic)),
@@ -2638,6 +2644,69 @@ private static object BackslashReplaceErrors(object unicodeError) {
26382644
}
26392645
}
26402646

2647+
private static object NameReplaceErrors(object unicodeError) {
2648+
Modules.unicodedata.EnsureInitialized();
2649+
2650+
switch (unicodeError) {
2651+
case PythonExceptions._UnicodeDecodeError ude:
2652+
throw PythonOps.TypeError("don't know how to handle UnicodeDecodeError in error callback");
2653+
2654+
case PythonExceptions._UnicodeEncodeError uee:
2655+
if (uee.@object is string text && uee.start is int start && uee.end is int end) {
2656+
start = Math.Max(0, Math.Min(start, text.Length - 1));
2657+
end = Math.Max(start, Math.Min(end, text.Length));
2658+
return PythonTuple.MakeTuple(NameReplaceEncode(text, start, end - start), end);
2659+
}
2660+
goto default;
2661+
2662+
case PythonExceptions._UnicodeTranslateError ute:
2663+
throw PythonOps.TypeError("don't know how to handle UnicodeTranslateError in error callback");
2664+
2665+
case DecoderFallbackException dfe:
2666+
throw PythonOps.TypeError("don't know how to handle DecoderFallbackException in error callback");
2667+
2668+
case EncoderFallbackException efe:
2669+
string chars = (efe.CharUnknownHigh != '\0') ? new string(new[] { efe.CharUnknownHigh, efe.CharUnknownLow }) : new string(efe.CharUnknown, 1);
2670+
return PythonTuple.MakeTuple(NameReplaceEncode(chars, 0, chars.Length), efe.Index + chars.Length);
2671+
2672+
default:
2673+
throw PythonOps.TypeError("codec must pass exception instance");
2674+
}
2675+
2676+
static string NameReplaceEncode(string s, int start, int count) {
2677+
StringBuilder b = new StringBuilder();
2678+
2679+
int i = start;
2680+
int end = start + count;
2681+
while (i < end) {
2682+
char ch = s[i];
2683+
if (char.IsSurrogatePair(s, i)) {
2684+
var rune = char.ConvertToUtf32(s, i++);
2685+
if (Modules.unicodedata.TryGetName(rune, out var name)) {
2686+
b.AppendFormat("\\N{{{0}}}", name);
2687+
} else {
2688+
b.AppendFormat("\\U{0:x8}", rune);
2689+
}
2690+
} else if (ch > 0xFF) {
2691+
if (Modules.unicodedata.TryGetName(ch, out var name)) {
2692+
b.AppendFormat("\\N{{{0}}}", name);
2693+
} else {
2694+
b.AppendFormat("\\u{0:x4}", (int)ch);
2695+
}
2696+
} else {
2697+
if (Modules.unicodedata.TryGetName(ch, out var name)) {
2698+
b.AppendFormat("\\N{{{0}}}", name);
2699+
} else {
2700+
b.AppendFormat("\\x{0:x2}", (int)ch);
2701+
}
2702+
}
2703+
i++;
2704+
}
2705+
2706+
return b.ToString();
2707+
}
2708+
}
2709+
26412710
private delegate string? DecodeErrorHandler(IList<byte> bytes, int start, ref int end);
26422711
private delegate Bytes? EncodeErrorHandler(string text, int start, ref int end);
26432712

@@ -2851,6 +2920,6 @@ internal static void IdentifyUtfEncoding(string encodingName, out int charWidth,
28512920
}
28522921
}
28532922

2854-
#endregion
2923+
#endregion
28552924
}
28562925
}

Tests/test_regressions.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1087,7 +1087,8 @@ def test_ipy2_gh357(self):
10871087
import unicodedata
10881088

10891089
if is_cli:
1090-
self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
1090+
with self.assertRaises(ValueError):
1091+
unicodedata.name(u'\u4e2d')
10911092
else:
10921093
self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')
10931094

0 commit comments

Comments
 (0)