Skip to content

Commit bdedad3

Browse files
authored
Added a new popGrapheme function to std.uni (#9053)
* Added a new popGrapheme function to std.uni * A changelog clarification and fix of inaccurate static condition * Committing to restart the tests, and adding the new function to the module doc index while there.
1 parent eab6595 commit bdedad3

File tree

2 files changed

+135
-17
lines changed

2 files changed

+135
-17
lines changed

changelog/pop-grapheme.dd

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
Added popGrapheme function to std.uni.
2+
3+
The new function is a cross between the existing $(REF graphemeStride, std,
4+
uni) and $(REF decodeGrapheme, std, uni) functions. The new function both
5+
supports `@safe pure nothrow @nogc` like `graphemeStride` does as long as you
6+
don't rely on autodecoding (side note: `@nogc` support for `graphemeStride`
7+
added in this release), and works with any non-array ranges just like
8+
`decodeGrapheme` does.
9+
10+
Example:
11+
12+
-------
13+
import std.uni;
14+
15+
// Two Union Jack flags (Great Britain) in each string
16+
string s = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
17+
wstring ws = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
18+
dstring ds = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
19+
20+
// String pop length in code units, not points.
21+
assert(s.popGrapheme() == 8);
22+
assert(ws.popGrapheme() == 4);
23+
assert(ds.popGrapheme() == 2);
24+
25+
assert(s == "\U0001F1EC\U0001F1E7");
26+
assert(ws == "\U0001F1EC\U0001F1E7");
27+
assert(ds == "\U0001F1EC\U0001F1E7");
28+
29+
import std.algorithm.comparison : equal;
30+
import std.algorithm.iteration : filter;
31+
32+
// Also works for non-random access ranges as long as the
33+
// character type is 32-bit.
34+
auto testPiece = "\r\nhello!"d.filter!(x => !x.isAlpha);
35+
// Windows-style line ending is two code points in a single grapheme.
36+
assert(testPiece.popGrapheme() == 2);
37+
assert(testPiece.equal("!"d));
38+
-------

std/uni/package.d

Lines changed: 97 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ $(TR $(TD Decode) $(TD
1616
$(LREF byGrapheme)
1717
$(LREF decodeGrapheme)
1818
$(LREF graphemeStride)
19+
$(LREF popGrapheme)
1920
))
2021
$(TR $(TD Comparison) $(TD
2122
$(LREF icmp)
@@ -708,8 +709,8 @@ import std.meta : AliasSeq;
708709
import std.range.primitives : back, ElementEncodingType, ElementType, empty,
709710
front, hasLength, hasSlicing, isForwardRange, isInputRange,
710711
isRandomAccessRange, popFront, put, save;
711-
import std.traits : isConvertibleToString, isIntegral, isSomeChar,
712-
isSomeString, Unqual, isDynamicArray;
712+
import std.traits : isAutodecodableString, isConvertibleToString, isIntegral,
713+
isSomeChar, isSomeString, Unqual, isDynamicArray;
713714
// debug = std_uni;
714715

715716
import std.internal.unicode_tables; // generated file
@@ -7148,17 +7149,25 @@ private immutable TransformRes
71487149
TransformRes.goOn
71497150
];
71507151

7151-
template genericDecodeGrapheme(bool getValue)
7152-
{
7153-
static if (getValue)
7152+
enum GraphemeRet { none, step, value }
7153+
7154+
template genericDecodeGrapheme(GraphemeRet retType)
7155+
{ alias Ret = GraphemeRet;
7156+
7157+
static if (retType == Ret.value)
71547158
alias Value = Grapheme;
7155-
else
7159+
else static if (retType == Ret.step)
7160+
alias Value = size_t;
7161+
else static if (retType == Ret.none)
71567162
alias Value = void;
71577163

71587164
Value genericDecodeGrapheme(Input)(ref Input range)
71597165
{
7160-
static if (getValue)
7161-
Grapheme grapheme;
7166+
static if (retType == Ret.value)
7167+
Grapheme result;
7168+
else static if (retType == Ret.step)
7169+
size_t result = 0;
7170+
71627171
auto state = GraphemeState.Start;
71637172
dchar ch;
71647173

@@ -7173,17 +7182,21 @@ template genericDecodeGrapheme(bool getValue)
71737182
with(TransformRes)
71747183
{
71757184
case goOn:
7176-
static if (getValue)
7177-
grapheme ~= ch;
7185+
static if (retType == Ret.value)
7186+
result ~= ch;
7187+
else static if (retType == Ret.step)
7188+
result++;
71787189
range.popFront();
71797190
continue;
71807191

71817192
case redo:
71827193
goto rerun;
71837194

71847195
case retInclude:
7185-
static if (getValue)
7186-
grapheme ~= ch;
7196+
static if (retType == Ret.value)
7197+
result ~= ch;
7198+
else static if (retType == Ret.step)
7199+
result++;
71877200
range.popFront();
71887201
break outer;
71897202

@@ -7192,8 +7205,8 @@ template genericDecodeGrapheme(bool getValue)
71927205
}
71937206
}
71947207

7195-
static if (getValue)
7196-
return grapheme;
7208+
static if (retType != Ret.none)
7209+
return result;
71977210
}
71987211
}
71997212

@@ -7217,7 +7230,7 @@ if (is(C : dchar))
72177230
{
72187231
auto src = input[index..$];
72197232
auto n = src.length;
7220-
genericDecodeGrapheme!(false)(src);
7233+
genericDecodeGrapheme!(GraphemeRet.none)(src);
72217234
return n - src.length;
72227235
}
72237236

@@ -7279,7 +7292,7 @@ if (is(C : dchar))
72797292
Grapheme decodeGrapheme(Input)(ref Input inp)
72807293
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
72817294
{
7282-
return genericDecodeGrapheme!true(inp);
7295+
return genericDecodeGrapheme!(GraphemeRet.value)(inp);
72837296
}
72847297

72857298
@safe unittest
@@ -7304,6 +7317,73 @@ if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
73047317
assert(equal(decodeGrapheme(s)[], "\U0001F1EC\U0001F1E7"));
73057318
}
73067319

7320+
/++
7321+
Reads one full grapheme cluster from an
7322+
$(REF_ALTTEXT input range, isInputRange, std,range,primitives) of dchar `inp`,
7323+
but doesn't return it. Instead returns the number of code units read.
7324+
This differs from the number of code points read only if `inp` is an
7325+
autodecodable string.
7326+
7327+
Note:
7328+
This function modifies `inp` and thus `inp`
7329+
must be an L-value.
7330+
+/
7331+
size_t popGrapheme(Input)(ref Input inp)
7332+
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
7333+
{
7334+
static if (isAutodecodableString!Input || hasLength!Input)
7335+
{
7336+
// Why count each step in the decoder when you can just
7337+
// measure the grapheme in one go?
7338+
auto n = inp.length;
7339+
genericDecodeGrapheme!(GraphemeRet.none)(inp);
7340+
return n - inp.length;
7341+
}
7342+
else return genericDecodeGrapheme!(GraphemeRet.step)(inp);
7343+
}
7344+
7345+
///
7346+
@safe pure unittest
7347+
{
7348+
// Two Union Jack flags (Great Britain) in each string
7349+
string s = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
7350+
wstring ws = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
7351+
dstring ds = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
7352+
7353+
// String pop length in code units, not points.
7354+
assert(s.popGrapheme() == 8);
7355+
assert(ws.popGrapheme() == 4);
7356+
assert(ds.popGrapheme() == 2);
7357+
7358+
assert(s == "\U0001F1EC\U0001F1E7");
7359+
assert(ws == "\U0001F1EC\U0001F1E7");
7360+
assert(ds == "\U0001F1EC\U0001F1E7");
7361+
7362+
import std.algorithm.comparison : equal;
7363+
import std.algorithm.iteration : filter;
7364+
7365+
// Also works for non-random access ranges as long as the
7366+
// character type is 32-bit.
7367+
auto testPiece = "\r\nhello!"d.filter!(x => !x.isAlpha);
7368+
// Windows-style line ending is two code points in a single grapheme.
7369+
assert(testPiece.popGrapheme() == 2);
7370+
assert(testPiece.equal("!"d));
7371+
}
7372+
7373+
// Attribute compliance test. Should be nothrow `@nogc` when
7374+
// no autodecoding is needed.
7375+
@safe pure nothrow @nogc unittest
7376+
{
7377+
import std.algorithm.iteration : filter;
7378+
7379+
auto str = "abcdef"d;
7380+
assert(str.popGrapheme() == 1);
7381+
7382+
// also test with non-random access
7383+
auto filtered = "abcdef"d.filter!(x => x%2);
7384+
assert(filtered.popGrapheme() == 1);
7385+
}
7386+
73077387
/++
73087388
$(P Iterate a string by $(LREF Grapheme).)
73097389
@@ -7722,7 +7802,7 @@ public:
77227802
@property bool valid()() /*const*/
77237803
{
77247804
auto r = this[];
7725-
genericDecodeGrapheme!false(r);
7805+
genericDecodeGrapheme!(GraphemeRet.none)(r);
77267806
return r.length == 0;
77277807
}
77287808

0 commit comments

Comments
 (0)