@@ -16,6 +16,7 @@ $(TR $(TD Decode) $(TD
16
16
$(LREF byGrapheme)
17
17
$(LREF decodeGrapheme)
18
18
$(LREF graphemeStride)
19
+ $(LREF popGrapheme)
19
20
))
20
21
$(TR $(TD Comparison) $(TD
21
22
$(LREF icmp)
@@ -708,8 +709,8 @@ import std.meta : AliasSeq;
708
709
import std.range.primitives : back, ElementEncodingType, ElementType, empty,
709
710
front, hasLength, hasSlicing, isForwardRange, isInputRange,
710
711
isRandomAccessRange, popFront, put, save;
711
- import std.traits : isConvertibleToString, isIntegral, isSomeChar ,
712
- isSomeString, Unqual, isDynamicArray;
712
+ import std.traits : isAutodecodableString, isConvertibleToString, isIntegral ,
713
+ isSomeChar, isSomeString, Unqual, isDynamicArray;
713
714
// debug = std_uni;
714
715
715
716
import std.internal.unicode_tables ; // generated file
@@ -7148,17 +7149,25 @@ private immutable TransformRes
7148
7149
TransformRes.goOn
7149
7150
];
7150
7151
7151
- template genericDecodeGrapheme (bool getValue)
7152
- {
7153
- static if (getValue)
7152
+ enum GraphemeRet { none, step, value }
7153
+
7154
+ template genericDecodeGrapheme (GraphemeRet retType)
7155
+ { alias Ret = GraphemeRet;
7156
+
7157
+ static if (retType == Ret.value)
7154
7158
alias Value = Grapheme;
7155
- else
7159
+ else static if (retType == Ret.step)
7160
+ alias Value = size_t ;
7161
+ else static if (retType == Ret.none)
7156
7162
alias Value = void ;
7157
7163
7158
7164
Value genericDecodeGrapheme (Input)(ref Input range)
7159
7165
{
7160
- static if (getValue)
7161
- Grapheme grapheme;
7166
+ static if (retType == Ret.value)
7167
+ Grapheme result;
7168
+ else static if (retType == Ret.step)
7169
+ size_t result = 0 ;
7170
+
7162
7171
auto state = GraphemeState.Start;
7163
7172
dchar ch;
7164
7173
@@ -7173,17 +7182,21 @@ template genericDecodeGrapheme(bool getValue)
7173
7182
with (TransformRes)
7174
7183
{
7175
7184
case goOn:
7176
- static if (getValue)
7177
- grapheme ~= ch;
7185
+ static if (retType == Ret.value)
7186
+ result ~= ch;
7187
+ else static if (retType == Ret.step)
7188
+ result++ ;
7178
7189
range.popFront();
7179
7190
continue ;
7180
7191
7181
7192
case redo:
7182
7193
goto rerun;
7183
7194
7184
7195
case retInclude:
7185
- static if (getValue)
7186
- grapheme ~= ch;
7196
+ static if (retType == Ret.value)
7197
+ result ~= ch;
7198
+ else static if (retType == Ret.step)
7199
+ result++ ;
7187
7200
range.popFront();
7188
7201
break outer;
7189
7202
@@ -7192,8 +7205,8 @@ template genericDecodeGrapheme(bool getValue)
7192
7205
}
7193
7206
}
7194
7207
7195
- static if (getValue )
7196
- return grapheme ;
7208
+ static if (retType != Ret.none )
7209
+ return result ;
7197
7210
}
7198
7211
}
7199
7212
@@ -7217,7 +7230,7 @@ if (is(C : dchar))
7217
7230
{
7218
7231
auto src = input[index.. $];
7219
7232
auto n = src.length;
7220
- genericDecodeGrapheme! (false )(src);
7233
+ genericDecodeGrapheme! (GraphemeRet.none )(src);
7221
7234
return n - src.length;
7222
7235
}
7223
7236
@@ -7279,7 +7292,7 @@ if (is(C : dchar))
7279
7292
Grapheme decodeGrapheme (Input)(ref Input inp)
7280
7293
if (isInputRange! Input && is (immutable ElementType! Input == immutable dchar ))
7281
7294
{
7282
- return genericDecodeGrapheme! true (inp);
7295
+ return genericDecodeGrapheme! (GraphemeRet.value) (inp);
7283
7296
}
7284
7297
7285
7298
@safe unittest
@@ -7304,6 +7317,73 @@ if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
7304
7317
assert (equal(decodeGrapheme(s)[], " \U0001F1EC\U0001F1E7 " ));
7305
7318
}
7306
7319
7320
+ /++
7321
+ Reads one full grapheme cluster from an
7322
+ $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of dchar `inp`,
7323
+ but doesn't return it. Instead returns the number of code units read.
7324
+ This differs from the number of code points read only if `inp` is an
7325
+ autodecodable string.
7326
+
7327
+ Note:
7328
+ This function modifies `inp` and thus `inp`
7329
+ must be an L-value.
7330
+ +/
7331
+ size_t popGrapheme (Input)(ref Input inp)
7332
+ if (isInputRange! Input && is (immutable ElementType! Input == immutable dchar ))
7333
+ {
7334
+ static if (isAutodecodableString! Input || hasLength! Input)
7335
+ {
7336
+ // The length is available here, so measure the consumed grapheme as a
7337
+ // length difference instead of counting each step in the decoder.
7338
+ auto n = inp.length;
7339
+ genericDecodeGrapheme! (GraphemeRet.none)(inp);
7340
+ return n - inp.length;
7341
+ }
7342
+ else return genericDecodeGrapheme! (GraphemeRet.step)(inp);
7343
+ }
7344
+
7345
+ ///
7346
+ @safe pure unittest
7347
+ {
7348
+ // Each string contains two Union Jack flags (two graphemes of two code points each)
7349
+ string s = " \U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7 " ;
7350
+ wstring ws = " \U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7 " ;
7351
+ dstring ds = " \U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7 " ;
7352
+
7353
+ // The returned length is in code units, not code points.
7354
+ assert (s.popGrapheme() == 8 );
7355
+ assert (ws.popGrapheme() == 4 );
7356
+ assert (ds.popGrapheme() == 2 );
7357
+
7358
+ assert (s == " \U0001F1EC\U0001F1E7 " );
7359
+ assert (ws == " \U0001F1EC\U0001F1E7 " );
7360
+ assert (ds == " \U0001F1EC\U0001F1E7 " );
7361
+
7362
+ import std.algorithm.comparison : equal;
7363
+ import std.algorithm.iteration : filter;
7364
+
7365
+ // Also works for non-random access ranges as long as the
7366
+ // character type is 32-bit.
7367
+ auto testPiece = " \r\n hello!" d.filter! (x => ! x.isAlpha);
7368
+ // Windows-style line ending is two code points in a single grapheme.
7369
+ assert (testPiece.popGrapheme() == 2 );
7370
+ assert (testPiece.equal(" !" d));
7371
+ }
7372
+
7373
+ // Attribute compliance test. Should be `nothrow` `@nogc` when
7374
+ // no autodecoding is needed.
7375
+ @safe pure nothrow @nogc unittest
7376
+ {
7377
+ import std.algorithm.iteration : filter;
7378
+
7379
+ auto str = " abcdef" d;
7380
+ assert (str.popGrapheme() == 1 );
7381
+
7382
+ // also test with non-random access
7383
+ auto filtered = " abcdef" d.filter! (x => x% 2 );
7384
+ assert (filtered.popGrapheme() == 1 );
7385
+ }
7386
+
7307
7387
/+ +
7308
7388
$(P Iterate a string by $(LREF Grapheme).)
7309
7389
@@ -7722,7 +7802,7 @@ public:
7722
7802
@property bool valid()() /* const*/
7723
7803
{
7724
7804
auto r = this [];
7725
- genericDecodeGrapheme! false (r);
7805
+ genericDecodeGrapheme! (GraphemeRet.none) (r);
7726
7806
return r.length == 0 ;
7727
7807
}
7728
7808
0 commit comments