Skip to content

Commit 683a00a

Browse files
committed
string扩展模块补充utf8各种支持(只是给自带utf8库弄了一套和其他函数风格的名称和注释)
整理代码
1 parent b2c77f2 commit 683a00a

File tree

3 files changed

+124
-55
lines changed

3 files changed

+124
-55
lines changed

stringExtend.lua

Lines changed: 86 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ end
107107
---Paste new string into original string, won't exceed the length of original string
108108
---@param str string
109109
---@param str2 string
110-
---@param pos number
110+
---@param pos integer
111111
---@return string
112112
---@nodiscard
113113
function STRING.paste(str,str2,pos)
@@ -156,6 +156,80 @@ function STRING.unshift(str)
156156
return unshiftMap[str] or lower(str)
157157
end
158158

159+
local utf8=require'utf8'
160+
161+
---Simple utf8 coding
162+
---@param num integer
163+
---@return string
164+
---@nodiscard
165+
function STRING.UTF8(num)
    assertf(type(num)=='number',"Wrong type (%s)",type(num))
    assertf(num>=0 and num<2^31,"Out of range (%d)",num)

    -- Every valid Unicode code point (U+0000 .. U+10FFFF, inclusive) is encoded by the utf8 library
    if num<=0x10FFFF then return utf8.char(num)

    -- Values above the Unicode range are encoded by hand with the legacy 4/5/6-byte forms
    -- NOTE(review): presumably kept because utf8.char may reject values above U+10FFFF on some runtimes - confirm
    -- 4 bytes: lead byte 240 (0xF0) followed by three continuation bytes (128 + 6 bits each)
    elseif num<2^21 then return char(240+floor(num/2^18),128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    -- 5 bytes: lead byte 248 (0xF8) followed by four continuation bytes
    elseif num<2^26 then return char(248+floor(num/2^24),128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    -- 6 bytes: lead byte 252 (0xFC) followed by five continuation bytes (covers everything below 2^31)
    else return char(252+floor(num/2^30),128+floor(num/2^24)%2^6,128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    end
end
177+
178+
---Get the (unicode) char count of (part of) a utf8 string
179+
---@param str string a utf8 string
180+
---@param i? integer start byte (cannot start from middle of a char)
181+
---@param j? integer end byte
182+
---@param lax? boolean
183+
---@nodiscard
184+
---@diagnostic disable-next-line
185+
function STRING.u8len(str,i,j,lax) end
186+
STRING.u8len=utf8[('len')]
187+
188+
---Get a utf8 string with codepoint numbers
189+
---@param ... integer
190+
---@return string
191+
---@nodiscard
192+
---@diagnostic disable-next-line
193+
function STRING.u8char(...) end
194+
STRING.u8char=utf8[('char')]
195+
196+
---Get the codepoint(s) of a utf8 string
197+
---@param str string
198+
---@param i? integer start byte (cannot start from middle of a char)
199+
---@param j? integer end byte
200+
---@param lax? boolean
201+
---@return integer ...
202+
---@nodiscard
203+
---@diagnostic disable-next-line
204+
function STRING.u8byte(str,i,j,lax) end
205+
STRING.u8byte=utf8[('codepoint')]
206+
207+
---Get the byte position where the `n`-th character starts, counting from byte `i`
208+
---@param str string
209+
---@param n integer char count
210+
---@param i? integer start byte
211+
---@return integer p byte position where that character starts
212+
---@nodiscard
213+
---@diagnostic disable-next-line
214+
function STRING.u8offset(str,n,i) end
215+
STRING.u8offset=utf8[('offset')]
216+
217+
---Get a char iterator function of a utf8 string, similar to `str:gmatch('.')`
218+
---
219+
--- ```lua
220+
--- for bytePos,codepoint in STRING.u8codes(str) do
221+
--- -- code here
222+
--- end
--- ```
223+
---@param str string
224+
---@param lax? boolean
225+
---@return fun(s:string, p:integer): integer,integer
226+
---@diagnostic disable-next-line
227+
function STRING.u8codes(str,lax) end
228+
STRING.u8codes=utf8[('codes')]
229+
230+
---Matches exactly one UTF-8 byte sequence, assuming that the subject is a valid UTF-8 string
231+
STRING.u8pattern="[\0-\x7F\xC2-\xFD][\x80-\xBF]*"
232+
159233
local upperData,lowerData,diaData -- Data is filled later in this file
160234

161235
---string.upper with utf8 support, warning: low performance
@@ -213,7 +287,7 @@ end
213287
---Count the number of occurrences of a regex pattern in a string
214288
---@param str string
215289
---@param regex string
216-
---@return number
290+
---@return integer
217291
---@nodiscard
218292
function STRING.count(str,regex)
219293
local _,count=gsub(str,regex,'')
@@ -231,7 +305,7 @@ end
231305
---end
232306
---```
233307
---@param str string
234-
---@param keep? boolean | number Max number of leading spaces to be trimmed: `nil`- all `number` - as you want `true` - same with line#1
308+
---@param keep? boolean | integer Max number of leading spaces to be trimmed: `nil` - all, `integer` - as you want, `true` - same as line#1
235309
---@return string
236310
---@nodiscard
237311
function STRING.trimIndent(str,keep)
@@ -313,7 +387,7 @@ end
313387
---Calculate the edit distance between two strings
314388
---@param s1 string
315389
---@param s2 string
316-
---@return number
390+
---@return integer
317391
---@nodiscard
318392
function STRING.editDist(s1,s2) -- By Copilot
319393
local len1,len2=#s1,#s2
@@ -389,7 +463,7 @@ end
389463

390464
---**Warning:** don't support number format like .26, must have digits before the dot, like 0.26
391465
---@param str string
392-
---@return number | nil, string | nil
466+
---@return number | nil, string | nil
393467
---@nodiscard
394468
function STRING.cutUnit(str)
395469
local _s,_e=find(str,'^-?%d+%.?%d*')
@@ -426,22 +500,6 @@ STRING.base64={} for c in gmatch('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst
426500
table.insert(STRING.base64,c)
427501
end
428502

429-
---Simple utf8 coding
430-
---@param num number
431-
---@return string
432-
---@nodiscard
433-
function STRING.UTF8(num)
434-
assertf(type(num)=='number',"Wrong type (%s)",type(num))
435-
assertf(num>=0 and num<2^31,"Out of range (%d)",num)
436-
if num<2^7 then return char(num)
437-
elseif num<2^11 then return char(192+floor(num/2^06),128+num%2^6)
438-
elseif num<2^16 then return char(224+floor(num/2^12),128+floor(num/2^06)%2^6,128+num%2^6)
439-
elseif num<2^21 then return char(240+floor(num/2^18),128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
440-
elseif num<2^26 then return char(248+floor(num/2^24),128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
441-
else return char(252+floor(num/2^30),128+floor(num/2^24)%2^6,128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
442-
end
443-
end
444-
445503
---Parse binary number from string
446504
---@param str string
447505
---@return number
@@ -484,7 +542,7 @@ end
484542

485543
---Convert a number to binary string
486544
---@param num number
487-
---@param len? number
545+
---@param len? integer
488546
---@return string
489547
---@nodiscard
490548
function STRING.toBin(num,len)
@@ -498,7 +556,7 @@ end
498556

499557
---Convert a number to octal string
500558
---@param num number
501-
---@param len? number
559+
---@param len? integer
502560
---@return string
503561
---@nodiscard
504562
function STRING.toOct(num,len)
@@ -515,7 +573,7 @@ b16[0]='0'
515573

516574
---Convert an integer to hexadecimal string
517575
---@param num number
518-
---@param len? number
576+
---@param len? integer
519577
---@return string
520578
---@nodiscard
521579
function STRING.toHex(num,len)
@@ -587,8 +645,8 @@ end
587645

588646
---Return 16 byte string. Not powerful hash, just simply protect the original text
589647
---@param text string
590-
---@param seedRange? number default to 26
591-
---@param seed? number default to 0
648+
---@param seedRange? integer default to 26
649+
---@param seed? integer default to 0
592650
---@return string
593651
---@nodiscard
594652
function STRING.digezt(text,seedRange,seed)
@@ -628,7 +686,7 @@ end
628686

629687
---Cut n bytes off a string
630688
---@param str string
631-
---@param n number
689+
---@param n integer
632690
---@return string, string #`n` bytes, and the rest of string
633691
---@nodiscard
634692
function STRING.readChars(str,n)
@@ -642,7 +700,7 @@ end
642700
---STRING.simplifyPath('Documents/Project/xxx.lua',3) --> 'Doc/Pro/xxx.lua'
643701
---```
644702
---@param path string
645-
---@param len? number default to 1
703+
---@param len? integer default to 1
646704
---@return string
647705
---@nodiscard
648706
function STRING.simplifyPath(path,len)

tableExtend.lua

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ for k,v in next,table do TABLE[k]=v end
1717
---https://luajit.org/extensions.html
1818
---@generic T
1919
---@param val T value to fill
20-
---@param count number how many elements
20+
---@param count integer how many elements
2121
---@return T[]
2222
---@nodiscard
2323
function TABLE.new(val,count)
@@ -28,11 +28,22 @@ function TABLE.new(val,count)
2828
return L
2929
end
3030

31+
---Create a new table with specific size allocated
32+
---
33+
---Fallback to `return {}` if failed to require `table.new`
34+
---@param nArray? integer the size of "list part" of the table
35+
---@param nHash? integer the size of "hash part" of the table
36+
---@return table
37+
---@nodiscard
38+
---@diagnostic disable-next-line
39+
function TABLE.newSize(nArray,nHash) return {} end
40+
pcall(function() TABLE[('newSize')]=require'table.new' end)
41+
3142
---Create a new filled matrix
3243
---@generic T
3344
---@param val T value to fill
34-
---@param height number
35-
---@param width number
45+
---@param height integer
46+
---@param width integer
3647
---@return Mat<T>
3748
---@nodiscard
3849
function TABLE.newMat(val,height,width)
@@ -51,8 +62,8 @@ end
5162
---leave `start&stop` as `nil` will simply copy
5263
---@generic T
5364
---@param org T original table
54-
---@param start? number start pos (default 1)
55-
---@param stop? number end pos (default #org)
65+
---@param start? integer start pos (default 1)
66+
---@param stop? integer end pos (default #org)
5667
---@return T
5768
---@nodiscard
5869
function TABLE.sub(org,start,stop)
@@ -67,7 +78,7 @@ end
6778
---Create a copy of [1~#] elements
6879
---@generic T
6980
---@param org T original table
70-
---@param depth? number how many layers will be recreate, default to inf
81+
---@param depth? integer how many layers will be recreated, default to inf
7182
---@return T
7283
---@nodiscard
7384
function TABLE.copy(org,depth)
@@ -86,7 +97,7 @@ end
8697
---Create a full copy of org, depth = how many layers will be recreated, default to inf
8798
---@generic T
8899
---@param org T original table
89-
---@param depth? number how many layers will be recreate, default to inf
100+
---@param depth? integer how many layers will be recreated, default to inf
90101
---@return T
91102
---@nodiscard
92103
function TABLE.copyAll(org,depth)
@@ -303,7 +314,7 @@ end
303314
---Update old table with new table (recursive when both are tables and below the specified depth)
304315
---@param new table
305316
---@param old table
306-
---@param depth? number how many layer will be entered, default to inf
317+
---@param depth? integer how many layer will be entered, default to inf
307318
function TABLE.update(old,new,depth)
308319
if not depth then depth=1e99 end
309320
for k,v in next,new do
@@ -506,7 +517,7 @@ end
506517
---]]
507518
---```
508519
---@param org table
509-
---@param depth? number how many layer will be entered and flattened, default to inf
520+
---@param depth? integer how many layer will be entered and flattened, default to inf
510521
function TABLE.flatten(org,depth)
511522
if not depth then depth=1e99 end
512523
while depth>0 do
@@ -534,8 +545,8 @@ end
534545
---Find value in [1~#], like string.find
535546
---@param t any[]
536547
---@param val any
537-
---@param start? number
538-
---@return number? key
548+
---@param start? integer
549+
---@return integer? key
539550
---@nodiscard
540551
function TABLE.find(t,val,start)
541552
for i=start or 1,#t do if t[i]==val then return i end end
@@ -544,7 +555,7 @@ end
544555
---TABLE.find for ordered list only, faster (binary search)
545556
---@param t any[]
546557
---@param val any
547-
---@return number | nil key
558+
---@return integer | nil key
548559
---@nodiscard
549560
function TABLE.findOrdered(t,val)
550561
if val<t[1] or val>t[#t] then return nil end
@@ -576,7 +587,7 @@ end
576587
---@param t any[]
577588
---@param v_old any
578589
---@param v_new any
579-
---@param start? number
590+
---@param start? integer
580591
function TABLE.replace(t,v_old,v_new,start)
581592
for i=start or 1,#t do
582593
if t[i]==v_old then
@@ -673,7 +684,7 @@ do -- function TABLE.dumpDeflate(t,depth)
673684
end
674685
---Dump a simple lua table (no whitespaces)
675686
---@param t table
676-
---@param depth? number how many layers will be dumped, default to inf
687+
---@param depth? integer how many layers will be dumped, default to inf
677688
---@return string
678689
---@nodiscard
679690
function TABLE.dumpDeflate(t,depth)
@@ -740,7 +751,7 @@ do -- function TABLE.dump(t,depth)
740751
end
741752
---Dump a simple lua table
742753
---@param t table
743-
---@param depth? number how many layers will be dumped, default to inf
754+
---@param depth? integer how many layers will be dumped, default to inf
744755
---@return string
745756
---@nodiscard
746757
function TABLE.dump(t,depth)
@@ -754,7 +765,7 @@ end
754765

755766
---Get element count of table
756767
---@param t table
757-
---@return number
768+
---@return integer
758769
---@nodiscard
759770
function TABLE.getSize(t)
760771
local size=0
@@ -765,7 +776,7 @@ end
765776
---Count value repeating time in [1~#]
766777
---@param t any[]
767778
---@param val any
768-
---@return number
779+
---@return integer
769780
---@nodiscard
770781
function TABLE.count(t,val)
771782
local count=0
@@ -780,7 +791,7 @@ end
780791
---Count value repeating time in whole table
781792
---@param t table
782793
---@param val any
783-
---@return number
794+
---@return integer
784795
---@nodiscard
785796
function TABLE.countAll(t,val)
786797
local count=0
@@ -810,7 +821,7 @@ end
810821
---Execute func(table[i],i) in [1~#]
811822
---@generic T
812823
---@param t T[]
813-
---@param f fun(v:T, i:number)
824+
---@param f fun(v:T, i:integer)
814825
---@param rev? boolean Reverse the order, allow removing elements from list
815826
function TABLE.foreach(t,f,rev)
816827
if rev then
@@ -826,7 +837,7 @@ end
826837

827838
---Execute func(table[k],k) for all elements in table (only allow removing elements with t[k]=nil)
828839
---@param t table
829-
---@param f fun(v:any, k:number)
840+
---@param f fun(v:any, k:any)
830841
function TABLE.foreachAll(t,f)
831842
for k,v in next,t do
832843
f(v,k)

0 commit comments

Comments
 (0)