Skip to content

Commit 82a0957

Browse files
committed
SSE2 CRC Implementation
1 parent d6e7f28 commit 82a0957

9 files changed

Lines changed: 1253 additions & 280 deletions

File tree

HashLib/src/Checksum/HlpCRC.pas

Lines changed: 29 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ interface
1818
HlpHashResult,
1919
HlpIHashResult,
2020
HlpICRC,
21-
HlpGF2;
21+
HlpCRCDispatch;
2222

2323
resourcestring
2424
SUnSupportedCRCType = 'UnSupported CRC Type: "%s"';
@@ -577,7 +577,7 @@ TCRC = class sealed(THash, IChecksum, ICRC, ITransformBlock)
577577
type
578578
TCRCCacheValue = record
579579
Table: THashLibMatrixUInt64Array;
580-
FoldConstants: TCRCFoldConstants;
580+
FoldRuntime: TCRCFoldRuntimeCtx64;
581581
end;
582582

583583
class var
@@ -622,11 +622,10 @@ TCRCCacheValue = record
622622
function GetCheckValue: UInt64; inline;
623623
procedure SetCheckValue(AValue: UInt64); inline;
624624

625-
// tables work only for CRCs with width > 7
626-
procedure CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
627-
// fast bit by bit algorithm without augmented zero bytes.
628-
// does not use lookup table, suited for polynomial orders between 1...32.
629-
procedure CalculateCRCdirect(AData: PByte; ADataLength, AIndex: Int32);
625+
// Table-driven byte-at-a-time path: used when the length is below MinSimdBytes, or for the tail left after folding (less than one 16-byte block).
626+
procedure UpdateCRCViaByteTable(AData: PByte; ADataLength, AIndex: Int32);
627+
// Bit-serial update without table (width <= MinTableWidth).
628+
procedure UpdateCRCViaBitSerial(AData: PByte; ADataLength, AIndex: Int32);
630629

631630
// reflects the lower 'width' bits of 'value'
632631
class function Reflect(AValue: UInt64; AWidth: Int32): UInt64; static;
@@ -664,9 +663,6 @@ TCRCCacheValue = record
664663

665664
implementation
666665

667-
uses
668-
HlpCRCDispatch;
669-
670666
{ TCRC }
671667

672668
function TCRC.GetCheckValue: UInt64;
@@ -754,14 +750,12 @@ function TCRC.GetName: String;
754750
Result := Format('T%s', [Names[0]]);
755751
end;
756752

757-
procedure TCRC.CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
753+
procedure TCRC.UpdateCRCViaByteTable(AData: PByte; ADataLength, AIndex: Int32);
758754
var
759755
LLength: Int32;
760-
LTemp, LQWord1, LQWord2, LNewTemp, LTempCopy: UInt64;
756+
LTemp: UInt64;
761757
LCRCTable: THashLibMatrixUInt64Array;
762758
LPtrData: PByte;
763-
LBIdx, LCrcBytes: Int32;
764-
LByte: Byte;
765759
begin
766760
LLength := ADataLength;
767761
LPtrData := AData + AIndex;
@@ -770,34 +764,6 @@ procedure TCRC.CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
770764

771765
if IsInputReflected then
772766
begin
773-
// Slicing-by-16: process 16 bytes per iteration using UInt64 reads
774-
while LLength >= 16 do
775-
begin
776-
LQWord1 := PUInt64(LPtrData)^ xor LTemp;
777-
LQWord2 := PUInt64(LPtrData + 8)^;
778-
779-
LTemp := LCRCTable[15][Byte(LQWord1)]
780-
xor LCRCTable[14][Byte(LQWord1 shr 8)]
781-
xor LCRCTable[13][Byte(LQWord1 shr 16)]
782-
xor LCRCTable[12][Byte(LQWord1 shr 24)]
783-
xor LCRCTable[11][Byte(LQWord1 shr 32)]
784-
xor LCRCTable[10][Byte(LQWord1 shr 40)]
785-
xor LCRCTable[9][Byte(LQWord1 shr 48)]
786-
xor LCRCTable[8][Byte(LQWord1 shr 56)]
787-
xor LCRCTable[7][Byte(LQWord2)]
788-
xor LCRCTable[6][Byte(LQWord2 shr 8)]
789-
xor LCRCTable[5][Byte(LQWord2 shr 16)]
790-
xor LCRCTable[4][Byte(LQWord2 shr 24)]
791-
xor LCRCTable[3][Byte(LQWord2 shr 32)]
792-
xor LCRCTable[2][Byte(LQWord2 shr 40)]
793-
xor LCRCTable[1][Byte(LQWord2 shr 48)]
794-
xor LCRCTable[0][Byte(LQWord2 shr 56)];
795-
796-
System.Inc(LPtrData, 16);
797-
System.Dec(LLength, 16);
798-
end;
799-
800-
// Remaining 1..15 bytes: byte-at-a-time using row 0
801767
while LLength > 0 do
802768
begin
803769
LTemp := (LTemp shr 8) xor LCRCTable[0][Byte(LTemp xor LPtrData^)];
@@ -807,34 +773,6 @@ procedure TCRC.CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
807773
end
808774
else
809775
begin
810-
// Non-reflected: slicing-by-16 with byte reads
811-
LCrcBytes := (Width + 7) shr 3;
812-
813-
while LLength >= 16 do
814-
begin
815-
LNewTemp := UInt64(0);
816-
LTempCopy := LTemp;
817-
818-
LBIdx := 0;
819-
while LBIdx < LCrcBytes do
820-
begin
821-
LByte := LPtrData[LBIdx] xor Byte(LTempCopy shr (Width - 8));
822-
LTempCopy := (LTempCopy shl 8) and FCRCMask;
823-
LNewTemp := LNewTemp xor LCRCTable[15 - LBIdx][LByte];
824-
System.Inc(LBIdx);
825-
end;
826-
while LBIdx < 16 do
827-
begin
828-
LNewTemp := LNewTemp xor LCRCTable[15 - LBIdx][LPtrData[LBIdx]];
829-
System.Inc(LBIdx);
830-
end;
831-
832-
LTemp := LNewTemp;
833-
System.Inc(LPtrData, 16);
834-
System.Dec(LLength, 16);
835-
end;
836-
837-
// Remaining 1..15 bytes: byte-at-a-time using row 0
838776
while LLength > 0 do
839777
begin
840778
LTemp := (LTemp shl 8) xor LCRCTable[0]
@@ -847,39 +785,10 @@ procedure TCRC.CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
847785
FHash := LTemp;
848786
end;
849787

850-
procedure TCRC.CalculateCRCdirect(AData: PByte; ADataLength, AIndex: Int32);
851-
var
852-
LLength, LIdx: Int32;
853-
LTemp, LBit, LJdx, LHash: UInt64;
788+
procedure TCRC.UpdateCRCViaBitSerial(AData: PByte; ADataLength, AIndex: Int32);
854789
begin
855-
856-
LLength := ADataLength;
857-
LIdx := AIndex;
858-
while LLength > 0 do
859-
begin
860-
LTemp := UInt64(AData[LIdx]);
861-
if (IsInputReflected) then
862-
begin
863-
LTemp := Reflect(LTemp, 8);
864-
end;
865-
866-
LJdx := $80;
867-
LHash := FHash;
868-
while LJdx > 0 do
869-
begin
870-
LBit := LHash and FCRCHighBitMask;
871-
LHash := LHash shl 1;
872-
if ((LTemp and LJdx) > 0) then
873-
LBit := LBit xor FCRCHighBitMask;
874-
if (LBit > 0) then
875-
LHash := LHash xor Polynomial;
876-
LJdx := LJdx shr 1;
877-
end;
878-
FHash := LHash;
879-
System.Inc(LIdx);
880-
System.Dec(LLength);
881-
end;
882-
790+
CRC_UpdateViaBitSerial(AData, ADataLength, AIndex, FHash, Polynomial, Width,
791+
IsInputReflected, FCRCHighBitMask);
883792
end;
884793

885794
function TCRC.Clone(): IHash;
@@ -1484,8 +1393,8 @@ class function TCRC.GetOrCreateCacheEntry(APoly: UInt64; AWidth: Int32;
14841393
if not FCache.TryGetValue(LKey, Result) then
14851394
begin
14861395
Result.Table := GenerateCRCTable(APoly, AWidth, AReflected);
1487-
TGF2.GenerateFoldConstants(APoly, AWidth, AReflected,
1488-
Result.FoldConstants);
1396+
CRCDispatch_InitRuntimeCtx64(Result.Table, APoly, AWidth, AReflected,
1397+
Result.FoldRuntime);
14891398
FCache.Add(LKey, Result);
14901399
end;
14911400
end;
@@ -1543,30 +1452,30 @@ procedure TCRC.TransformBytes(const AData: THashLibByteArray;
15431452
else
15441453
begin
15451454
LFoldFunc := CRC_Fold_Msb;
1546-
if Width < 64 then
1547-
LState[0] := FHash shl (64 - Width)
1455+
if CRC_Fold_UsesPclmul then
1456+
begin
1457+
if Width < 64 then
1458+
LState[0] := FHash shl (64 - Width)
1459+
else
1460+
LState[0] := FHash;
1461+
end
15481462
else
15491463
LState[0] := FHash;
15501464
end;
15511465

1552-
if Assigned(LFoldFunc) then
1553-
begin
1554-
LState[1] := 0;
1555-
LProcessed := ALength and (not Int32(15));
1556-
FHash := LFoldFunc(LPtrAData + AIndex, UInt32(LProcessed),
1557-
@LState[0], @FCacheEntry.FoldConstants) and FCRCMask;
1558-
LTail := ALength - LProcessed;
1559-
if LTail > 0 then
1560-
CalculateCRCbyTable(LPtrAData, LTail, AIndex + LProcessed);
1561-
end
1562-
else
1563-
CalculateCRCbyTable(LPtrAData, ALength, AIndex);
1466+
LState[1] := 0;
1467+
LProcessed := ALength and (not Int32(15));
1468+
FHash := LFoldFunc(LPtrAData + AIndex, UInt32(LProcessed), @LState[0],
1469+
@FCacheEntry.FoldRuntime) and FCRCMask;
1470+
LTail := ALength - LProcessed;
1471+
if LTail > 0 then
1472+
UpdateCRCViaByteTable(LPtrAData, LTail, AIndex + LProcessed);
15641473
end
15651474
else
1566-
CalculateCRCbyTable(LPtrAData, ALength, AIndex);
1475+
UpdateCRCViaByteTable(LPtrAData, ALength, AIndex);
15671476
end
15681477
else
1569-
CalculateCRCdirect(LPtrAData, ALength, AIndex);
1478+
UpdateCRCViaBitSerial(LPtrAData, ALength, AIndex);
15701479
end;
15711480

15721481
function TCRC.TransformFinal: IHashResult;

0 commit comments

Comments
 (0)