Skip to content

Commit 9be24ba

Browse files
committed
[GR-65890] Conversions between floats and unsigned integers, SSE/AVX/AVX512
PullRequest: graal/21300
2 parents e4889ae + 2b1c2b1 commit 9be24ba

13 files changed

+443
-74
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ public static class AMD64MIOp extends AMD64ImmOp {
596596
// @formatter:off
597597
public static final AMD64MIOp BT = new AMD64MIOp("BT", true, P_0F, 0xBA, 4, true, OpAssertion.WordOrLargerAssertion);
598598
public static final AMD64MIOp BTR = new AMD64MIOp("BTR", true, P_0F, 0xBA, 6, true, OpAssertion.WordOrLargerAssertion);
599+
public static final AMD64MIOp BTS = new AMD64MIOp("BTS", true, P_0F, 0xBA, 5, true, OpAssertion.WordOrLargerAssertion);
599600
public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, false, OpAssertion.ByteAssertion);
600601
public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0, false, OpAssertion.WordOrLargerAssertion);
601602
public static final AMD64MIOp SAR = new AMD64MIOp("SAR", true, 0xC1, 7, true, OpAssertion.WordOrLargerAssertion);
@@ -679,8 +680,9 @@ public boolean isMemRead() {
679680
* <p>
680681
* Note that when {@code src} is a memory address, we will choose {@code dst} as {@code nds}
681682
* even if {@link PreferredNDS#SRC} is specified, which implies an implicit dependency to
682-
* {@code dst}. In {@link jdk.graal.compiler.lir.amd64.vector.AMD64VectorUnary.AVXConvertOp}, we
683-
* manually insert an {@code XOR} instruction for {@code dst}.
683+
* {@code dst}. In
684+
* {@link jdk.graal.compiler.lir.amd64.vector.AMD64VectorUnary.AVXConvertToFloatOp}, we manually
685+
* insert an {@code XOR} instruction for {@code dst}.
684686
*/
685687
private enum PreferredNDS {
686688
NONE,
@@ -1316,9 +1318,11 @@ private enum VEXOpAssertion {
13161318
XMM_CPU_AVX1_AVX512BW_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512BW_128, XMM, null, CPU),
13171319
XMM_CPU_AVX1_AVX512DQ_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512DQ_128, XMM, null, CPU),
13181320
CPU_XMM_AVX1_AVX512F_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512F_128, CPU, null, XMM),
1321+
CPU_XMM_AVX512F_128ONLY(null, EVEXFeatureAssertion.AVX512F_128, CPU, null, XMM),
13191322
XMM_XMM_CPU_AVX1_AVX512F_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512F_128, XMM, XMM, CPU),
13201323
XMM_XMM_CPU_AVX1_AVX512BW_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512BW_128, XMM, XMM, CPU),
13211324
XMM_XMM_CPU_AVX1_AVX512DQ_128ONLY(VEXFeatureAssertion.AVX1_128, EVEXFeatureAssertion.AVX512DQ_128, XMM, XMM, CPU),
1325+
XMM_XMM_CPU_AVX512F_128ONLY(null, EVEXFeatureAssertion.AVX512F_128, XMM, XMM, CPU),
13221326
XMM_CPU_AVX512BW_VL(null, EVEXFeatureAssertion.AVX512F_BW_VL, XMM, null, CPU),
13231327
XMM_CPU_AVX512F_VL(null, EVEXFeatureAssertion.AVX512F_VL, XMM, null, CPU),
13241328
AVX1_AVX512F_VL(VEXFeatureAssertion.AVX1, EVEXFeatureAssertion.AVX512F_VL, XMM, XMM, XMM),
@@ -1680,8 +1684,12 @@ public static class VexRMOp extends VexRROp {
16801684
// EVEX encoded instructions
16811685
public static final VexRMOp EVCVTTSS2SI = new VexRMOp("EVCVTTSS2SI", VCVTTSS2SI);
16821686
public static final VexRMOp EVCVTTSS2SQ = new VexRMOp("EVCVTTSS2SQ", VCVTTSS2SQ);
1687+
public static final VexRMOp EVCVTTSS2USI = new VexRMOp("EVCVTTSS2USI", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x78, VEXOpAssertion.CPU_XMM_AVX512F_128ONLY, EVEXTuple.T1F_32BIT, VEXPrefixConfig.W0, true);
1688+
public static final VexRMOp EVCVTTSS2USQ = new VexRMOp("EVCVTTSS2USQ", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.W1, 0x78, VEXOpAssertion.CPU_XMM_AVX512F_128ONLY, EVEXTuple.T1F_32BIT, VEXPrefixConfig.W1, true);
16831689
public static final VexRMOp EVCVTTSD2SI = new VexRMOp("EVCVTTSD2SI", VCVTTSD2SI);
16841690
public static final VexRMOp EVCVTTSD2SQ = new VexRMOp("EVCVTTSD2SQ", VCVTTSD2SQ);
1691+
public static final VexRMOp EVCVTTSD2USI = new VexRMOp("EVCVTTSD2USI", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x78, VEXOpAssertion.CPU_XMM_AVX512F_128ONLY, EVEXTuple.T1F_64BIT, VEXPrefixConfig.W0, true);
1692+
public static final VexRMOp EVCVTTSD2USQ = new VexRMOp("EVCVTTSD2USQ", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W1, 0x78, VEXOpAssertion.CPU_XMM_AVX512F_128ONLY, EVEXTuple.T1F_64BIT, VEXPrefixConfig.W1, true);
16851693
public static final VexRMOp EVCVTPS2PD = new VexRMOp("EVCVTPS2PD", VCVTPS2PD);
16861694
public static final VexRMOp EVCVTPD2PS = new VexRMOp("EVCVTPD2PS", VCVTPD2PS);
16871695
public static final VexRMOp EVCVTDQ2PS = new VexRMOp("EVCVTDQ2PS", VCVTDQ2PS);
@@ -2588,12 +2596,21 @@ public static final class VexRVMConvertOp extends VexRVMOp {
25882596
public static final VexRVMConvertOp EVCVTSQ2SD = new VexRVMConvertOp("EVCVTSQ2SD", VCVTSQ2SD);
25892597
public static final VexRVMConvertOp EVCVTSI2SS = new VexRVMConvertOp("EVCVTSI2SS", VCVTSI2SS);
25902598
public static final VexRVMConvertOp EVCVTSQ2SS = new VexRVMConvertOp("EVCVTSQ2SS", VCVTSQ2SS);
2599+
2600+
// Unsigned-integer-to-float conversion ops. All four use opcode 0x7B with the
// XMM_XMM_CPU_AVX512F_128ONLY assertion and pass isEvex = true.
// NOTE(review): the two USQ declarations below pass VEXPrefixConfig.W0 as the `w` argument but
// VEXPrefixConfig.W1 as `wEvex`, whereas VexRMOp.EVCVTTSS2USQ / VexRMOp.EVCVTTSD2USQ pass W1 in
// both positions. Presumably `w` is unused when isEvex is true - confirm, or align the values.
public static final VexRVMConvertOp EVCVTUSI2SD = new VexRVMConvertOp("EVCVTUSI2SD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x7B, VEXOpAssertion.XMM_XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0, true);
2601+
public static final VexRVMConvertOp EVCVTUSQ2SD = new VexRVMConvertOp("EVCVTUSQ2SD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x7B, VEXOpAssertion.XMM_XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1, true);
2602+
public static final VexRVMConvertOp EVCVTUSI2SS = new VexRVMConvertOp("EVCVTUSI2SS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x7B, VEXOpAssertion.XMM_XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0, true);
2603+
public static final VexRVMConvertOp EVCVTUSQ2SS = new VexRVMConvertOp("EVCVTUSQ2SS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x7B, VEXOpAssertion.XMM_XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1, true);
25912604
// @formatter:on
25922605

25932606
/**
 * Creates a convert op from explicit encoding fields. Every argument is forwarded unchanged
 * to the eight-argument superclass constructor; this constructor adds no state of its own.
 */
private VexRVMConvertOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
25942607
super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
25952608
}
25962609

2610+
/**
 * Creates a convert op from explicit encoding fields plus an {@code isEvex} flag. Every
 * argument is forwarded unchanged to the nine-argument superclass constructor. The
 * {@code EVCVTUS*} constants of this class use this overload and pass {@code true} for
 * {@code isEvex}.
 */
private VexRVMConvertOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex, boolean isEvex) {
2611+
super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex, isEvex);
2612+
}
2613+
25972614
/**
25982615
* Build the EVEX variant of a given vexOp.
25992616
*/
@@ -4863,6 +4880,10 @@ public final void btrq(Register src, int imm8) {
48634880
AMD64MIOp.BTR.emit(this, OperandSize.QWORD, src, imm8);
48644881
}
48654882

4883+
public final void btsq(Register src, int imm8) {
4884+
AMD64MIOp.BTS.emit(this, OperandSize.QWORD, src, imm8);
4885+
}
4886+
48664887
public final void cmpb(Register dst, Register src) {
48674888
AMD64BinaryArithmetic.CMP.byteRmOp.emit(this, OperandSize.BYTE, dst, src);
48684889
}
@@ -5874,6 +5895,14 @@ public final void subsd(Register dst, AMD64Address src) {
58745895
SSEOp.SUB.emit(this, OperandSize.SD, dst, src);
58755896
}
58765897

5898+
public final void subss(Register dst, Register src) {
5899+
SSEOp.SUB.emit(this, OperandSize.SS, dst, src);
5900+
}
5901+
5902+
public final void subss(Register dst, AMD64Address src) {
5903+
SSEOp.SUB.emit(this, OperandSize.SS, dst, src);
5904+
}
5905+
58775906
public final void testl(Register dst, Register src) {
58785907
AMD64RMOp.TEST.emit(this, OperandSize.DWORD, dst, src);
58795908
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/aarch64/AArch64LoweringProviderMixin.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,12 @@ default boolean supportsFoldingExtendIntoAccess(ExtendableMemoryAccess access, M
9090
}
9191

9292
@Override
93-
default boolean supportsUnsignedFloatConvert() {
93+
default boolean supportsFloatToUnsignedConvert() {
94+
return true;
95+
}
96+
97+
@Override
98+
default boolean supportsUnsignedToFloatConvert() {
9499
return true;
95100
}
96101
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/amd64/AMD64ArithmeticLIRGenerator.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -867,10 +867,16 @@ private AllocatableValue emitConvertOp(LIRKind kind, AMD64MROp op, OperandSize s
867867
* Emit a floating point to integer conversion that needs fixup code to adjust the result to
868868
* Java semantics.
869869
*/
870-
private AllocatableValue emitFloatConvertWithFixup(LIRKind kind, AMD64RMOp op, OperandSize size, Value input, boolean canBeNaN, boolean canOverflow) {
870+
private AllocatableValue emitFloatConvertWithFixup(LIRKind kind, AMD64RMOp op, OperandSize size, Value input, boolean canBeNaN, boolean canOverflow, NumUtil.Signedness signedness) {
871871
Variable result = getLIRGen().newVariable(kind);
872872
AMD64ConvertFloatToIntegerOp.OpcodeEmitter emitter = (crb, masm, dst, src) -> op.emit(masm, size, dst, src);
873-
getLIRGen().append(new AMD64ConvertFloatToIntegerOp(getLIRGen(), emitter, result, input, canBeNaN, canOverflow));
873+
getLIRGen().append(new AMD64ConvertFloatToIntegerOp(getLIRGen(), emitter, result, input, canBeNaN, canOverflow, signedness));
874+
return result;
875+
}
876+
877+
private AllocatableValue emitIntegerToFloatConvertWithFixup(LIRKind kind, SSEOp op, OperandSize size, Value input, NumUtil.Signedness signedness) {
878+
Variable result = getLIRGen().newVariable(kind);
879+
getLIRGen().append(new AMD64VectorUnary.SSEConvertToFloatOp(op, result, asAllocatable(input), size, signedness));
874880
return result;
875881
}
876882

@@ -936,13 +942,17 @@ public Value emitFloatConvert(FloatConvert op, Value input, boolean canBeNaN, bo
936942
if (op.getCategory().equals(FloatConvertCategory.FloatingPointToInteger)) {
937943
switch (op) {
938944
case D2I:
939-
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DWORD), SSEOp.CVTTSD2SI, DWORD, input, canBeNaN, canOverflow);
945+
case D2UI:
946+
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DWORD), SSEOp.CVTTSD2SI, DWORD, input, canBeNaN, canOverflow, op.signedness());
940947
case D2L:
941-
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.QWORD), SSEOp.CVTTSD2SI, QWORD, input, canBeNaN, canOverflow);
948+
case D2UL:
949+
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.QWORD), SSEOp.CVTTSD2SI, QWORD, input, canBeNaN, canOverflow, op.signedness());
942950
case F2I:
943-
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DWORD), SSEOp.CVTTSS2SI, DWORD, input, canBeNaN, canOverflow);
951+
case F2UI:
952+
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DWORD), SSEOp.CVTTSS2SI, DWORD, input, canBeNaN, canOverflow, op.signedness());
944953
case F2L:
945-
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.QWORD), SSEOp.CVTTSS2SI, QWORD, input, canBeNaN, canOverflow);
954+
case F2UL:
955+
return emitFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.QWORD), SSEOp.CVTTSS2SI, QWORD, input, canBeNaN, canOverflow, op.signedness());
946956
default:
947957
throw GraalError.shouldNotReachHereUnexpectedValue(op); // ExcludeFromJacocoGeneratedReport
948958
}
@@ -953,13 +963,13 @@ public Value emitFloatConvert(FloatConvert op, Value input, boolean canBeNaN, bo
953963
case F2D:
954964
return emitConvertOp(LIRKind.combine(input).changeType(AMD64Kind.DOUBLE), SSEOp.CVTSS2SD, SS, input);
955965
case I2D:
956-
return emitConvertOp(LIRKind.combine(input).changeType(AMD64Kind.DOUBLE), SSEOp.CVTSI2SD, DWORD, input);
966+
return emitIntegerToFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DOUBLE), SSEOp.CVTSI2SD, DWORD, input, op.signedness());
957967
case I2F:
958-
return emitConvertOp(LIRKind.combine(input).changeType(AMD64Kind.SINGLE), SSEOp.CVTSI2SS, DWORD, input);
968+
return emitIntegerToFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.SINGLE), SSEOp.CVTSI2SS, DWORD, input, op.signedness());
959969
case L2D:
960-
return emitConvertOp(LIRKind.combine(input).changeType(AMD64Kind.DOUBLE), SSEOp.CVTSI2SD, QWORD, input);
970+
return emitIntegerToFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.DOUBLE), SSEOp.CVTSI2SD, QWORD, input, op.signedness());
961971
case L2F:
962-
return emitConvertOp(LIRKind.combine(input).changeType(AMD64Kind.SINGLE), SSEOp.CVTSI2SS, QWORD, input);
972+
return emitIntegerToFloatConvertWithFixup(LIRKind.combine(input).changeType(AMD64Kind.SINGLE), SSEOp.CVTSI2SS, QWORD, input, op.signedness());
963973
default:
964974
throw GraalError.shouldNotReachHereUnexpectedValue(op); // ExcludeFromJacocoGeneratedReport
965975
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/amd64/AMD64LoweringProviderMixin.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@
2525

2626
package jdk.graal.compiler.core.amd64;
2727

28+
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
2829
import jdk.graal.compiler.core.common.memory.MemoryExtendKind;
2930
import jdk.graal.compiler.nodes.memory.ExtendableMemoryAccess;
3031
import jdk.graal.compiler.nodes.spi.LoweringProvider;
32+
import jdk.vm.ci.amd64.AMD64;
3133

3234
public interface AMD64LoweringProviderMixin extends LoweringProvider {
3335

@@ -65,4 +67,18 @@ default boolean supportsFoldingExtendIntoAccess(ExtendableMemoryAccess access, M
6567
return false;
6668
}
6769

70+
/**
 * Reports support for float-to-unsigned conversions. This AMD64 mixin returns {@code true}
 * unconditionally, without inspecting the target's CPU features.
 */
@Override
71+
default boolean supportsFloatToUnsignedConvert() {
72+
return true;
73+
}
74+
75+
@Override
76+
default boolean supportsUnsignedToFloatConvert() {
77+
/*
78+
* Use AVX-512 conversion instructions if available. Otherwise, don't bother with
79+
* hand-written assembly intrinsics, which won't beat the pure Java implementation.
80+
*/
81+
AMD64 amd64 = (AMD64) getTarget().arch;
82+
return AMD64Assembler.supportsFullAVX512(amd64.getFeatures());
83+
}
6884
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/common/calc/FloatConvert.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
*/
2525
package jdk.graal.compiler.core.common.calc;
2626

27+
import jdk.graal.compiler.core.common.NumUtil.Signedness;
2728
import jdk.graal.compiler.core.common.type.FloatStamp;
2829
import jdk.graal.compiler.core.common.type.IntegerStamp;
2930
import jdk.graal.compiler.core.common.type.PrimitiveStamp;
@@ -131,4 +132,28 @@ public static FloatConvert forStamps(Stamp from, Stamp to) {
131132
}
132133
return null;
133134
}
135+
136+
public Signedness signedness() {
137+
return switch (this) {
138+
case D2F -> Signedness.SIGNED;
139+
case D2I -> Signedness.SIGNED;
140+
case D2L -> Signedness.SIGNED;
141+
case F2D -> Signedness.SIGNED;
142+
case F2I -> Signedness.SIGNED;
143+
case F2L -> Signedness.SIGNED;
144+
case I2D -> Signedness.SIGNED;
145+
case I2F -> Signedness.SIGNED;
146+
case L2D -> Signedness.SIGNED;
147+
case L2F -> Signedness.SIGNED;
148+
case F2UI -> Signedness.UNSIGNED;
149+
case D2UI -> Signedness.UNSIGNED;
150+
case F2UL -> Signedness.UNSIGNED;
151+
case D2UL -> Signedness.UNSIGNED;
152+
case UI2F -> Signedness.UNSIGNED;
153+
case UL2F -> Signedness.UNSIGNED;
154+
case UI2D -> Signedness.UNSIGNED;
155+
case UL2D -> Signedness.UNSIGNED;
156+
default -> throw GraalError.shouldNotReachHereUnexpectedValue(this); // ExcludeFromJacocoGeneratedReport
157+
};
158+
}
134159
}

0 commit comments

Comments
 (0)