Skip to content

Commit aeb2631

Browse files
committed
[JDK-8361254] Adapt JDK-8358179: Performance regression in Math.cbrt
PullRequest: graal/21316
2 parents cbc0b82 + cf75b68 commit aeb2631

File tree

2 files changed

+16
-48
lines changed

2 files changed

+16
-48
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5898,6 +5898,10 @@ public final void ucomisd(Register dst, Register src) {
58985898
SSEOp.UCOMIS.emit(this, OperandSize.PD, dst, src);
58995899
}
59005900

5901+
public final void ucomisd(Register dst, AMD64Address src) {
5902+
SSEOp.UCOMIS.emit(this, OperandSize.PD, dst, src);
5903+
}
5904+
59015905
public final void ucomiss(Register dst, Register src) {
59025906
SSEOp.UCOMIS.emit(this, OperandSize.PS, dst, src);
59035907
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64MathCbrtOp.java

Lines changed: 12 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import static jdk.vm.ci.amd64.AMD64.rax;
3434
import static jdk.vm.ci.amd64.AMD64.rcx;
3535
import static jdk.vm.ci.amd64.AMD64.rdx;
36-
import static jdk.vm.ci.amd64.AMD64.rsp;
3736
import static jdk.vm.ci.amd64.AMD64.xmm0;
3837
import static jdk.vm.ci.amd64.AMD64.xmm1;
3938
import static jdk.vm.ci.amd64.AMD64.xmm2;
@@ -73,8 +72,8 @@
7372
* </pre>
7473
*/
7574
// @formatter:off
76-
@SyncPort(from = "https://github.com/openjdk/jdk/blob/83cb0c6de5988de526545d0926c2c6ef60efc1c7/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp#L30-L364",
77-
sha1 = "1cf43819053aac54cbe343f9b8a8bfcc3e3dd6c8")
75+
@SyncPort(from = "https://github.com/openjdk/jdk/blob/38f59f84c98dfd974eec0c05541b2138b149def7/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp#L30-L339",
76+
sha1 = "ba7a498e0e5dd3aab7f6eacf50753b5e5999911e")
7877
// @formatter:on
7978
public final class AMD64MathCbrtOp extends AMD64MathIntrinsicUnaryOp {
8079

@@ -85,6 +84,10 @@ public AMD64MathCbrtOp() {
8584
/* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
8685
}
8786

87+
private static ArrayDataPointerConstant absMask = pointerConstant(16, new int[]{
88+
0xFFFFFFFF, 0x7FFFFFFF, 0x00000000, 0x00000000
89+
});
90+
8891
private static ArrayDataPointerConstant sigMask = pointerConstant(16, new int[]{
8992
0x00000000, 0x000fc000
9093
});
@@ -113,10 +116,6 @@ public AMD64MathCbrtOp() {
113116
0x00000000, 0x7ff00000
114117
});
115118

116-
private static ArrayDataPointerConstant negInf = pointerConstant(16, new int[]{
117-
0x00000000, 0xfff00000
118-
});
119-
120119
private static ArrayDataPointerConstant coeffTable = pointerConstant(16, new int[]{
121120
0x5c9cc8e7, 0xbf9036de, 0xd2b3183b, 0xbfa511e8,
122121
});
@@ -226,17 +225,17 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
226225
Label l2TAGPACKET001 = new Label();
227226
Label l2TAGPACKET101 = new Label();
228227
Label l2TAGPACKET201 = new Label();
229-
Label l2TAGPACKET301 = new Label();
230-
Label l2TAGPACKET401 = new Label();
231-
Label l2TAGPACKET501 = new Label();
232-
Label l2TAGPACKET601 = new Label();
233228
Label lB11 = new Label();
234229
Label lB12 = new Label();
235230
Label lB14 = new Label();
236231

237232
masm.bind(lB11);
238-
masm.subq(rsp, 24);
239-
masm.movsd(new AMD64Address(rsp), xmm0);
233+
masm.ucomisd(xmm0, recordExternalAddress(crb, zeron));
234+
masm.jcc(ConditionFlag.Equal, l2TAGPACKET101);
235+
masm.movq(xmm1, xmm0);
236+
masm.andpd(xmm1, recordExternalAddress(crb, absMask));
237+
masm.ucomisd(xmm1, recordExternalAddress(crb, inf));
238+
masm.jcc(ConditionFlag.Equal, lB14);
240239

241240
masm.bind(lB12);
242241
masm.movq(xmm7, xmm0);
@@ -255,8 +254,6 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
255254
masm.andl(rdx, rax);
256255
// Branch only if |x| is denormalized
257256
masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, l2TAGPACKET001, false);
258-
// Branch only if |x| is INF or NaN
259-
masm.cmplAndJcc(rdx, 0x7ff00, ConditionFlag.Equal, l2TAGPACKET101, false);
260257
masm.shrl(rdx, 8);
261258
masm.shrq(r9, 8);
262259
masm.andpd(xmm2, xmm0);
@@ -324,8 +321,6 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
324321
masm.andl(rdx, rax);
325322
masm.shrl(rdx, 8);
326323
masm.shrq(r9, 8);
327-
// Branch only if |x| is zero
328-
masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, l2TAGPACKET301, false);
329324
masm.andpd(xmm2, xmm0);
330325
masm.andpd(xmm0, xmm5);
331326
masm.orpd(xmm3, xmm2);
@@ -349,40 +344,9 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
349344
masm.psllq(xmm7, 52);
350345
masm.jmp(l2TAGPACKET201);
351346

352-
masm.bind(l2TAGPACKET301);
353-
// Branch only if x is negative zero
354-
masm.cmpqAndJcc(r9, 0, ConditionFlag.NotEqual, l2TAGPACKET401, false);
355-
masm.xorpd(xmm0, xmm0);
356-
masm.jmp(lB14);
357-
358-
masm.bind(l2TAGPACKET401);
359-
masm.movsd(xmm0, recordExternalAddress(crb, zeron));
360-
masm.jmp(lB14);
361-
362347
masm.bind(l2TAGPACKET101);
363-
masm.movl(rax, new AMD64Address(rsp, 4));
364-
masm.movl(rdx, new AMD64Address(rsp));
365-
masm.movl(rcx, rax);
366-
masm.andl(rcx, 0x7fffffff);
367-
// Branch only if |x| is NaN
368-
masm.cmplAndJcc(rcx, 0x7ff00000, ConditionFlag.Above, l2TAGPACKET501, false);
369-
// Branch only if |x| is NaN
370-
masm.cmplAndJcc(rdx, 0, ConditionFlag.NotEqual, l2TAGPACKET501, false);
371-
// Branch only if x is negative INF
372-
masm.cmplAndJcc(rax, 0x7ff00000, ConditionFlag.NotEqual, l2TAGPACKET601, false);
373-
masm.movsd(xmm0, recordExternalAddress(crb, inf));
374-
masm.jmp(lB14);
375-
376-
masm.bind(l2TAGPACKET601);
377-
masm.movsd(xmm0, recordExternalAddress(crb, negInf));
378-
masm.jmp(lB14);
379-
380-
masm.bind(l2TAGPACKET501);
381-
masm.movsd(xmm0, new AMD64Address(rsp));
382348
masm.addsd(xmm0, xmm0);
383-
masm.movq(new AMD64Address(rsp, 8), xmm0);
384349

385350
masm.bind(lB14);
386-
masm.addq(rsp, 24);
387351
}
388352
}

0 commit comments

Comments
 (0)