Skip to content

Commit 0f1f418

Browse files
committed
[GR-64752] Remove non-macro-fused patterns.
PullRequest: graal/20741
2 parents 974bb00 + 8a42486 commit 0f1f418

File tree

13 files changed

+241
-174
lines changed

13 files changed

+241
-174
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6080,6 +6080,10 @@ public final void xorl(Register dst, Register src) {
60806080
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.DWORD, dst, src);
60816081
}
60826082

6083+
public final void xorl(Register dst, AMD64Address src) {
6084+
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.DWORD, dst, src);
6085+
}
6086+
60836087
public final void xorl(Register dst, int imm32) {
60846088
AMD64BinaryArithmetic.XOR.getMIOpcode(OperandSize.DWORD, isByte(imm32)).emit(this, OperandSize.DWORD, dst, imm32);
60856089
}
@@ -6112,6 +6116,10 @@ public final void xorq(Register dst, Register src) {
61126116
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.QWORD, dst, src);
61136117
}
61146118

6119+
public final void xorq(Register dst, AMD64Address src) {
6120+
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.QWORD, dst, src);
6121+
}
6122+
61156123
// Mask register related instructions
61166124

61176125
public final void kmovb(Register dst, Register src) {

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64MacroAssembler.java

Lines changed: 85 additions & 81 deletions
Large diffs are not rendered by default.

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64ArrayRegionCompareToOp.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,8 @@ private void emitVectorLoop(CompilationResultBuilder crb, AMD64MacroAssembler ma
275275
// convert result to bitmask
276276
masm.pmovmsk(vectorSize, tmp1, vector1);
277277
// invert bit mask. if the result is non-zero, compared regions are not equal
278-
masm.xorlAndJcc(tmp1, vectorSize == XMM ? ONES_16 : ONES_32, ConditionFlag.NotZero, diffFound, true);
278+
masm.xorl(tmp1, vectorSize == XMM ? ONES_16 : ONES_32);
279+
masm.jccb(ConditionFlag.NotZero, diffFound);
279280
// regions are equal, continue the loop
280281
masm.addqAndJcc(length, elementsPerVector, ConditionFlag.NotZero, loop, true);
281282

@@ -289,7 +290,8 @@ private void emitVectorLoop(CompilationResultBuilder crb, AMD64MacroAssembler ma
289290
masm.leaq(length, new AMD64Address(length, result, Stride.S1, -elementsPerVector));
290291
masm.pcmpeq(vectorSize, maxStride, vector1, vector2);
291292
masm.pmovmsk(vectorSize, tmp1, vector1);
292-
masm.xorlAndJcc(tmp1, vectorSize == XMM ? ONES_16 : ONES_32, ConditionFlag.NotZero, diffFound, true);
293+
masm.xorl(tmp1, vectorSize == XMM ? ONES_16 : ONES_32);
294+
masm.jccb(ConditionFlag.NotZero, diffFound);
293295
// all elements are equal, return 0
294296
masm.xorq(result, result);
295297
masm.jmp(returnLabel);
@@ -360,7 +362,8 @@ private void emitVectorizedTail(AMD64MacroAssembler masm, Stride strideA, Stride
360362
}
361363
masm.pcmpeq(cmpSize, maxStride, vector1, vector2);
362364
masm.pmovmsk(cmpSize, result, vector1);
363-
masm.xorlAndJcc(result, cmpSize == XMM ? ONES_16 : ONES_32, ConditionFlag.Zero, returnLabel, false);
365+
masm.xorl(result, cmpSize == XMM ? ONES_16 : ONES_32);
366+
masm.jcc(ConditionFlag.Zero, returnLabel);
364367

365368
bsfq(masm, tmp2, result);
366369
if (maxStride.value > 1) {

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerMulAddOp.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,8 @@ static void mulAdd128X32Loop(AMD64MacroAssembler masm, Register out, Register in
183183
masm.shrl(tmp1, 2);
184184

185185
masm.bind(lFirstLoop);
186-
masm.sublAndJcc(tmp1, 1, ConditionFlag.Negative, lFirstLoopExit, true);
186+
masm.decl(tmp1);
187+
masm.jccb(ConditionFlag.Negative, lFirstLoopExit);
187188

188189
masm.subl(len, 4);
189190
masm.subl(offset, 4);
@@ -255,8 +256,10 @@ static void mulAdd(AMD64MacroAssembler masm, Register out, Register in, Register
255256
mulAdd128X32Loop(masm, out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
256257

257258
// Multiply the trailing in[] entry using 64 bit by 32 bit, if any
258-
masm.declAndJcc(len, ConditionFlag.Negative, lCarry, true);
259-
masm.declAndJcc(len, ConditionFlag.Negative, lLastIn, true);
259+
masm.decl(len);
260+
masm.jccb(ConditionFlag.Negative, lCarry);
261+
masm.decl(len);
262+
masm.jccb(ConditionFlag.Negative, lLastIn);
260263

261264
masm.movq(op1, new AMD64Address(in, len, Stride.S4, 0));
262265
masm.rorq(op1, 32);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerMultiplyToLenOp.java

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -183,14 +183,17 @@ private static void multiply64x64Loop(AMD64MacroAssembler masm,
183183
Label labelOneY = new Label();
184184
Label labelMultiply = new Label();
185185

186-
masm.declAndJcc(xstart, ConditionFlag.Negative, labelOneX, false);
186+
masm.decl(xstart);
187+
masm.jcc(ConditionFlag.Negative, labelOneX);
187188

188189
masm.movq(xAtXstart, new AMD64Address(x, xstart, Stride.S4, 0));
189190
masm.rorq(xAtXstart, 32); // convert big-endian to little-endian
190191

191192
masm.bind(labelFirstLoop);
192-
masm.declAndJcc(idx, ConditionFlag.Negative, labelFirstLoopExit, false);
193-
masm.declAndJcc(idx, ConditionFlag.Negative, labelOneY, false);
193+
masm.decl(idx);
194+
masm.jcc(ConditionFlag.Negative, labelFirstLoopExit);
195+
masm.decl(idx);
196+
masm.jcc(ConditionFlag.Negative, labelOneY);
194197
masm.movq(yAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
195198
masm.rorq(yAtIdx, 32); // convert big-endian to little-endian
196199
masm.bind(labelMultiply);
@@ -287,7 +290,8 @@ private static void multiply128x128Loop(AMD64MacroAssembler masm,
287290
masm.shrl(jdx, 2);
288291

289292
masm.bind(labelThirdLoop);
290-
masm.sublAndJcc(jdx, 1, ConditionFlag.Negative, labelThirdLoopExit, false);
293+
masm.decl(jdx);
294+
masm.jcc(ConditionFlag.Negative, labelThirdLoopExit);
291295
masm.subl(idx, 4);
292296

293297
multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 8);
@@ -301,15 +305,17 @@ private static void multiply128x128Loop(AMD64MacroAssembler masm,
301305

302306
masm.andlAndJcc(idx, 0x3, ConditionFlag.Zero, labelPostThirdLoopDone, false);
303307

304-
masm.sublAndJcc(idx, 2, ConditionFlag.Negative, labelCheck1, false);
308+
masm.subl(idx, 2);
309+
masm.jcc(ConditionFlag.Negative, labelCheck1);
305310

306311
multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 0);
307312
masm.movq(carry, rdx);
308313

309314
masm.bind(labelCheck1);
310315
masm.addl(idx, 0x2);
311316
masm.andl(idx, 0x1);
312-
masm.sublAndJcc(idx, 1, ConditionFlag.Negative, labelPostThirdLoopDone, false);
317+
masm.decl(idx);
318+
masm.jcc(ConditionFlag.Negative, labelPostThirdLoopDone);
313319

314320
masm.movl(yzAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
315321
masm.movq(product, xAtXstart);
@@ -374,7 +380,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
374380
masm.shrl(jdx, 2);
375381

376382
masm.bind(labelThirdLoop);
377-
masm.sublAndJcc(jdx, 1, ConditionFlag.Negative, labelThirdLoopExit, false);
383+
masm.decl(jdx);
384+
masm.jcc(ConditionFlag.Negative, labelThirdLoopExit);
378385
masm.subl(idx, 4);
379386

380387
masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 8));
@@ -420,7 +427,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
420427

421428
masm.andlAndJcc(idx, 0x3, ConditionFlag.Zero, labelPostThirdLoopDone, false);
422429

423-
masm.sublAndJcc(idx, 2, ConditionFlag.Negative, labelCheck1, false);
430+
masm.subl(idx, 2);
431+
masm.jcc(ConditionFlag.Negative, labelCheck1);
424432

425433
masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 0));
426434
masm.rorxq(yzAtIdx1, yzAtIdx1, 32);
@@ -438,7 +446,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
438446
masm.bind(labelCheck1);
439447
masm.addl(idx, 0x2);
440448
masm.andl(idx, 0x1);
441-
masm.sublAndJcc(idx, 1, ConditionFlag.Negative, labelPostThirdLoopDone, false);
449+
masm.decl(idx);
450+
masm.jcc(ConditionFlag.Negative, labelPostThirdLoopDone);
442451
masm.movl(tmp4, new AMD64Address(y, idx, Stride.S4, 0));
443452
masm.mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3
444453
masm.movl(tmp4, new AMD64Address(z, idx, Stride.S4, 0));
@@ -504,13 +513,14 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
504513
masm.xorq(carry, carry); // carry = 0;
505514

506515
masm.movl(xstart, xlen);
507-
masm.declAndJcc(xstart, ConditionFlag.Negative, labelDone, false);
516+
masm.decl(xstart);
517+
masm.jcc(ConditionFlag.Negative, labelDone);
508518

509519
multiply64x64Loop(masm, x, xstart, xAtXstart, y, yAtIdx, z, carry, product, idx, kdx);
510520

511521
masm.testlAndJcc(kdx, kdx, ConditionFlag.Zero, labelSecondLoop, false);
512522

513-
masm.sublAndJcc(kdx, 1, ConditionFlag.Zero, labelCarry, false);
523+
masm.declAndJcc(kdx, ConditionFlag.Zero, labelCarry, false);
514524

515525
masm.movl(new AMD64Address(z, kdx, Stride.S4, 0), carry);
516526
masm.shrq(carry, 32);
@@ -542,14 +552,16 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
542552
masm.xorq(carry, carry); // carry = 0;
543553
masm.movl(jdx, ylen); // j = ystart+1
544554
// i = xstart-1;
545-
masm.sublAndJcc(xstart, 1, ConditionFlag.Negative, labelDone, false);
555+
masm.decl(xstart);
556+
masm.jcc(ConditionFlag.Negative, labelDone);
546557

547558
masm.push(z);
548559

549560
// z = z + k - j
550561
masm.leaq(z, new AMD64Address(z, xstart, Stride.S4, 4));
551562
// i = xstart-1;
552-
masm.sublAndJcc(xstart, 1, ConditionFlag.Negative, labelLastX, false);
563+
masm.decl(xstart);
564+
masm.jcc(ConditionFlag.Negative, labelLastX);
553565

554566
if (useBMI2Instructions) {
555567
masm.movq(rdx, new AMD64Address(x, xstart, Stride.S4, 0));
@@ -579,7 +591,8 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
579591
masm.movl(tmp3, xlen);
580592
masm.addl(tmp3, 1);
581593
masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);
582-
masm.sublAndJcc(tmp3, 1, ConditionFlag.Negative, labelDone, false);
594+
masm.decl(tmp3);
595+
masm.jcc(ConditionFlag.Negative, labelDone);
583596

584597
masm.shrq(carry, 32);
585598
masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerSquareToLenOp.java

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,8 @@ static void addOne64(AMD64MacroAssembler masm, Register z, Register zlen, Regist
200200

201201
masm.bind(lFourthLoop);
202202
masm.jccb(ConditionFlag.CarryClear, lFourthLoopExit);
203-
masm.sublAndJcc(zlen, 2, ConditionFlag.Negative, lFourthLoopExit, true);
203+
masm.subl(zlen, 2);
204+
masm.jccb(ConditionFlag.Negative, lFourthLoopExit);
204205
masm.addq(new AMD64Address(z, zlen, Stride.S4, 0), tmp1);
205206
masm.jmp(lFourthLoop);
206207
masm.bind(lFourthLoopExit);
@@ -235,7 +236,8 @@ static void lshiftBy1(AMD64MacroAssembler masm, Register z, Register zlen,
235236

236237
masm.bind(lFifthLoop);
237238
masm.decl(zidx); // Use decl to preserve carry flag
238-
masm.declAndJcc(zidx, ConditionFlag.Negative, lFifthLoopExit, true);
239+
masm.decl(zidx);
240+
masm.jccb(ConditionFlag.Negative, lFifthLoopExit);
239241

240242
if (useBMI2Instructions(masm)) {
241243
masm.movq(value, new AMD64Address(z, zidx, Stride.S4, 0));
@@ -324,8 +326,10 @@ private static void squareToLen(AMD64MacroAssembler masm, Register x, Register l
324326
}
325327

326328
masm.bind(lThirdLoop);
327-
masm.declAndJcc(len, ConditionFlag.Negative, lThirdLoopExit, true);
328-
masm.declAndJcc(len, ConditionFlag.Negative, lLastX, true);
329+
masm.decl(len);
330+
masm.jccb(ConditionFlag.Negative, lThirdLoopExit);
331+
masm.decl(len);
332+
masm.jccb(ConditionFlag.Negative, lLastX);
329333

330334
masm.movq(op1, new AMD64Address(x, len, Stride.S4, 0));
331335
masm.rorq(op1, 32);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64CipherBlockChainingAESDecryptOp.java

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -228,8 +228,8 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
228228
}
229229
masm.align(preferredLoopAlignment(crb));
230230
masm.bind(labelMultiBlockLoopTop[k]);
231-
masm.cmpq(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE); // see if at least 4 blocks left
232-
masm.jcc(ConditionFlag.Less, labelSingleBlockLoopTopHead[k]);
231+
// see if at least 4 blocks left
232+
masm.cmpqAndJcc(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE, ConditionFlag.Less, labelSingleBlockLoopTopHead[k], false);
233233

234234
if (k != 0) {
235235
masm.movdqu(xmm15, new AMD64Address(rsp, 2 * wordSize));
@@ -312,8 +312,8 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
312312
} else if (k == 2) {
313313
masm.addq(rsp, 10 * wordSize);
314314
}
315-
masm.cmpq(lenReg, 0); // any blocks left??
316-
masm.jcc(ConditionFlag.Equal, labelExit);
315+
// any blocks left??
316+
masm.cmpqAndJcc(lenReg, 0, ConditionFlag.Equal, labelExit, false);
317317
masm.bind(labelSingleBlockLoopTopHead2[k]);
318318
if (k == 1) {
319319
loadKey(masm, xmmKey11, key, 0xb0, crb); // 0xb0;
@@ -355,8 +355,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
355355
// set up next r vector with cipher input from this block
356356
masm.movdqa(xmmPrevBlockCipher, xmmPrevBlockCipherSave);
357357
masm.addq(pos, AES_BLOCK_SIZE);
358-
masm.subq(lenReg, AES_BLOCK_SIZE);
359-
masm.jcc(ConditionFlag.NotEqual, labelSingleBlockLoopTop[k]);
358+
masm.subqAndJcc(lenReg, AES_BLOCK_SIZE, ConditionFlag.NotEqual, labelSingleBlockLoopTop[k], false);
360359
if (k != 2) {
361360
masm.jmp(labelExit);
362361
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64CodepointIndexToByteIndexOp.java

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
346346
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, vectorSize);
347347
// subtract the number of code points from target index.
348348
// if the result is negative, the target index must be in the current vector
349-
asm.sublAndJcc(idx, tmp, Negative, tailMask, true);
349+
asm.subl(idx, tmp);
350+
asm.jccb(Negative, tailMask);
350351
// otherwise, continue the loop
351352
asm.sublAndJcc(len, vectorLength, NotZero, loop, true);
352353
asm.jmp(loopTail);
@@ -422,7 +423,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
422423
// process the last vector the same way the vector loop would
423424
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, vectorSize);
424425
// subtract the number of code points from the target index
425-
asm.sublAndJcc(idx, tmp, Positive, outOfBounds, false);
426+
asm.subl(idx, tmp);
427+
asm.jcc(Positive, outOfBounds);
426428
asm.jmp(tailMask);
427429

428430
if (supportsAVX2AndYMM()) {
@@ -445,7 +447,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
445447
loadLessThan16IntoXMMOrdered(crb, asm, stride, arr, lengthTail, tmp, vecArray, vecTmp1, vecTmp2);
446448
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, XMM);
447449
// subtract the number of code points from the target index
448-
asm.sublAndJcc(idx, tmp, Negative, tailMask16, false);
450+
asm.subl(idx, tmp);
451+
asm.jcc(Negative, tailMask16);
449452
asm.jmpb(outOfBounds);
450453

451454
// scalar loop
@@ -464,7 +467,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
464467
asm.cmplAndJcc(tmp, 0x80, Equal, tailScalarLoopSkip, true);
465468
// not a continuation byte -> decrease idx.
466469
// if idx becomes negative, we found the target codepoint
467-
asm.declAndJcc(idx, Negative, end, true);
470+
asm.decl(idx);
471+
asm.jccb(Negative, end);
468472
asm.bind(tailScalarLoopSkip);
469473
asm.incl(ret);
470474
asm.declAndJcc(lengthTail, NotZero, tailScalarLoopHead, true);
@@ -477,7 +481,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
477481
asm.cmplAndJcc(tmp, 0x37, Equal, tailScalarLoopSkip, true);
478482
// not low surrogate -> decrease idx.
479483
// if idx becomes negative, we found the target codepoint
480-
asm.sublAndJcc(idx, 2, Negative, end, true);
484+
asm.subl(idx, 2);
485+
asm.jccb(Negative, end);
481486
asm.bind(tailScalarLoopSkip);
482487
asm.addl(ret, 2);
483488
asm.sublAndJcc(lengthTail, 2, NotZero, tailScalarLoopHead, true);
@@ -557,7 +562,8 @@ private void emitMaskTailBinarySearch(CompilationResultBuilder crb, AMD64MacroAs
557562
asm.popcntl(tmp, mask);
558563
// add current negative index.
559564
// if the result is still negative, target index is in the lower half.
560-
asm.addlAndJcc(tmp, idx, Positive, nextTail, true);
565+
asm.addl(tmp, idx);
566+
asm.jccb(Positive, nextTail);
561567
// get lower half of bitmask
562568
asm.andl(maskCopy, ~0 >>> (32 - (bits / 2)));
563569
// adjust result for lower half

0 commit comments

Comments (0)