Skip to content

Commit 0f1f418

Browse files
committed
[GR-64752] Remove non-macro-fused patterns.
PullRequest: graal/20741
2 parents 974bb00 + 8a42486 commit 0f1f418

File tree

13 files changed

+241
-174
lines changed

13 files changed

+241
-174
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6080,6 +6080,10 @@ public final void xorl(Register dst, Register src) {
60806080
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.DWORD, dst, src);
60816081
}
60826082

6083+
public final void xorl(Register dst, AMD64Address src) {
6084+
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.DWORD, dst, src);
6085+
}
6086+
60836087
public final void xorl(Register dst, int imm32) {
60846088
AMD64BinaryArithmetic.XOR.getMIOpcode(OperandSize.DWORD, isByte(imm32)).emit(this, OperandSize.DWORD, dst, imm32);
60856089
}
@@ -6112,6 +6116,10 @@ public final void xorq(Register dst, Register src) {
61126116
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.QWORD, dst, src);
61136117
}
61146118

6119+
public final void xorq(Register dst, AMD64Address src) {
6120+
AMD64BinaryArithmetic.XOR.rmOp.emit(this, OperandSize.QWORD, dst, src);
6121+
}
6122+
61156123
// Mask register related instructions
61166124

61176125
public final void kmovb(Register dst, Register src) {

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64MacroAssembler.java

Lines changed: 85 additions & 81 deletions
Large diffs are not rendered by default.

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64ArrayRegionCompareToOp.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,8 @@ private void emitVectorLoop(CompilationResultBuilder crb, AMD64MacroAssembler ma
275275
// convert result to bitmask
276276
masm.pmovmsk(vectorSize, tmp1, vector1);
277277
// invert bit mask. if the result is non-zero, compared regions are not equal
278-
masm.xorlAndJcc(tmp1, vectorSize == XMM ? ONES_16 : ONES_32, ConditionFlag.NotZero, diffFound, true);
278+
masm.xorl(tmp1, vectorSize == XMM ? ONES_16 : ONES_32);
279+
masm.jccb(ConditionFlag.NotZero, diffFound);
279280
// regions are equal, continue the loop
280281
masm.addqAndJcc(length, elementsPerVector, ConditionFlag.NotZero, loop, true);
281282

@@ -289,7 +290,8 @@ private void emitVectorLoop(CompilationResultBuilder crb, AMD64MacroAssembler ma
289290
masm.leaq(length, new AMD64Address(length, result, Stride.S1, -elementsPerVector));
290291
masm.pcmpeq(vectorSize, maxStride, vector1, vector2);
291292
masm.pmovmsk(vectorSize, tmp1, vector1);
292-
masm.xorlAndJcc(tmp1, vectorSize == XMM ? ONES_16 : ONES_32, ConditionFlag.NotZero, diffFound, true);
293+
masm.xorl(tmp1, vectorSize == XMM ? ONES_16 : ONES_32);
294+
masm.jccb(ConditionFlag.NotZero, diffFound);
293295
// all elements are equal, return 0
294296
masm.xorq(result, result);
295297
masm.jmp(returnLabel);
@@ -360,7 +362,8 @@ private void emitVectorizedTail(AMD64MacroAssembler masm, Stride strideA, Stride
360362
}
361363
masm.pcmpeq(cmpSize, maxStride, vector1, vector2);
362364
masm.pmovmsk(cmpSize, result, vector1);
363-
masm.xorlAndJcc(result, cmpSize == XMM ? ONES_16 : ONES_32, ConditionFlag.Zero, returnLabel, false);
365+
masm.xorl(result, cmpSize == XMM ? ONES_16 : ONES_32);
366+
masm.jcc(ConditionFlag.Zero, returnLabel);
364367

365368
bsfq(masm, tmp2, result);
366369
if (maxStride.value > 1) {

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerMulAddOp.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,8 @@ static void mulAdd128X32Loop(AMD64MacroAssembler masm, Register out, Register in
183183
masm.shrl(tmp1, 2);
184184

185185
masm.bind(lFirstLoop);
186-
masm.sublAndJcc(tmp1, 1, ConditionFlag.Negative, lFirstLoopExit, true);
186+
masm.decl(tmp1);
187+
masm.jccb(ConditionFlag.Negative, lFirstLoopExit);
187188

188189
masm.subl(len, 4);
189190
masm.subl(offset, 4);
@@ -255,8 +256,10 @@ static void mulAdd(AMD64MacroAssembler masm, Register out, Register in, Register
255256
mulAdd128X32Loop(masm, out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
256257

257258
// Multiply the trailing in[] entry using 64 bit by 32 bit, if any
258-
masm.declAndJcc(len, ConditionFlag.Negative, lCarry, true);
259-
masm.declAndJcc(len, ConditionFlag.Negative, lLastIn, true);
259+
masm.decl(len);
260+
masm.jccb(ConditionFlag.Negative, lCarry);
261+
masm.decl(len);
262+
masm.jccb(ConditionFlag.Negative, lLastIn);
260263

261264
masm.movq(op1, new AMD64Address(in, len, Stride.S4, 0));
262265
masm.rorq(op1, 32);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerMultiplyToLenOp.java

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -183,14 +183,17 @@ private static void multiply64x64Loop(AMD64MacroAssembler masm,
183183
Label labelOneY = new Label();
184184
Label labelMultiply = new Label();
185185

186-
masm.declAndJcc(xstart, ConditionFlag.Negative, labelOneX, false);
186+
masm.decl(xstart);
187+
masm.jcc(ConditionFlag.Negative, labelOneX);
187188

188189
masm.movq(xAtXstart, new AMD64Address(x, xstart, Stride.S4, 0));
189190
masm.rorq(xAtXstart, 32); // convert big-endian to little-endian
190191

191192
masm.bind(labelFirstLoop);
192-
masm.declAndJcc(idx, ConditionFlag.Negative, labelFirstLoopExit, false);
193-
masm.declAndJcc(idx, ConditionFlag.Negative, labelOneY, false);
193+
masm.decl(idx);
194+
masm.jcc(ConditionFlag.Negative, labelFirstLoopExit);
195+
masm.decl(idx);
196+
masm.jcc(ConditionFlag.Negative, labelOneY);
194197
masm.movq(yAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
195198
masm.rorq(yAtIdx, 32); // convert big-endian to little-endian
196199
masm.bind(labelMultiply);
@@ -287,7 +290,8 @@ private static void multiply128x128Loop(AMD64MacroAssembler masm,
287290
masm.shrl(jdx, 2);
288291

289292
masm.bind(labelThirdLoop);
290-
masm.sublAndJcc(jdx, 1, ConditionFlag.Negative, labelThirdLoopExit, false);
293+
masm.decl(jdx);
294+
masm.jcc(ConditionFlag.Negative, labelThirdLoopExit);
291295
masm.subl(idx, 4);
292296

293297
multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 8);
@@ -301,15 +305,17 @@ private static void multiply128x128Loop(AMD64MacroAssembler masm,
301305

302306
masm.andlAndJcc(idx, 0x3, ConditionFlag.Zero, labelPostThirdLoopDone, false);
303307

304-
masm.sublAndJcc(idx, 2, ConditionFlag.Negative, labelCheck1, false);
308+
masm.subl(idx, 2);
309+
masm.jcc(ConditionFlag.Negative, labelCheck1);
305310

306311
multiplyAdd128x128(masm, xAtXstart, y, z, yzAtIdx, idx, carry, product, 0);
307312
masm.movq(carry, rdx);
308313

309314
masm.bind(labelCheck1);
310315
masm.addl(idx, 0x2);
311316
masm.andl(idx, 0x1);
312-
masm.sublAndJcc(idx, 1, ConditionFlag.Negative, labelPostThirdLoopDone, false);
317+
masm.decl(idx);
318+
masm.jcc(ConditionFlag.Negative, labelPostThirdLoopDone);
313319

314320
masm.movl(yzAtIdx, new AMD64Address(y, idx, Stride.S4, 0));
315321
masm.movq(product, xAtXstart);
@@ -374,7 +380,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
374380
masm.shrl(jdx, 2);
375381

376382
masm.bind(labelThirdLoop);
377-
masm.sublAndJcc(jdx, 1, ConditionFlag.Negative, labelThirdLoopExit, false);
383+
masm.decl(jdx);
384+
masm.jcc(ConditionFlag.Negative, labelThirdLoopExit);
378385
masm.subl(idx, 4);
379386

380387
masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 8));
@@ -420,7 +427,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
420427

421428
masm.andlAndJcc(idx, 0x3, ConditionFlag.Zero, labelPostThirdLoopDone, false);
422429

423-
masm.sublAndJcc(idx, 2, ConditionFlag.Negative, labelCheck1, false);
430+
masm.subl(idx, 2);
431+
masm.jcc(ConditionFlag.Negative, labelCheck1);
424432

425433
masm.movq(yzAtIdx1, new AMD64Address(y, idx, Stride.S4, 0));
426434
masm.rorxq(yzAtIdx1, yzAtIdx1, 32);
@@ -438,7 +446,8 @@ private static void multiply128x128BMI2Loop(AMD64MacroAssembler masm,
438446
masm.bind(labelCheck1);
439447
masm.addl(idx, 0x2);
440448
masm.andl(idx, 0x1);
441-
masm.sublAndJcc(idx, 1, ConditionFlag.Negative, labelPostThirdLoopDone, false);
449+
masm.decl(idx);
450+
masm.jcc(ConditionFlag.Negative, labelPostThirdLoopDone);
442451
masm.movl(tmp4, new AMD64Address(y, idx, Stride.S4, 0));
443452
masm.mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3
444453
masm.movl(tmp4, new AMD64Address(z, idx, Stride.S4, 0));
@@ -504,13 +513,14 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
504513
masm.xorq(carry, carry); // carry = 0;
505514

506515
masm.movl(xstart, xlen);
507-
masm.declAndJcc(xstart, ConditionFlag.Negative, labelDone, false);
516+
masm.decl(xstart);
517+
masm.jcc(ConditionFlag.Negative, labelDone);
508518

509519
multiply64x64Loop(masm, x, xstart, xAtXstart, y, yAtIdx, z, carry, product, idx, kdx);
510520

511521
masm.testlAndJcc(kdx, kdx, ConditionFlag.Zero, labelSecondLoop, false);
512522

513-
masm.sublAndJcc(kdx, 1, ConditionFlag.Zero, labelCarry, false);
523+
masm.declAndJcc(kdx, ConditionFlag.Zero, labelCarry, false);
514524

515525
masm.movl(new AMD64Address(z, kdx, Stride.S4, 0), carry);
516526
masm.shrq(carry, 32);
@@ -542,14 +552,16 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
542552
masm.xorq(carry, carry); // carry = 0;
543553
masm.movl(jdx, ylen); // j = ystart+1
544554
// i = xstart-1;
545-
masm.sublAndJcc(xstart, 1, ConditionFlag.Negative, labelDone, false);
555+
masm.decl(xstart);
556+
masm.jcc(ConditionFlag.Negative, labelDone);
546557

547558
masm.push(z);
548559

549560
// z = z + k - j
550561
masm.leaq(z, new AMD64Address(z, xstart, Stride.S4, 4));
551562
// i = xstart-1;
552-
masm.sublAndJcc(xstart, 1, ConditionFlag.Negative, labelLastX, false);
563+
masm.decl(xstart);
564+
masm.jcc(ConditionFlag.Negative, labelLastX);
553565

554566
if (useBMI2Instructions) {
555567
masm.movq(rdx, new AMD64Address(x, xstart, Stride.S4, 0));
@@ -579,7 +591,8 @@ private static void multiplyToLen(AMD64MacroAssembler masm,
579591
masm.movl(tmp3, xlen);
580592
masm.addl(tmp3, 1);
581593
masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);
582-
masm.sublAndJcc(tmp3, 1, ConditionFlag.Negative, labelDone, false);
594+
masm.decl(tmp3);
595+
masm.jcc(ConditionFlag.Negative, labelDone);
583596

584597
masm.shrq(carry, 32);
585598
masm.movl(new AMD64Address(z, tmp3, Stride.S4, 0), carry);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64BigIntegerSquareToLenOp.java

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,8 @@ static void addOne64(AMD64MacroAssembler masm, Register z, Register zlen, Regist
200200

201201
masm.bind(lFourthLoop);
202202
masm.jccb(ConditionFlag.CarryClear, lFourthLoopExit);
203-
masm.sublAndJcc(zlen, 2, ConditionFlag.Negative, lFourthLoopExit, true);
203+
masm.subl(zlen, 2);
204+
masm.jccb(ConditionFlag.Negative, lFourthLoopExit);
204205
masm.addq(new AMD64Address(z, zlen, Stride.S4, 0), tmp1);
205206
masm.jmp(lFourthLoop);
206207
masm.bind(lFourthLoopExit);
@@ -235,7 +236,8 @@ static void lshiftBy1(AMD64MacroAssembler masm, Register z, Register zlen,
235236

236237
masm.bind(lFifthLoop);
237238
masm.decl(zidx); // Use decl to preserve carry flag
238-
masm.declAndJcc(zidx, ConditionFlag.Negative, lFifthLoopExit, true);
239+
masm.decl(zidx);
240+
masm.jccb(ConditionFlag.Negative, lFifthLoopExit);
239241

240242
if (useBMI2Instructions(masm)) {
241243
masm.movq(value, new AMD64Address(z, zidx, Stride.S4, 0));
@@ -324,8 +326,10 @@ private static void squareToLen(AMD64MacroAssembler masm, Register x, Register l
324326
}
325327

326328
masm.bind(lThirdLoop);
327-
masm.declAndJcc(len, ConditionFlag.Negative, lThirdLoopExit, true);
328-
masm.declAndJcc(len, ConditionFlag.Negative, lLastX, true);
329+
masm.decl(len);
330+
masm.jccb(ConditionFlag.Negative, lThirdLoopExit);
331+
masm.decl(len);
332+
masm.jccb(ConditionFlag.Negative, lLastX);
329333

330334
masm.movq(op1, new AMD64Address(x, len, Stride.S4, 0));
331335
masm.rorq(op1, 32);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64CipherBlockChainingAESDecryptOp.java

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -228,8 +228,8 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
228228
}
229229
masm.align(preferredLoopAlignment(crb));
230230
masm.bind(labelMultiBlockLoopTop[k]);
231-
masm.cmpq(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE); // see if at least 4 blocks left
232-
masm.jcc(ConditionFlag.Less, labelSingleBlockLoopTopHead[k]);
231+
// see if at least 4 blocks left
232+
masm.cmpqAndJcc(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE, ConditionFlag.Less, labelSingleBlockLoopTopHead[k], false);
233233

234234
if (k != 0) {
235235
masm.movdqu(xmm15, new AMD64Address(rsp, 2 * wordSize));
@@ -312,8 +312,8 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
312312
} else if (k == 2) {
313313
masm.addq(rsp, 10 * wordSize);
314314
}
315-
masm.cmpq(lenReg, 0); // any blocks left??
316-
masm.jcc(ConditionFlag.Equal, labelExit);
315+
// any blocks left??
316+
masm.cmpqAndJcc(lenReg, 0, ConditionFlag.Equal, labelExit, false);
317317
masm.bind(labelSingleBlockLoopTopHead2[k]);
318318
if (k == 1) {
319319
loadKey(masm, xmmKey11, key, 0xb0, crb); // 0xb0;
@@ -355,8 +355,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
355355
// set up next r vector with cipher input from this block
356356
masm.movdqa(xmmPrevBlockCipher, xmmPrevBlockCipherSave);
357357
masm.addq(pos, AES_BLOCK_SIZE);
358-
masm.subq(lenReg, AES_BLOCK_SIZE);
359-
masm.jcc(ConditionFlag.NotEqual, labelSingleBlockLoopTop[k]);
358+
masm.subqAndJcc(lenReg, AES_BLOCK_SIZE, ConditionFlag.NotEqual, labelSingleBlockLoopTop[k], false);
360359
if (k != 2) {
361360
masm.jmp(labelExit);
362361
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64CodepointIndexToByteIndexOp.java

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
346346
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, vectorSize);
347347
// subtract the number of code points from target index.
348348
// if the result is negative, the target index must be in the current vector
349-
asm.sublAndJcc(idx, tmp, Negative, tailMask, true);
349+
asm.subl(idx, tmp);
350+
asm.jccb(Negative, tailMask);
350351
// otherwise, continue the loop
351352
asm.sublAndJcc(len, vectorLength, NotZero, loop, true);
352353
asm.jmp(loopTail);
@@ -422,7 +423,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
422423
// process the last vector the same way the vector loop would
423424
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, vectorSize);
424425
// subtract the number of code points from the target index
425-
asm.sublAndJcc(idx, tmp, Positive, outOfBounds, false);
426+
asm.subl(idx, tmp);
427+
asm.jcc(Positive, outOfBounds);
426428
asm.jmp(tailMask);
427429

428430
if (supportsAVX2AndYMM()) {
@@ -445,7 +447,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
445447
loadLessThan16IntoXMMOrdered(crb, asm, stride, arr, lengthTail, tmp, vecArray, vecTmp1, vecTmp2);
446448
countCodepoints(asm, ret, vecArray, vecMask1, vecMask2, mask, tmp, XMM);
447449
// subtract the number of code points from the target index
448-
asm.sublAndJcc(idx, tmp, Negative, tailMask16, false);
450+
asm.subl(idx, tmp);
451+
asm.jcc(Negative, tailMask16);
449452
asm.jmpb(outOfBounds);
450453

451454
// scalar loop
@@ -464,7 +467,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
464467
asm.cmplAndJcc(tmp, 0x80, Equal, tailScalarLoopSkip, true);
465468
// not a continuation byte -> decrease idx.
466469
// if idx becomes negative, we found the target codepoint
467-
asm.declAndJcc(idx, Negative, end, true);
470+
asm.decl(idx);
471+
asm.jccb(Negative, end);
468472
asm.bind(tailScalarLoopSkip);
469473
asm.incl(ret);
470474
asm.declAndJcc(lengthTail, NotZero, tailScalarLoopHead, true);
@@ -477,7 +481,8 @@ private void emitOp(CompilationResultBuilder crb, AMD64MacroAssembler asm, Regis
477481
asm.cmplAndJcc(tmp, 0x37, Equal, tailScalarLoopSkip, true);
478482
// not low surrogate -> decrease idx.
479483
// if idx becomes negative, we found the target codepoint
480-
asm.sublAndJcc(idx, 2, Negative, end, true);
484+
asm.subl(idx, 2);
485+
asm.jccb(Negative, end);
481486
asm.bind(tailScalarLoopSkip);
482487
asm.addl(ret, 2);
483488
asm.sublAndJcc(lengthTail, 2, NotZero, tailScalarLoopHead, true);
@@ -557,7 +562,8 @@ private void emitMaskTailBinarySearch(CompilationResultBuilder crb, AMD64MacroAs
557562
asm.popcntl(tmp, mask);
558563
// add current negative index.
559564
// if the result is still negative, target index is in the lower half.
560-
asm.addlAndJcc(tmp, idx, Positive, nextTail, true);
565+
asm.addl(tmp, idx);
566+
asm.jccb(Positive, nextTail);
561567
// get lower half of bitmask
562568
asm.andl(maskCopy, ~0 >>> (32 - (bits / 2)));
563569
// adjust result for lower half

0 commit comments

Comments (0)