From 5f4487d8b53e1d80d9c0733c59e6423c5a81dbad Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Sat, 17 May 2025 21:49:36 +0700 Subject: [PATCH 1/5] misplaced CastLL --- src/hotspot/share/opto/macroArrayCopy.cpp | 6 +++--- test/hotspot/jtreg/ProblemList.txt | 2 -- .../jtreg/compiler/arraycopy/TestArrayCopyConjoint.java | 7 +++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/hotspot/share/opto/macroArrayCopy.cpp b/src/hotspot/share/opto/macroArrayCopy.cpp index e2209fddbdfa0..b469f1de75fb1 100644 --- a/src/hotspot/share/opto/macroArrayCopy.cpp +++ b/src/hotspot/share/opto/macroArrayCopy.cpp @@ -230,9 +230,6 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode return; } - int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type); - Node* casted_length = new CastLLNode(*ctrl, length, TypeLong::make(0, inline_limit, Type::WidenMin)); - transform_later(casted_length); Node* copy_bytes = new LShiftXNode(length, intcon(shift)); transform_later(copy_bytes); @@ -243,6 +240,9 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR); stub_block = *ctrl; + int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type); + Node* casted_length = new CastLLNode(inline_block, length, TypeLong::make(0, inline_limit, Type::WidenMin), ConstraintCastNode::StrongDependency); + transform_later(casted_length); Node* mask_gen = VectorMaskGenNode::make(casted_length, type); transform_later(mask_gen); diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index deab959c94efd..c1a5f8e980426 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -76,8 +76,6 @@ compiler/interpreter/Test6833129.java 8335266 generic-i586 compiler/ciReplay/TestInliningProtectionDomain.java 8349191 generic-all compiler/ciReplay/TestIncrementalInlining.java 8349191 generic-all 
-compiler/c2/TestVerifyConstraintCasts.java 8355574 generic-all - ############################################################################# # :hotspot_gc diff --git a/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyConjoint.java b/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyConjoint.java index 8d98628c074d0..5aed44200bfac 100644 --- a/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyConjoint.java +++ b/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyConjoint.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,7 @@ /** * @test - * @bug 8251871 8285301 + * @bug 8251871 8285301 8355574 * @summary Optimize arrayCopy using AVX-512 masked instructions. * * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions @@ -38,6 +38,9 @@ * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions * -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption * compiler.arraycopy.TestArrayCopyConjoint + * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+StressGCM -XX:VerifyConstraintCasts=2 + * -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption + * compiler.arraycopy.TestArrayCopyConjoint * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions * -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64 * 
compiler.arraycopy.TestArrayCopyConjoint From 336c295c67e4125cccf78029b46964926131be31 Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Sat, 17 May 2025 22:10:44 +0700 Subject: [PATCH 2/5] fix issues --- src/hotspot/share/opto/arraycopynode.cpp | 2 -- src/hotspot/share/opto/macroArrayCopy.cpp | 14 +++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/hotspot/share/opto/arraycopynode.cpp b/src/hotspot/share/opto/arraycopynode.cpp index 85b6bd21aece1..c913476ffeb72 100644 --- a/src/hotspot/share/opto/arraycopynode.cpp +++ b/src/hotspot/share/opto/arraycopynode.cpp @@ -28,8 +28,6 @@ #include "gc/shared/gc_globals.hpp" #include "opto/arraycopynode.hpp" #include "opto/graphKit.hpp" -#include "runtime/sharedRuntime.hpp" -#include "utilities/macros.hpp" #include "utilities/powerOfTwo.hpp" const TypeFunc* ArrayCopyNode::_arraycopy_type_Type = nullptr; diff --git a/src/hotspot/share/opto/macroArrayCopy.cpp b/src/hotspot/share/opto/macroArrayCopy.cpp index b469f1de75fb1..87c0e5a48c2f5 100644 --- a/src/hotspot/share/opto/macroArrayCopy.cpp +++ b/src/hotspot/share/opto/macroArrayCopy.cpp @@ -207,41 +207,37 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode const TypePtr *src_adr_type = _igvn.type(src_start)->isa_ptr(); Node* inline_block = nullptr; Node* stub_block = nullptr; + int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type); int const_len = -1; const TypeInt* lty = nullptr; - uint shift = exact_log2(type2aelembytes(type)); if (length->Opcode() == Op_ConvI2L) { lty = _igvn.type(length->in(1))->isa_int(); } else { lty = _igvn.type(length)->isa_int(); } if (lty && lty->is_con()) { - const_len = lty->get_con() << shift; + const_len = lty->get_con(); } // Return if copy length is greater than partial inline size limit or // target does not supports masked load/stores. 
int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len); - if ( const_len > ArrayOperationPartialInlineSize || + if (const_len > inline_limit || !Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) || !Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) || !Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) { return; } - Node* copy_bytes = new LShiftXNode(length, intcon(shift)); - transform_later(copy_bytes); - - Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayOperationPartialInlineSize)); + Node* cmp_le = new CmpULNode(length, longcon(inline_limit)); transform_later(cmp_le); Node* bol_le = new BoolNode(cmp_le, BoolTest::le); transform_later(bol_le); inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR); stub_block = *ctrl; - int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type); - Node* casted_length = new CastLLNode(inline_block, length, TypeLong::make(0, inline_limit, Type::WidenMin), ConstraintCastNode::StrongDependency); + Node* casted_length = new CastLLNode(inline_block, length, TypeLong::make(0, inline_limit, Type::WidenMin), ConstraintCastNode::RegularDependency); transform_later(casted_length); Node* mask_gen = VectorMaskGenNode::make(casted_length, type); transform_later(mask_gen); From 72e72180c0c2c80cc74b7151f1c11648b755e464 Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Sun, 18 May 2025 03:30:18 +0700 Subject: [PATCH 3/5] fix --- src/hotspot/share/opto/arraycopynode.cpp | 24 ++++++++-------- src/hotspot/share/opto/arraycopynode.hpp | 2 +- src/hotspot/share/opto/macroArrayCopy.cpp | 34 +++++++++++------------ 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/hotspot/share/opto/arraycopynode.cpp b/src/hotspot/share/opto/arraycopynode.cpp index c913476ffeb72..913d10ca13cf0 100644 --- a/src/hotspot/share/opto/arraycopynode.cpp +++ b/src/hotspot/share/opto/arraycopynode.cpp @@ -768,15 +768,17 @@ 
bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseValues return false; } -// As an optimization, choose optimum vector size for copy length known at compile time. -int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, int const_len) { - int lane_count = ArrayOperationPartialInlineSize/type2aelembytes(type); - if (const_len > 0) { - int size_in_bytes = const_len * type2aelembytes(type); - if (size_in_bytes <= 16) - lane_count = 16/type2aelembytes(type); - else if (size_in_bytes > 16 && size_in_bytes <= 32) - lane_count = 32/type2aelembytes(type); - } - return lane_count; +// As an optimization, choose the optimal vector size for bounded copy length +int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, jlong max_len) { + assert(max_len > 0, JLONG_FORMAT, max_len); + // We only care if max_size_in_bytes is not larger than 32, we also want to avoid multiplication + // overflow, so clamp max_len to [0, 64] + int max_size_in_bytes = MIN2(max_len, 64) * type2aelembytes(type); + if (ArrayOperationPartialInlineSize > 16 && max_size_in_bytes <= 16) { + return 16 / type2aelembytes(type); + } else if (ArrayOperationPartialInlineSize > 32 && max_size_in_bytes <= 32) { + return 32 / type2aelembytes(type); + } else { + return ArrayOperationPartialInlineSize / type2aelembytes(type); + } } diff --git a/src/hotspot/share/opto/arraycopynode.hpp b/src/hotspot/share/opto/arraycopynode.hpp index 13e739fc2c7a5..83c085fd5db00 100644 --- a/src/hotspot/share/opto/arraycopynode.hpp +++ b/src/hotspot/share/opto/arraycopynode.hpp @@ -191,7 +191,7 @@ class ArrayCopyNode : public CallNode { static bool may_modify(const TypeOopPtr* t_oop, MemBarNode* mb, PhaseValues* phase, ArrayCopyNode*& ac); - static int get_partial_inline_vector_lane_count(BasicType type, int const_len); + static int get_partial_inline_vector_lane_count(BasicType type, jlong max_len); bool modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseValues* phase, bool 
must_modify) const; diff --git a/src/hotspot/share/opto/macroArrayCopy.cpp b/src/hotspot/share/opto/macroArrayCopy.cpp index 87c0e5a48c2f5..4d3b0b4a3e042 100644 --- a/src/hotspot/share/opto/macroArrayCopy.cpp +++ b/src/hotspot/share/opto/macroArrayCopy.cpp @@ -204,27 +204,24 @@ void PhaseMacroExpand::generate_limit_guard(Node** ctrl, Node* offset, Node* sub void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type, RegionNode** exit_block, Node** result_memory, Node* length, Node* src_start, Node* dst_start, BasicType type) { - const TypePtr *src_adr_type = _igvn.type(src_start)->isa_ptr(); - Node* inline_block = nullptr; - Node* stub_block = nullptr; int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type); - int const_len = -1; - const TypeInt* lty = nullptr; - if (length->Opcode() == Op_ConvI2L) { - lty = _igvn.type(length->in(1))->isa_int(); - } else { - lty = _igvn.type(length)->isa_int(); - } - if (lty && lty->is_con()) { - const_len = lty->get_con(); + const TypeLong* length_type = _igvn.type(length)->isa_long(); + if (length_type == nullptr) { + assert(_igvn.type(length) == Type::TOP, ""); + return; + } else if (length_type->_hi <= 0) { + // Nothing to copy + return; + } else if (length_type->_lo > inline_limit) { + // Cannot inline + return; } // Return if copy length is greater than partial inline size limit or // target does not supports masked load/stores. 
- int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len); - if (const_len > inline_limit || - !Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) || + int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, length_type->_hi); + if (!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) || !Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) || !Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) { return; @@ -234,19 +231,20 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode transform_later(cmp_le); Node* bol_le = new BoolNode(cmp_le, BoolTest::le); transform_later(bol_le); - inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR); - stub_block = *ctrl; + Node* inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR); + Node* stub_block = *ctrl; Node* casted_length = new CastLLNode(inline_block, length, TypeLong::make(0, inline_limit, Type::WidenMin), ConstraintCastNode::RegularDependency); transform_later(casted_length); Node* mask_gen = VectorMaskGenNode::make(casted_length, type); transform_later(mask_gen); - unsigned vec_size = lane_count * type2aelembytes(type); + unsigned vec_size = lane_count * type2aelembytes(type); if (C->max_vector_size() < vec_size) { C->set_max_vector_size(vec_size); } + const TypePtr* src_adr_type = _igvn.type(src_start)->isa_ptr(); const TypeVect * vt = TypeVect::make(type, lane_count); Node* mm = (*mem)->memory_at(C->get_alias_index(src_adr_type)); Node* masked_load = new LoadVectorMaskedNode(inline_block, mm, src_start, From a5956ba2cf2ccba9c771e786e7bd23170c43f452 Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Sun, 18 May 2025 14:02:21 +0700 Subject: [PATCH 4/5] fix comment --- src/hotspot/share/opto/macroArrayCopy.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/macroArrayCopy.cpp 
b/src/hotspot/share/opto/macroArrayCopy.cpp index 4d3b0b4a3e042..3faf30c57ed09 100644 --- a/src/hotspot/share/opto/macroArrayCopy.cpp +++ b/src/hotspot/share/opto/macroArrayCopy.cpp @@ -218,8 +218,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode return; } - // Return if copy length is greater than partial inline size limit or - // target does not supports masked load/stores. + // Return if the target does not support masked load/stores. int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, length_type->_hi); if (!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) || !Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) || From fdbb88bd3a0c26647cef1ee9b6bc952fe843e56e Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Sun, 18 May 2025 14:02:52 +0700 Subject: [PATCH 5/5] fix comment --- src/hotspot/share/opto/arraycopynode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/arraycopynode.cpp b/src/hotspot/share/opto/arraycopynode.cpp index 913d10ca13cf0..77d7193503e61 100644 --- a/src/hotspot/share/opto/arraycopynode.cpp +++ b/src/hotspot/share/opto/arraycopynode.cpp @@ -771,8 +771,8 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseValues // As an optimization, choose the optimal vector size for bounded copy length int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, jlong max_len) { assert(max_len > 0, JLONG_FORMAT, max_len); - // We only care if max_size_in_bytes is not larger than 32, we also want to avoid multiplication - // overflow, so clamp max_len to [0, 64] + // We only care whether max_size_in_bytes is not larger than 32, we also want to avoid + // multiplication overflow, so clamp max_len to [0, 64] int max_size_in_bytes = MIN2(max_len, 64) * type2aelembytes(type); if (ArrayOperationPartialInlineSize > 16 && max_size_in_bytes <= 16) { return 16 / 
type2aelembytes(type);