Skip to content

8361252: Compact Full-GC Forwarding #26133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions src/hotspot/share/gc/g1/g1Arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include "gc/g1/g1HeapRegionRemSet.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"
#include "gc/shared/workerPolicy.hpp"
#include "runtime/globals.hpp"
Expand Down Expand Up @@ -246,8 +245,6 @@ void G1Arguments::initialize() {
if (max_parallel_refinement_threads > UINT_MAX / divisor) {
vm_exit_during_initialization("Too large parallelism for remembered sets.");
}

FullGCForwarding::initialize_flags(heap_reserved_size_bytes());
}

CollectedHeap* G1Arguments::create_heap() {
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1CollectedHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
#include "gc/shared/classUnloadingContext.hpp"
#include "gc/shared/concurrentGCBreakpoints.hpp"
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/fullGCForwarding.inline.hpp"
#include "gc/shared/gcBehaviours.hpp"
#include "gc/shared/gcHeapSummary.hpp"
#include "gc/shared/gcId.hpp"
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/g1/g1FullCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ void G1FullCollector::collect() {
// Don't add any more derived pointers during later phases
deactivate_derived_pointers();

FullGCForwarding::begin();

phase2_prepare_compaction();

if (has_compaction_targets()) {
Expand All @@ -223,6 +225,8 @@ void G1FullCollector::collect() {
log_info(gc, phases) ("No Regions selected for compaction. Skipping Phase 3: Adjust pointers and Phase 4: Compact heap");
}

FullGCForwarding::end();

phase5_reset_metadata();

G1CollectedHeap::finish_codecache_marking_cycle();
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/share/gc/parallel/parallelArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include "gc/parallel/parallelArguments.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"
#include "gc/shared/genArguments.hpp"
#include "gc/shared/workerPolicy.hpp"
Expand Down Expand Up @@ -94,8 +93,6 @@ void ParallelArguments::initialize() {
if (FLAG_IS_DEFAULT(ParallelRefProcEnabled) && ParallelGCThreads > 1) {
FLAG_SET_DEFAULT(ParallelRefProcEnabled, true);
}

FullGCForwarding::initialize_flags(heap_reserved_size_bytes());
}

// The alignment used for spaces in young gen and old gen
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/parallel/psParallelCompact.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,12 +1057,16 @@ bool PSParallelCompact::invoke_no_policy(bool clear_all_soft_refs) {
DerivedPointerTable::set_active(false);
#endif

FullGCForwarding::begin();

forward_to_new_addr();

adjust_pointers();

compact();

FullGCForwarding::end();

ParCompactionManager::_preserved_marks_set->restore(&ParallelScavengeHeap::heap()->workers());

ParCompactionManager::verify_all_region_stack_empty();
Expand Down
5 changes: 0 additions & 5 deletions src/hotspot/share/gc/serial/serialArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"

void SerialArguments::initialize() {
GCArguments::initialize();
FullGCForwarding::initialize_flags(MaxHeapSize);
}

CollectedHeap* SerialArguments::create_heap() {
return new SerialHeap();
}
1 change: 0 additions & 1 deletion src/hotspot/share/gc/serial/serialArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class CollectedHeap;

class SerialArguments : public GenArguments {
private:
virtual void initialize();
virtual CollectedHeap* create_heap();
};

Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/serial/serialFullGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,8 @@ void SerialFullGC::invoke_at_safepoint(bool clear_all_softrefs) {

phase1_mark(clear_all_softrefs);

FullGCForwarding::begin();

Compacter compacter{gch};

{
Expand Down Expand Up @@ -739,6 +741,8 @@ void SerialFullGC::invoke_at_safepoint(bool clear_all_softrefs) {

restore_marks();

FullGCForwarding::end();

deallocate_stacks();

SerialFullGC::_string_dedup_requests->flush();
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/serial/serialHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include "gc/shared/collectedHeap.inline.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shared/continuationGCSupport.inline.hpp"
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/fullGCForwarding.inline.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcInitLogger.hpp"
#include "gc/shared/gcLocker.inline.hpp"
Expand Down
56 changes: 0 additions & 56 deletions src/hotspot/share/gc/shared/fullGCForwarding.cpp

This file was deleted.

186 changes: 165 additions & 21 deletions src/hotspot/share/gc/shared/fullGCForwarding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,179 @@
#ifndef SHARE_GC_SHARED_FULLGCFORWARDING_HPP
#define SHARE_GC_SHARED_FULLGCFORWARDING_HPP

#include "memory/allStatic.hpp"
#include "memory/allocation.hpp"
#include "memory/memRegion.hpp"
#include "oops/markWord.hpp"
#include "oops/oopsHierarchy.hpp"

/*
* Implements forwarding for the Full GCs of Serial, Parallel, G1 and Shenandoah in
* a way that preserves upper N bits of object mark-words, which contain crucial
* Klass* information when running with compact headers. The encoding is similar to
* compressed-oops encoding: it basically subtracts the forwardee address from the
* heap-base, shifts that difference into the right place, and sets the lowest two
* bits (to indicate 'forwarded' state as usual).
* With compact-headers, we have 40 bits to encode forwarding pointers. This is
* enough to address 8TB of heap. If the heap size exceeds that limit, we turn off
* compact headers.
class FallbackTable;

/**
* FullGCForwarding is a method to store forwarding information in a compressed form into the object header,
* that has been specifically designed for sliding compacting GCs and compact object headers. With compact object
* headers, we store the compressed class pointer in the header, which would be overwritten by full forwarding
* pointers, if we allow the legacy forwarding code to act. This would lose the class information for the object,
* which is required later in the GC cycle to iterate the reference fields and get the object size for copying.
*
* FullGCForwarding requires only small side tables and guarantees constant-time access and modification.
*
* The key advantage of sliding compaction for encoding efficiency:
* - It forwards objects linearly, starting at the heap bottom and moving up to the top, sliding
* live objects towards the bottom of the heap. (The reality in parallel or regionalized GCs is a bit more
* complex, but conceptually it is the same.)
* - Objects starting in any one block can only be forwarded to a memory region that is not larger than
* a block. (There are exceptions to this rule which are discussed below.)
*
* This is an intuitive property: when we slide the compact block full of data, it can not take up more
* memory afterwards.
* This property allows us to use a side table to record the addresses of the target memory region for
* each block. The table holds N entries for N blocks. For each block, it gives the base
* address of the target region, or a special placeholder if not used.
*
* This encoding efficiency allows us to store the forwarding information in the object header _together_ with the
* compressed class pointer.
*
* The idea is to use a pointer compression scheme very similar to the one that is used for compressed oops.
* We divide the heap into a number of equal-sized blocks. Each block spans a maximum of 2^NUM_OFFSET_BITS words.
* We maintain a side-table of target-base-addresses, with one address entry per block.
*
* When recording the sliding forwarding, the mark word would look roughly like this:
*
*   32                              0
*   [.....................OOOOOOOOOTT]
*                                  ^------ tag-bits, indicates 'forwarded'
*                             ^-------- in-region offset
*    ^----------------- protected area, *not touched* by this code, useful for
*                       compressed class pointer with compact object headers
*
* Adding a forwarding then generally works as follows:
* 1. Compute the index of the block of the "from" address.
* 2. Load the target-base-offset of the from-block from the side-table.
* 3. If the base-offset is not-yet set, set it to the to-address of the forwarding.
* (In other words, the first forwarding of a block determines the target base-offset.)
* 4. Compute the offset of the to-address in the target region.
* 5. Store the offset in the object header.
*
* Similarly, looking up the target address, given an original object address generally works as follows:
* 1. Compute the index of the block of the "from" address.
* 2. Load the target-base-offset of the from-block from the side-table.
* 3. Extract the offset from the object header.
* 4. Compute the "to" address from "to" region base and "offset"
*
* We reserve one special value for the offset:
* - 111111111: Indicates an exceptional forwarding (see below), for which a fallback hash-table
* is used to look up the target address.
*
* In order to support this, we need to make a change to the above algorithm:
* - Forwardings that would use offsets >= 111111111 (i.e. the last slot)
* would also need to use the fallback-table. We expect that to be relatively rare for two reasons:
* 1. It only affects 1 out of 512 possible offsets, in other words, 1/512th of all situations in an equal
* distribution.
* 2. Forwardings are not equally-distributed, because normally we 'skip' unreachable objects,
* thus compacting the block. Forwardings tend to cluster at the beginning of the target region,
* and become less likely towards the end of the possible encodable target address range.
* Which means in reality it will be much less frequent than 1/512.
*
* There are several conditions when the above algorithm would be broken because the assumption that
* 'objects from each block can only get forwarded to a region of block-size' is violated:
* - G1 last-ditch serial compaction: there, objects from a single region can be forwarded to multiple
* (more than two) regions. G1 serial compaction is not very common - it is the last-last-ditch GC
* that is used when the JVM is scrambling to squeeze more space out of the heap, and at that point,
* ultimate performance is no longer the main concern.
* - When forwarding hits a space (or G1/Shenandoah region) boundary, then latter objects of a block
* need to be forwarded to a different address range than earlier objects in the same block.
* This is rare.
* - With compact identity hash-code, objects can grow, and in the worst case use up more memory in
* the target block than we can address. We expect that to be rare.
*
* To deal with that, we initialize a fallback-hashtable for storing those extra forwardings, and use a special
* offset pattern (0b11...1) to indicate that the forwardee is not encoded but should be looked-up in the hashtable.
* This implies that this particular offset (the last word of a block) can not be used directly as forwarding,
* but also has to be handled by the fallback-table.
*/
class FullGCForwarding : public AllStatic {
static const int NumLowBitsNarrow = LP64_ONLY(markWord::klass_shift) NOT_LP64(0 /*unused*/);
static const int NumLowBitsWide = BitsPerWord;
static const int Shift = markWord::lock_bits + markWord::lock_shift;
template <int BITS>
class FullGCForwardingImpl : public AllStatic {
friend class FullGCForwardingTest;
static constexpr int AVAILABLE_LOW_BITS = BITS;
static constexpr uintptr_t AVAILABLE_BITS_MASK = right_n_bits(AVAILABLE_LOW_BITS);
// The offset bits start after the lock-bits, which are currently used by Serial GC
// for marking objects. Could be 1 for Serial GC when being clever with the bits,
// and 0 for all other GCs.
static constexpr int OFFSET_BITS_SHIFT = markWord::lock_shift + markWord::lock_bits;

// How many bits we use for the offset
static constexpr int NUM_OFFSET_BITS = AVAILABLE_LOW_BITS - OFFSET_BITS_SHIFT;
static constexpr size_t BLOCK_SIZE_WORDS = 1ll << NUM_OFFSET_BITS;
static constexpr int BLOCK_SIZE_BYTES_SHIFT = NUM_OFFSET_BITS + LogHeapWordSize;
static constexpr size_t MAX_OFFSET = BLOCK_SIZE_WORDS - 2;
static constexpr uintptr_t OFFSET_MASK = right_n_bits(NUM_OFFSET_BITS) << OFFSET_BITS_SHIFT;

static HeapWord* _heap_base;
static int _num_low_bits;
// This offset bit-pattern indicates that the actual mapping is handled by the
// fallback-table. This also implies that this cannot be used as a valid offset,
// and we must also use the fallback-table for mappings to the last word of a
// block.
static constexpr uintptr_t FALLBACK_PATTERN = right_n_bits(NUM_OFFSET_BITS);
static constexpr uintptr_t FALLBACK_PATTERN_IN_PLACE = FALLBACK_PATTERN << OFFSET_BITS_SHIFT;

// Indicates an unused base address in the target base table.
static HeapWord* const UNUSED_BASE;

static HeapWord* _heap_start;

static size_t _heap_start_region_bias;
static size_t _num_regions;
static uintptr_t _region_mask;

// The target base table memory.
static HeapWord** _bases_table;
// Entries into the target base tables, biased to the start of the heap.
static HeapWord** _biased_bases;

static size_t _fallback_table_log2_start_size;
static FallbackTable* _fallback_table;

#ifndef PRODUCT
static volatile uint64_t _num_forwardings;
static volatile uint64_t _num_fallback_forwardings;
#endif

static size_t biased_region_index_containing(HeapWord* addr);

static bool is_fallback(uintptr_t encoded);
static uintptr_t encode_forwarding(HeapWord* from, HeapWord* to);
static HeapWord* decode_forwarding(HeapWord* from, uintptr_t encoded);

static void maybe_init_fallback_table();
static void fallback_forward_to(HeapWord* from, HeapWord* to);
static HeapWord* fallback_forwardee(HeapWord* from);

static void forward_to_impl(oop from, oop to);
static oop forwardee_impl(oop from);

FullGCForwardingImpl() = delete;

// Test-only hook: overwrites the static _fallback_table_log2_start_size with the
// given value. Used in unit-test, so that we can test fallback-table-growth.
// NOTE(review): judging by the name, the value is the log2 of the fallback
// table's initial size; where and when it is consumed is not visible here - confirm.
static void set_fallback_table_log2_start_size(size_t fallback_table_log2_start_size) {
_fallback_table_log2_start_size = fallback_table_log2_start_size;
}
public:
static void initialize_flags(size_t max_heap_size);
static void initialize(MemRegion heap);
static inline void forward_to(oop from, oop to);
static inline oop forwardee(oop from);
static inline bool is_forwarded(oop obj);

static void begin();
static void end();

static bool is_forwarded(oop obj);

static void forward_to(oop from, oop to);
static oop forwardee(oop from);
};

#ifdef _LP64
using FullGCForwarding = FullGCForwardingImpl<markWord::klass_shift>;
#else
// On 32 bit, the BITS template argument is not used, but we still need
// to pass a value.
using FullGCForwarding = FullGCForwardingImpl<0>;
#endif

#endif // SHARE_GC_SHARED_FULLGCFORWARDING_HPP
Loading