/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
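  // The bitmap keeps one bit per (1 << _shifter) heap words, so align addr
  // up to that granularity before converting it to a bit offset.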
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
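  // Sizing note: brs was reserved with (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
  // bytes, i.e. one bit per (1 << _shifter) heap words. With _shifter == 0 (one bit per
  // heap word, the usual case on a 64-bit VM with 8-byte object alignment) a 1 GB heap
  // needs roughly 16 MB of backing store per marking bitmap.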
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
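  // Unlike the CAS loops in par_push() and par_adjoin_arr(), this path holds
  // ParGCRareEvent_lock, so simply bumping _index reserves n contiguous slots
  // for this caller before they are filled in below.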
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
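  // Double-checked locking: the unsynchronized read of _next_survivor below is
  // only a fast-path check; it is re-read under RootRegionScan_lock before the
  // list is advanced, so two claimers cannot hand out the same region.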
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
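      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U); for example,
      // ParallelGCThreads == 8 gives 2 marking threads and
      // ParallelGCThreads == 4 gives 1.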
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
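    // Only checked when MarkStackSize was set on the command line; the two
    // cases below (MarkStackSizeMax at its default, or both flags on the
    // command line) both require MarkStackSize to lie in [1, MarkStackSizeMax].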
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
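// It walks the next marking bitmap for a region, recording the marked bytes
// and setting bits in the expected region/card bitmaps, which
// VerifyLiveObjectDataHRClosure below then compares against the actual ones.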
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1592 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1593 1594 bool expected = _exp_region_bm->at(index); 1595 bool actual = _region_bm->at(index); 1596 if (expected && !actual) { 1597 if (_verbose) { 1598 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1599 "expected: %s, actual: %s", 1600 hr->hrs_index(), 1601 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1602 } 1603 failures += 1; 1604 } 1605 1606 // Verify that the card bit maps for the cards spanned by the current 1607 // region match. We have an error if we have a set bit in the expected 1608 // bit map and the corresponding bit in the actual bitmap is not set. 1609 1610 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1611 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1612 1613 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1614 expected = _exp_card_bm->at(i); 1615 actual = _card_bm->at(i); 1616 1617 if (expected && !actual) { 1618 if (_verbose) { 1619 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1620 "expected: %s, actual: %s", 1621 hr->hrs_index(), i, 1622 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1623 } 1624 failures += 1; 1625 } 1626 } 1627 1628 if (failures > 0 && _verbose) { 1629 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1630 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1631 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1632 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1633 } 1634 1635 _failures += failures; 1636 1637 // We could stop iteration over the heap when we 1638 // find the first violating region by returning true. 1639 return false; 1640 } 1641 }; 1642 1643 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1644 protected: 1645 G1CollectedHeap* _g1h; 1646 ConcurrentMark* _cm; 1647 BitMap* _actual_region_bm; 1648 BitMap* _actual_card_bm; 1649 1650 uint _n_workers; 1651 1652 BitMap* _expected_region_bm; 1653 BitMap* _expected_card_bm; 1654 1655 int _failures; 1656 bool _verbose; 1657 1658 public: 1659 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1660 BitMap* region_bm, BitMap* card_bm, 1661 BitMap* expected_region_bm, BitMap* expected_card_bm) 1662 : AbstractGangTask("G1 verify final counting"), 1663 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1664 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1665 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1666 _failures(0), _verbose(false), 1667 _n_workers(0) { 1668 assert(VerifyDuringGC, "don't call this otherwise"); 1669 1670 // Use the value already set as the number of active threads 1671 // in the call to run_task(). 
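    // The caller side looks roughly like this (see ConcurrentMark::cleanup()):
    //
    //   g1h->set_par_threads((int)n_workers);
    //   g1h->workers()->run_task(&g1_par_verify_task);  // work(worker_id) per worker
    //   g1h->set_par_threads(0);
    //
    // so workers()->active_workers() is already meaningful by the time
    // this constructor runs.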
1672 if (G1CollectedHeap::use_parallel_gc_threads()) { 1673 assert( _g1h->workers()->active_workers() > 0, 1674 "Should have been previously set"); 1675 _n_workers = _g1h->workers()->active_workers(); 1676 } else { 1677 _n_workers = 1; 1678 } 1679 1680 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1681 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1682 1683 _verbose = _cm->verbose_medium(); 1684 } 1685 1686 void work(uint worker_id) { 1687 assert(worker_id < _n_workers, "invariant"); 1688 1689 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1690 _actual_region_bm, _actual_card_bm, 1691 _expected_region_bm, 1692 _expected_card_bm, 1693 _verbose); 1694 1695 if (G1CollectedHeap::use_parallel_gc_threads()) { 1696 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1697 worker_id, 1698 _n_workers, 1699 HeapRegion::VerifyCountClaimValue); 1700 } else { 1701 _g1h->heap_region_iterate(&verify_cl); 1702 } 1703 1704 Atomic::add(verify_cl.failures(), &_failures); 1705 } 1706 1707 int failures() const { return _failures; } 1708 }; 1709 1710 // Closure that finalizes the liveness counting data. 1711 // Used during the cleanup pause. 1712 // Sets the bits corresponding to the interval [NTAMS, top] 1713 // (which contains the implicitly live objects) in the 1714 // card liveness bitmap. Also sets the bit for each region, 1715 // containing live data, in the region liveness bitmap. 1716 1717 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1718 public: 1719 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1720 BitMap* region_bm, 1721 BitMap* card_bm) : 1722 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1723 1724 bool doHeapRegion(HeapRegion* hr) { 1725 1726 if (hr->continuesHumongous()) { 1727 // We will ignore these here and process them when their 1728 // associated "starts humongous" region is processed (see 1729 // set_bit_for_heap_region()). Note that we cannot rely on their 1730 // associated "starts humongous" region to have their bit set to 1731 // 1 since, due to the region chunking in the parallel region 1732 // iteration, a "continues humongous" region might be visited 1733 // before its associated "starts humongous". 1734 return false; 1735 } 1736 1737 HeapWord* ntams = hr->next_top_at_mark_start(); 1738 HeapWord* top = hr->top(); 1739 1740 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1741 1742 // Mark the allocated-since-marking portion... 1743 if (ntams < top) { 1744 // This definitely means the region has live objects. 1745 set_bit_for_region(hr); 1746 1747 // Now set the bits in the card bitmap for [ntams, top) 1748 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1749 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1750 1751 // Note: if we're looking at the last region in heap - top 1752 // could be actually just beyond the end of the heap; end_idx 1753 // will then correspond to a (non-existent) card that is also 1754 // just beyond the heap. 
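      // As an illustrative example with the usual 512 byte cards: if top
      // falls 128 bytes into the card with index 9, card_bitmap_index_for(top)
      // returns 9 even though that card is only partially covered; bumping
      // end_idx to 10 makes the half-open range [start_idx, 10) include
      // the partial card as well.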
1755 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1756 // end of object is not card aligned - increment to cover 1757 // all the cards spanned by the object 1758 end_idx += 1; 1759 } 1760 1761 assert(end_idx <= _card_bm->size(), 1762 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1763 end_idx, _card_bm->size())); 1764 assert(start_idx < _card_bm->size(), 1765 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1766 start_idx, _card_bm->size())); 1767 1768 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1769 } 1770 1771 // Set the bit for the region if it contains live data 1772 if (hr->next_marked_bytes() > 0) { 1773 set_bit_for_region(hr); 1774 } 1775 1776 return false; 1777 } 1778 }; 1779 1780 class G1ParFinalCountTask: public AbstractGangTask { 1781 protected: 1782 G1CollectedHeap* _g1h; 1783 ConcurrentMark* _cm; 1784 BitMap* _actual_region_bm; 1785 BitMap* _actual_card_bm; 1786 1787 uint _n_workers; 1788 1789 public: 1790 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1791 : AbstractGangTask("G1 final counting"), 1792 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1793 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1794 _n_workers(0) { 1795 // Use the value already set as the number of active threads 1796 // in the call to run_task(). 1797 if (G1CollectedHeap::use_parallel_gc_threads()) { 1798 assert( _g1h->workers()->active_workers() > 0, 1799 "Should have been previously set"); 1800 _n_workers = _g1h->workers()->active_workers(); 1801 } else { 1802 _n_workers = 1; 1803 } 1804 } 1805 1806 void work(uint worker_id) { 1807 assert(worker_id < _n_workers, "invariant"); 1808 1809 FinalCountDataUpdateClosure final_update_cl(_g1h, 1810 _actual_region_bm, 1811 _actual_card_bm); 1812 1813 if (G1CollectedHeap::use_parallel_gc_threads()) { 1814 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1815 worker_id, 1816 _n_workers, 1817 HeapRegion::FinalCountClaimValue); 1818 } else { 1819 _g1h->heap_region_iterate(&final_update_cl); 1820 } 1821 } 1822 }; 1823 1824 class G1ParNoteEndTask; 1825 1826 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1827 G1CollectedHeap* _g1; 1828 size_t _max_live_bytes; 1829 uint _regions_claimed; 1830 size_t _freed_bytes; 1831 FreeRegionList* _local_cleanup_list; 1832 HeapRegionSetCount _old_regions_removed; 1833 HeapRegionSetCount _humongous_regions_removed; 1834 HRRSCleanupTask* _hrrs_cleanup_task; 1835 double _claimed_region_time; 1836 double _max_region_time; 1837 1838 public: 1839 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1840 FreeRegionList* local_cleanup_list, 1841 HRRSCleanupTask* hrrs_cleanup_task) : 1842 _g1(g1), 1843 _max_live_bytes(0), _regions_claimed(0), 1844 _freed_bytes(0), 1845 _claimed_region_time(0.0), _max_region_time(0.0), 1846 _local_cleanup_list(local_cleanup_list), 1847 _old_regions_removed(), 1848 _humongous_regions_removed(), 1849 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1850 1851 size_t freed_bytes() { return _freed_bytes; } 1852 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1853 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1854 1855 bool doHeapRegion(HeapRegion *hr) { 1856 if (hr->continuesHumongous()) { 1857 return false; 1858 } 1859 // We use a claim value of zero here because all regions 1860 // were claimed with value 1 in the FinalCount task. 
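    // A note on the claim values used by the parallel phases in this file
    // (FinalCount, NoteEnd, ScrubRemSet, VerifyCount, AggregateCount):
    // a worker claims a region by atomically moving the region's claim
    // value to the phase's value, so each region is handled at most once
    // per phase, and check_heap_region_claim_values() can assert afterwards
    // that no region was missed. Conceptually:
    //
    //   if (hr->claimHeapRegion(claim_value)) {
    //     // this worker is the one that processes hr in this phase
    //   }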
1861 _g1->reset_gc_time_stamps(hr); 1862 double start = os::elapsedTime(); 1863 _regions_claimed++; 1864 hr->note_end_of_marking(); 1865 _max_live_bytes += hr->max_live_bytes(); 1866 1867 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1868 _freed_bytes += hr->used(); 1869 hr->set_containing_set(NULL); 1870 if (hr->isHumongous()) { 1871 assert(hr->startsHumongous(), "we should only see starts humongous"); 1872 _humongous_regions_removed.increment(1u, hr->capacity()); 1873 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1874 } else { 1875 _old_regions_removed.increment(1u, hr->capacity()); 1876 _g1->free_region(hr, _local_cleanup_list, true); 1877 } 1878 } else { 1879 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1880 } 1881 1882 double region_time = (os::elapsedTime() - start); 1883 _claimed_region_time += region_time; 1884 if (region_time > _max_region_time) { 1885 _max_region_time = region_time; 1886 } 1887 return false; 1888 } 1889 1890 size_t max_live_bytes() { return _max_live_bytes; } 1891 uint regions_claimed() { return _regions_claimed; } 1892 double claimed_region_time_sec() { return _claimed_region_time; } 1893 double max_region_time_sec() { return _max_region_time; } 1894 }; 1895 1896 class G1ParNoteEndTask: public AbstractGangTask { 1897 friend class G1NoteEndOfConcMarkClosure; 1898 1899 protected: 1900 G1CollectedHeap* _g1h; 1901 size_t _max_live_bytes; 1902 size_t _freed_bytes; 1903 FreeRegionList* _cleanup_list; 1904 1905 public: 1906 G1ParNoteEndTask(G1CollectedHeap* g1h, 1907 FreeRegionList* cleanup_list) : 1908 AbstractGangTask("G1 note end"), _g1h(g1h), 1909 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1910 1911 void work(uint worker_id) { 1912 double start = os::elapsedTime(); 1913 FreeRegionList local_cleanup_list("Local Cleanup List"); 1914 HRRSCleanupTask hrrs_cleanup_task; 1915 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1916 &hrrs_cleanup_task); 1917 if (G1CollectedHeap::use_parallel_gc_threads()) { 1918 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1919 _g1h->workers()->active_workers(), 1920 HeapRegion::NoteEndClaimValue); 1921 } else { 1922 _g1h->heap_region_iterate(&g1_note_end); 1923 } 1924 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1925 1926 // Now update the lists 1927 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1928 { 1929 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1930 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1931 _max_live_bytes += g1_note_end.max_live_bytes(); 1932 _freed_bytes += g1_note_end.freed_bytes(); 1933 1934 // If we iterate over the global cleanup list at the end of 1935 // cleanup to do this printing we will not guarantee to only 1936 // generate output for the newly-reclaimed regions (the list 1937 // might not be empty at the beginning of cleanup; we might 1938 // still be working on its previous contents). So we do the 1939 // printing here, before we append the new regions to the global 1940 // cleanup list. 
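      // The flow per worker is roughly:
      //
      //   freed regions      -> local_cleanup_list   (lock free, per worker)
      //   local_cleanup_list -> _cleanup_list        (under ParGCRareEvent_lock)
      //
      // and later, concurrently, ConcurrentMark::completeCleanup() drains
      // _cleanup_list into the secondary free list a few regions at a time.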
1941 1942 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1943 if (hr_printer->is_active()) { 1944 FreeRegionListIterator iter(&local_cleanup_list); 1945 while (iter.more_available()) { 1946 HeapRegion* hr = iter.get_next(); 1947 hr_printer->cleanup(hr); 1948 } 1949 } 1950 1951 _cleanup_list->add_ordered(&local_cleanup_list); 1952 assert(local_cleanup_list.is_empty(), "post-condition"); 1953 1954 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1955 } 1956 } 1957 size_t max_live_bytes() { return _max_live_bytes; } 1958 size_t freed_bytes() { return _freed_bytes; } 1959 }; 1960 1961 class G1ParScrubRemSetTask: public AbstractGangTask { 1962 protected: 1963 G1RemSet* _g1rs; 1964 BitMap* _region_bm; 1965 BitMap* _card_bm; 1966 public: 1967 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1968 BitMap* region_bm, BitMap* card_bm) : 1969 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1970 _region_bm(region_bm), _card_bm(card_bm) { } 1971 1972 void work(uint worker_id) { 1973 if (G1CollectedHeap::use_parallel_gc_threads()) { 1974 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1975 HeapRegion::ScrubRemSetClaimValue); 1976 } else { 1977 _g1rs->scrub(_region_bm, _card_bm); 1978 } 1979 } 1980 1981 }; 1982 1983 void ConcurrentMark::cleanup() { 1984 // world is stopped at this checkpoint 1985 assert(SafepointSynchronize::is_at_safepoint(), 1986 "world should be stopped"); 1987 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1988 1989 // If a full collection has happened, we shouldn't do this. 1990 if (has_aborted()) { 1991 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1992 return; 1993 } 1994 1995 g1h->verify_region_sets_optional(); 1996 1997 if (VerifyDuringGC) { 1998 HandleMark hm; // handle scope 1999 Universe::heap()->prepare_for_verify(); 2000 Universe::verify(VerifyOption_G1UsePrevMarking, 2001 " VerifyDuringGC:(before)"); 2002 } 2003 g1h->check_bitmaps("Cleanup Start"); 2004 2005 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2006 g1p->record_concurrent_mark_cleanup_start(); 2007 2008 double start = os::elapsedTime(); 2009 2010 HeapRegionRemSet::reset_for_cleanup_tasks(); 2011 2012 uint n_workers; 2013 2014 // Do counting once more with the world stopped for good measure. 2015 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2016 2017 if (G1CollectedHeap::use_parallel_gc_threads()) { 2018 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2019 "sanity check"); 2020 2021 g1h->set_par_threads(); 2022 n_workers = g1h->n_par_threads(); 2023 assert(g1h->n_par_threads() == n_workers, 2024 "Should not have been reset"); 2025 g1h->workers()->run_task(&g1_par_count_task); 2026 // Done with the parallel phase so reset to 0. 2027 g1h->set_par_threads(0); 2028 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2030 "sanity check"); 2031 } else { 2032 n_workers = 1; 2033 g1_par_count_task.work(0); 2034 } 2035 2036 if (VerifyDuringGC) { 2037 // Verify that the counting data accumulated during marking matches 2038 // that calculated by walking the marking bitmap. 
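    // The expected values are recomputed from scratch: temporary region
    // and card bitmaps are filled by re-walking the next mark bitmap
    // (CalcLiveObjectsClosure) and then compared bit for bit against
    // _region_bm and _card_bm (VerifyLiveObjectDataHRClosure).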
2039 2040 // Bitmaps to hold expected values 2041 BitMap expected_region_bm(_region_bm.size(), true); 2042 BitMap expected_card_bm(_card_bm.size(), true); 2043 2044 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2045 &_region_bm, 2046 &_card_bm, 2047 &expected_region_bm, 2048 &expected_card_bm); 2049 2050 if (G1CollectedHeap::use_parallel_gc_threads()) { 2051 g1h->set_par_threads((int)n_workers); 2052 g1h->workers()->run_task(&g1_par_verify_task); 2053 // Done with the parallel phase so reset to 0. 2054 g1h->set_par_threads(0); 2055 2056 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2057 "sanity check"); 2058 } else { 2059 g1_par_verify_task.work(0); 2060 } 2061 2062 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2063 } 2064 2065 size_t start_used_bytes = g1h->used(); 2066 g1h->set_marking_complete(); 2067 2068 double count_end = os::elapsedTime(); 2069 double this_final_counting_time = (count_end - start); 2070 _total_counting_time += this_final_counting_time; 2071 2072 if (G1PrintRegionLivenessInfo) { 2073 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2074 _g1h->heap_region_iterate(&cl); 2075 } 2076 2077 // Install newly created mark bitMap as "prev". 2078 swapMarkBitMaps(); 2079 2080 g1h->reset_gc_time_stamp(); 2081 2082 // Note end of marking in all heap regions. 2083 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2084 if (G1CollectedHeap::use_parallel_gc_threads()) { 2085 g1h->set_par_threads((int)n_workers); 2086 g1h->workers()->run_task(&g1_par_note_end_task); 2087 g1h->set_par_threads(0); 2088 2089 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2090 "sanity check"); 2091 } else { 2092 g1_par_note_end_task.work(0); 2093 } 2094 g1h->check_gc_time_stamps(); 2095 2096 if (!cleanup_list_is_empty()) { 2097 // The cleanup list is not empty, so we'll have to process it 2098 // concurrently. Notify anyone else that might be wanting free 2099 // regions that there will be more free regions coming soon. 2100 g1h->set_free_regions_coming(); 2101 } 2102 2103 // call below, since it affects the metric by which we sort the heap 2104 // regions. 2105 if (G1ScrubRemSets) { 2106 double rs_scrub_start = os::elapsedTime(); 2107 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2108 if (G1CollectedHeap::use_parallel_gc_threads()) { 2109 g1h->set_par_threads((int)n_workers); 2110 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2111 g1h->set_par_threads(0); 2112 2113 assert(g1h->check_heap_region_claim_values( 2114 HeapRegion::ScrubRemSetClaimValue), 2115 "sanity check"); 2116 } else { 2117 g1_par_scrub_rs_task.work(0); 2118 } 2119 2120 double rs_scrub_end = os::elapsedTime(); 2121 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2122 _total_rs_scrub_time += this_rs_scrub_time; 2123 } 2124 2125 // this will also free any regions totally full of garbage objects, 2126 // and sort the regions. 2127 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2128 2129 // Statistics. 2130 double end = os::elapsedTime(); 2131 _cleanup_times.add((end - start) * 1000.0); 2132 2133 if (G1Log::fine()) { 2134 g1h->print_size_transition(gclog_or_tty, 2135 start_used_bytes, 2136 g1h->used(), 2137 g1h->capacity()); 2138 } 2139 2140 // Clean up will have freed any regions completely full of garbage. 2141 // Update the soft reference policy with the new heap occupancy. 
2142 Universe::update_heap_info_at_gc(); 2143 2144 // We need to make this be a "collection" so any collection pause that 2145 // races with it goes around and waits for completeCleanup to finish. 2146 g1h->increment_total_collections(); 2147 2148 // We reclaimed old regions so we should calculate the sizes to make 2149 // sure we update the old gen/space data. 2150 g1h->g1mm()->update_sizes(); 2151 2152 if (VerifyDuringGC) { 2153 HandleMark hm; // handle scope 2154 Universe::heap()->prepare_for_verify(); 2155 Universe::verify(VerifyOption_G1UsePrevMarking, 2156 " VerifyDuringGC:(after)"); 2157 } 2158 g1h->check_bitmaps("Cleanup End"); 2159 2160 g1h->verify_region_sets_optional(); 2161 g1h->trace_heap_after_concurrent_cycle(); 2162 } 2163 2164 void ConcurrentMark::completeCleanup() { 2165 if (has_aborted()) return; 2166 2167 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2168 2169 _cleanup_list.verify_optional(); 2170 FreeRegionList tmp_free_list("Tmp Free List"); 2171 2172 if (G1ConcRegionFreeingVerbose) { 2173 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2174 "cleanup list has %u entries", 2175 _cleanup_list.length()); 2176 } 2177 2178 // Noone else should be accessing the _cleanup_list at this point, 2179 // so it's not necessary to take any locks 2180 while (!_cleanup_list.is_empty()) { 2181 HeapRegion* hr = _cleanup_list.remove_head(); 2182 assert(hr != NULL, "Got NULL from a non-empty list"); 2183 hr->par_clear(); 2184 tmp_free_list.add_ordered(hr); 2185 2186 // Instead of adding one region at a time to the secondary_free_list, 2187 // we accumulate them in the local list and move them a few at a 2188 // time. This also cuts down on the number of notify_all() calls 2189 // we do during this process. We'll also append the local list when 2190 // _cleanup_list is empty (which means we just removed the last 2191 // region from the _cleanup_list). 2192 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2193 _cleanup_list.is_empty()) { 2194 if (G1ConcRegionFreeingVerbose) { 2195 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2196 "appending %u entries to the secondary_free_list, " 2197 "cleanup list still has %u entries", 2198 tmp_free_list.length(), 2199 _cleanup_list.length()); 2200 } 2201 2202 { 2203 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2204 g1h->secondary_free_list_add(&tmp_free_list); 2205 SecondaryFreeList_lock->notify_all(); 2206 } 2207 2208 if (G1StressConcRegionFreeing) { 2209 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2210 os::sleep(Thread::current(), (jlong) 1, false); 2211 } 2212 } 2213 } 2214 } 2215 assert(tmp_free_list.is_empty(), "post-condition"); 2216 } 2217 2218 // Supporting Object and Oop closures for reference discovery 2219 // and processing in during marking 2220 2221 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2222 HeapWord* addr = (HeapWord*)obj; 2223 return addr != NULL && 2224 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2225 } 2226 2227 // 'Keep Alive' oop closure used by both serial parallel reference processing. 2228 // Uses the CMTask associated with a worker thread (for serial reference 2229 // processing the CMTask for worker 0 is used) to preserve (mark) and 2230 // trace referent objects. 2231 // 2232 // Using the CMTask and embedded local queues avoids having the worker 2233 // threads operating on the global mark stack. 
This reduces the risk 2234 // of overflowing the stack - which we would rather avoid at this late 2235 // state. Also using the tasks' local queues removes the potential 2236 // of the workers interfering with each other that could occur if 2237 // operating on the global stack. 2238 2239 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2240 ConcurrentMark* _cm; 2241 CMTask* _task; 2242 int _ref_counter_limit; 2243 int _ref_counter; 2244 bool _is_serial; 2245 public: 2246 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2247 _cm(cm), _task(task), _is_serial(is_serial), 2248 _ref_counter_limit(G1RefProcDrainInterval) { 2249 assert(_ref_counter_limit > 0, "sanity"); 2250 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2251 _ref_counter = _ref_counter_limit; 2252 } 2253 2254 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2255 virtual void do_oop( oop* p) { do_oop_work(p); } 2256 2257 template <class T> void do_oop_work(T* p) { 2258 if (!_cm->has_overflown()) { 2259 oop obj = oopDesc::load_decode_heap_oop(p); 2260 if (_cm->verbose_high()) { 2261 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2262 "*"PTR_FORMAT" = "PTR_FORMAT, 2263 _task->worker_id(), p2i(p), p2i((void*) obj)); 2264 } 2265 2266 _task->deal_with_reference(obj); 2267 _ref_counter--; 2268 2269 if (_ref_counter == 0) { 2270 // We have dealt with _ref_counter_limit references, pushing them 2271 // and objects reachable from them on to the local stack (and 2272 // possibly the global stack). Call CMTask::do_marking_step() to 2273 // process these entries. 2274 // 2275 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2276 // there's nothing more to do (i.e. we're done with the entries that 2277 // were pushed as a result of the CMTask::deal_with_reference() calls 2278 // above) or we overflow. 2279 // 2280 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2281 // flag while there may still be some work to do. (See the comment at 2282 // the beginning of CMTask::do_marking_step() for those conditions - 2283 // one of which is reaching the specified time target.) It is only 2284 // when CMTask::do_marking_step() returns without setting the 2285 // has_aborted() flag that the marking step has completed. 2286 do { 2287 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2288 _task->do_marking_step(mark_step_duration_ms, 2289 false /* do_termination */, 2290 _is_serial); 2291 } while (_task->has_aborted() && !_cm->has_overflown()); 2292 _ref_counter = _ref_counter_limit; 2293 } 2294 } else { 2295 if (_cm->verbose_high()) { 2296 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2297 } 2298 } 2299 } 2300 }; 2301 2302 // 'Drain' oop closure used by both serial and parallel reference processing. 2303 // Uses the CMTask associated with a given worker thread (for serial 2304 // reference processing the CMtask for worker 0 is used). Calls the 2305 // do_marking_step routine, with an unbelievably large timeout value, 2306 // to drain the marking data structures of the remaining entries 2307 // added by the 'keep alive' oop closure above. 
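// Taken together, the two closures split the work roughly as follows
// (both loop while the task aborts without a mark stack overflow):
//
//   keep alive: do_marking_step(G1ConcMarkStepDurationMillis,
//                               false /* do_termination */, is_serial)
//               every G1RefProcDrainInterval references
//   drain:      do_marking_step(1000000000.0 /* no real time limit */,
//                               true  /* do_termination */, is_serial)
//               once the reference lists have been processed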
2308 2309 class G1CMDrainMarkingStackClosure: public VoidClosure { 2310 ConcurrentMark* _cm; 2311 CMTask* _task; 2312 bool _is_serial; 2313 public: 2314 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2315 _cm(cm), _task(task), _is_serial(is_serial) { 2316 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2317 } 2318 2319 void do_void() { 2320 do { 2321 if (_cm->verbose_high()) { 2322 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2323 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2324 } 2325 2326 // We call CMTask::do_marking_step() to completely drain the local 2327 // and global marking stacks of entries pushed by the 'keep alive' 2328 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2329 // 2330 // CMTask::do_marking_step() is called in a loop, which we'll exit 2331 // if there's nothing more to do (i.e. we've completely drained the 2332 // entries that were pushed as a a result of applying the 'keep alive' 2333 // closure to the entries on the discovered ref lists) or we overflow 2334 // the global marking stack. 2335 // 2336 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2337 // flag while there may still be some work to do. (See the comment at 2338 // the beginning of CMTask::do_marking_step() for those conditions - 2339 // one of which is reaching the specified time target.) It is only 2340 // when CMTask::do_marking_step() returns without setting the 2341 // has_aborted() flag that the marking step has completed. 2342 2343 _task->do_marking_step(1000000000.0 /* something very large */, 2344 true /* do_termination */, 2345 _is_serial); 2346 } while (_task->has_aborted() && !_cm->has_overflown()); 2347 } 2348 }; 2349 2350 // Implementation of AbstractRefProcTaskExecutor for parallel 2351 // reference processing at the end of G1 concurrent marking 2352 2353 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2354 private: 2355 G1CollectedHeap* _g1h; 2356 ConcurrentMark* _cm; 2357 WorkGang* _workers; 2358 int _active_workers; 2359 2360 public: 2361 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2362 ConcurrentMark* cm, 2363 WorkGang* workers, 2364 int n_workers) : 2365 _g1h(g1h), _cm(cm), 2366 _workers(workers), _active_workers(n_workers) { } 2367 2368 // Executes the given task using concurrent marking worker threads. 
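  // The reference processor drives these through the
  // AbstractRefProcTaskExecutor interface, roughly:
  //
  //   rp->process_discovered_references(..., executor, ...)
  //     -> executor->execute(proc_task)
  //       -> the work gang runs G1CMRefProcTaskProxy::work(worker_id)
  //         -> proc_task.work(worker_id, is_alive, keep_alive, drain)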
2369 virtual void execute(ProcessTask& task); 2370 virtual void execute(EnqueueTask& task); 2371 }; 2372 2373 class G1CMRefProcTaskProxy: public AbstractGangTask { 2374 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2375 ProcessTask& _proc_task; 2376 G1CollectedHeap* _g1h; 2377 ConcurrentMark* _cm; 2378 2379 public: 2380 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2381 G1CollectedHeap* g1h, 2382 ConcurrentMark* cm) : 2383 AbstractGangTask("Process reference objects in parallel"), 2384 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2385 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2386 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2387 } 2388 2389 virtual void work(uint worker_id) { 2390 CMTask* task = _cm->task(worker_id); 2391 G1CMIsAliveClosure g1_is_alive(_g1h); 2392 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2393 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2394 2395 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2396 } 2397 }; 2398 2399 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2400 assert(_workers != NULL, "Need parallel worker threads."); 2401 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2402 2403 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2404 2405 // We need to reset the concurrency level before each 2406 // proxy task execution, so that the termination protocol 2407 // and overflow handling in CMTask::do_marking_step() knows 2408 // how many workers to wait for. 2409 _cm->set_concurrency(_active_workers); 2410 _g1h->set_par_threads(_active_workers); 2411 _workers->run_task(&proc_task_proxy); 2412 _g1h->set_par_threads(0); 2413 } 2414 2415 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2416 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2417 EnqueueTask& _enq_task; 2418 2419 public: 2420 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2421 AbstractGangTask("Enqueue reference objects in parallel"), 2422 _enq_task(enq_task) { } 2423 2424 virtual void work(uint worker_id) { 2425 _enq_task.work(worker_id); 2426 } 2427 }; 2428 2429 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2430 assert(_workers != NULL, "Need parallel worker threads."); 2431 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2432 2433 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2434 2435 // Not strictly necessary but... 2436 // 2437 // We need to reset the concurrency level before each 2438 // proxy task execution, so that the termination protocol 2439 // and overflow handling in CMTask::do_marking_step() knows 2440 // how many workers to wait for. 2441 _cm->set_concurrency(_active_workers); 2442 _g1h->set_par_threads(_active_workers); 2443 _workers->run_task(&enq_task_proxy); 2444 _g1h->set_par_threads(0); 2445 } 2446 2447 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2448 if (has_overflown()) { 2449 // Skip processing the discovered references if we have 2450 // overflown the global marking stack. Reference objects 2451 // only get discovered once so it is OK to not 2452 // de-populate the discovered reference lists. We could have, 2453 // but the only benefit would be that, when marking restarts, 2454 // less reference objects are discovered. 2455 return; 2456 } 2457 2458 ResourceMark rm; 2459 HandleMark hm; 2460 2461 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2462 2463 // Is alive closure. 
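  // As implemented in G1CMIsAliveClosure::do_object_b() above, the
  // predicate is roughly:
  //
  //   obj != NULL && (!is_in_g1_reserved(obj) || !is_obj_ill(obj))
  //
  // i.e. anything outside the G1 reserved space, or anything the marking
  // information does not consider dead, is treated as alive.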
2464 G1CMIsAliveClosure g1_is_alive(g1h); 2465 2466 // Inner scope to exclude the cleaning of the string and symbol 2467 // tables from the displayed time. 2468 { 2469 if (G1Log::finer()) { 2470 gclog_or_tty->put(' '); 2471 } 2472 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm()); 2473 2474 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2475 2476 // See the comment in G1CollectedHeap::ref_processing_init() 2477 // about how reference processing currently works in G1. 2478 2479 // Set the soft reference policy 2480 rp->setup_policy(clear_all_soft_refs); 2481 assert(_markStack.isEmpty(), "mark stack should be empty"); 2482 2483 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2484 // in serial reference processing. Note these closures are also 2485 // used for serially processing (by the the current thread) the 2486 // JNI references during parallel reference processing. 2487 // 2488 // These closures do not need to synchronize with the worker 2489 // threads involved in parallel reference processing as these 2490 // instances are executed serially by the current thread (e.g. 2491 // reference processing is not multi-threaded and is thus 2492 // performed by the current thread instead of a gang worker). 2493 // 2494 // The gang tasks involved in parallel reference processing create 2495 // their own instances of these closures, which do their own 2496 // synchronization among themselves. 2497 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2498 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2499 2500 // We need at least one active thread. If reference processing 2501 // is not multi-threaded we use the current (VMThread) thread, 2502 // otherwise we use the work gang from the G1CollectedHeap and 2503 // we utilize all the worker threads we can. 2504 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2505 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2506 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2507 2508 // Parallel processing task executor. 2509 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2510 g1h->workers(), active_workers); 2511 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2512 2513 // Set the concurrency level. The phase was already set prior to 2514 // executing the remark task. 2515 set_concurrency(active_workers); 2516 2517 // Set the degree of MT processing here. If the discovery was done MT, 2518 // the number of threads involved during discovery could differ from 2519 // the number of active workers. This is OK as long as the discovered 2520 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2521 rp->set_active_mt_degree(active_workers); 2522 2523 // Process the weak references. 2524 const ReferenceProcessorStats& stats = 2525 rp->process_discovered_references(&g1_is_alive, 2526 &g1_keep_alive, 2527 &g1_drain_mark_stack, 2528 executor, 2529 g1h->gc_timer_cm()); 2530 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2531 2532 // The do_oop work routines of the keep_alive and drain_marking_stack 2533 // oop closures will set the has_overflown flag if we overflow the 2534 // global marking stack. 
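    // If that happened, the code below just records the fact; the early
    // return from weakRefsWork() further down then skips the string and
    // symbol table unlinking, and marking will be restarted rather than
    // completed from this remark pause.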
2535 2536 assert(_markStack.overflow() || _markStack.isEmpty(), 2537 "mark stack should be empty (unless it overflowed)"); 2538 2539 if (_markStack.overflow()) { 2540 // This should have been done already when we tried to push an 2541 // entry on to the global mark stack. But let's do it again. 2542 set_has_overflown(); 2543 } 2544 2545 assert(rp->num_q() == active_workers, "why not"); 2546 2547 rp->enqueue_discovered_references(executor); 2548 2549 rp->verify_no_references_recorded(); 2550 assert(!rp->discovery_enabled(), "Post condition"); 2551 } 2552 2553 if (has_overflown()) { 2554 // We can not trust g1_is_alive if the marking stack overflowed 2555 return; 2556 } 2557 2558 g1h->unlink_string_and_symbol_table(&g1_is_alive, 2559 /* process_strings */ false, // currently strings are always roots 2560 /* process_symbols */ true); 2561 } 2562 2563 void ConcurrentMark::swapMarkBitMaps() { 2564 CMBitMapRO* temp = _prevMarkBitMap; 2565 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2566 _nextMarkBitMap = (CMBitMap*) temp; 2567 } 2568 2569 class CMRemarkTask: public AbstractGangTask { 2570 private: 2571 ConcurrentMark* _cm; 2572 bool _is_serial; 2573 public: 2574 void work(uint worker_id) { 2575 // Since all available tasks are actually started, we should 2576 // only proceed if we're supposed to be active. 2577 if (worker_id < _cm->active_tasks()) { 2578 CMTask* task = _cm->task(worker_id); 2579 task->record_start_time(); 2580 do { 2581 task->do_marking_step(1000000000.0 /* something very large */, 2582 true /* do_termination */, 2583 _is_serial); 2584 } while (task->has_aborted() && !_cm->has_overflown()); 2585 // If we overflow, then we do not want to restart. We instead 2586 // want to abort remark and do concurrent marking again. 2587 task->record_end_time(); 2588 } 2589 } 2590 2591 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2592 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2593 _cm->terminator()->reset_for_reuse(active_workers); 2594 } 2595 }; 2596 2597 void ConcurrentMark::checkpointRootsFinalWork() { 2598 ResourceMark rm; 2599 HandleMark hm; 2600 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2601 2602 g1h->ensure_parsability(false); 2603 2604 if (G1CollectedHeap::use_parallel_gc_threads()) { 2605 G1CollectedHeap::StrongRootsScope srs(g1h); 2606 // this is remark, so we'll use up all active threads 2607 uint active_workers = g1h->workers()->active_workers(); 2608 if (active_workers == 0) { 2609 assert(active_workers > 0, "Should have been set earlier"); 2610 active_workers = (uint) ParallelGCThreads; 2611 g1h->workers()->set_active_workers(active_workers); 2612 } 2613 set_concurrency_and_phase(active_workers, false /* concurrent */); 2614 // Leave _parallel_marking_threads at it's 2615 // value originally calculated in the ConcurrentMark 2616 // constructor and pass values of the active workers 2617 // through the gang in the task. 2618 2619 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2620 // We will start all available threads, even if we decide that the 2621 // active_workers will be fewer. The extra ones will just bail out 2622 // immediately. 
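    // "Bail out" simply means the guard at the top of CMRemarkTask::work():
    //
    //   if (worker_id < _cm->active_tasks()) {
    //     ... do_marking_step(...) in a loop ...
    //   }   // otherwise return without doing anything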
2623 g1h->set_par_threads(active_workers); 2624 g1h->workers()->run_task(&remarkTask); 2625 g1h->set_par_threads(0); 2626 } else { 2627 G1CollectedHeap::StrongRootsScope srs(g1h); 2628 uint active_workers = 1; 2629 set_concurrency_and_phase(active_workers, false /* concurrent */); 2630 2631 // Note - if there's no work gang then the VMThread will be 2632 // the thread to execute the remark - serially. We have 2633 // to pass true for the is_serial parameter so that 2634 // CMTask::do_marking_step() doesn't enter the sync 2635 // barriers in the event of an overflow. Doing so will 2636 // cause an assert that the current thread is not a 2637 // concurrent GC thread. 2638 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2639 remarkTask.work(0); 2640 } 2641 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2642 guarantee(has_overflown() || 2643 satb_mq_set.completed_buffers_num() == 0, 2644 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2645 BOOL_TO_STR(has_overflown()), 2646 satb_mq_set.completed_buffers_num())); 2647 2648 print_stats(); 2649 } 2650 2651 #ifndef PRODUCT 2652 2653 class PrintReachableOopClosure: public OopClosure { 2654 private: 2655 G1CollectedHeap* _g1h; 2656 outputStream* _out; 2657 VerifyOption _vo; 2658 bool _all; 2659 2660 public: 2661 PrintReachableOopClosure(outputStream* out, 2662 VerifyOption vo, 2663 bool all) : 2664 _g1h(G1CollectedHeap::heap()), 2665 _out(out), _vo(vo), _all(all) { } 2666 2667 void do_oop(narrowOop* p) { do_oop_work(p); } 2668 void do_oop( oop* p) { do_oop_work(p); } 2669 2670 template <class T> void do_oop_work(T* p) { 2671 oop obj = oopDesc::load_decode_heap_oop(p); 2672 const char* str = NULL; 2673 const char* str2 = ""; 2674 2675 if (obj == NULL) { 2676 str = ""; 2677 } else if (!_g1h->is_in_g1_reserved(obj)) { 2678 str = " O"; 2679 } else { 2680 HeapRegion* hr = _g1h->heap_region_containing(obj); 2681 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2682 bool marked = _g1h->is_marked(obj, _vo); 2683 2684 if (over_tams) { 2685 str = " >"; 2686 if (marked) { 2687 str2 = " AND MARKED"; 2688 } 2689 } else if (marked) { 2690 str = " M"; 2691 } else { 2692 str = " NOT"; 2693 } 2694 } 2695 2696 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2697 p2i(p), p2i((void*) obj), str, str2); 2698 } 2699 }; 2700 2701 class PrintReachableObjectClosure : public ObjectClosure { 2702 private: 2703 G1CollectedHeap* _g1h; 2704 outputStream* _out; 2705 VerifyOption _vo; 2706 bool _all; 2707 HeapRegion* _hr; 2708 2709 public: 2710 PrintReachableObjectClosure(outputStream* out, 2711 VerifyOption vo, 2712 bool all, 2713 HeapRegion* hr) : 2714 _g1h(G1CollectedHeap::heap()), 2715 _out(out), _vo(vo), _all(all), _hr(hr) { } 2716 2717 void do_object(oop o) { 2718 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2719 bool marked = _g1h->is_marked(o, _vo); 2720 bool print_it = _all || over_tams || marked; 2721 2722 if (print_it) { 2723 _out->print_cr(" "PTR_FORMAT"%s", 2724 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2725 PrintReachableOopClosure oopCl(_out, _vo, _all); 2726 o->oop_iterate_no_header(&oopCl); 2727 } 2728 } 2729 }; 2730 2731 class PrintReachableRegionClosure : public HeapRegionClosure { 2732 private: 2733 G1CollectedHeap* _g1h; 2734 outputStream* _out; 2735 VerifyOption _vo; 2736 bool _all; 2737 2738 public: 2739 bool doHeapRegion(HeapRegion* hr) { 2740 HeapWord* b = hr->bottom(); 2741 HeapWord* e = hr->end(); 2742 HeapWord* t = hr->top(); 2743 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2744 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2745 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2746 _out->cr(); 2747 2748 HeapWord* from = b; 2749 HeapWord* to = t; 2750 2751 if (to > from) { 2752 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2753 _out->cr(); 2754 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2755 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2756 _out->cr(); 2757 } 2758 2759 return false; 2760 } 2761 2762 PrintReachableRegionClosure(outputStream* out, 2763 VerifyOption vo, 2764 bool all) : 2765 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2766 }; 2767 2768 void ConcurrentMark::print_reachable(const char* str, 2769 VerifyOption vo, 2770 bool all) { 2771 gclog_or_tty->cr(); 2772 gclog_or_tty->print_cr("== Doing heap dump... "); 2773 2774 if (G1PrintReachableBaseFile == NULL) { 2775 gclog_or_tty->print_cr(" #### error: no base file defined"); 2776 return; 2777 } 2778 2779 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2780 (JVM_MAXPATHLEN - 1)) { 2781 gclog_or_tty->print_cr(" #### error: file name too long"); 2782 return; 2783 } 2784 2785 char file_name[JVM_MAXPATHLEN]; 2786 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2787 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2788 2789 fileStream fout(file_name); 2790 if (!fout.is_open()) { 2791 gclog_or_tty->print_cr(" #### error: could not open file"); 2792 return; 2793 } 2794 2795 outputStream* out = &fout; 2796 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2797 out->cr(); 2798 2799 out->print_cr("--- ITERATING OVER REGIONS"); 2800 out->cr(); 2801 PrintReachableRegionClosure rcl(out, vo, all); 2802 _g1h->heap_region_iterate(&rcl); 2803 out->cr(); 2804 2805 gclog_or_tty->print_cr(" done"); 2806 gclog_or_tty->flush(); 2807 } 2808 2809 #endif // PRODUCT 2810 2811 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2812 // Note we are overriding the read-only view of the prev map here, via 2813 // the cast. 2814 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2815 } 2816 2817 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2818 _nextMarkBitMap->clearRange(mr); 2819 } 2820 2821 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2822 clearRangePrevBitmap(mr); 2823 clearRangeNextBitmap(mr); 2824 } 2825 2826 HeapRegion* 2827 ConcurrentMark::claim_region(uint worker_id) { 2828 // "checkpoint" the finger 2829 HeapWord* finger = _finger; 2830 2831 // _heap_end will not change underneath our feet; it only changes at 2832 // yield points. 2833 while (finger < _heap_end) { 2834 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2835 2836 // Note on how this code handles humongous regions. In the 2837 // normal case the finger will reach the start of a "starts 2838 // humongous" (SH) region. 
Its end will either be the end of the 2839 // last "continues humongous" (CH) region in the sequence, or the 2840 // standard end of the SH region (if the SH is the only region in 2841 // the sequence). That way claim_region() will skip over the CH 2842 // regions. However, there is a subtle race between a CM thread 2843 // executing this method and a mutator thread doing a humongous 2844 // object allocation. The two are not mutually exclusive as the CM 2845 // thread does not need to hold the Heap_lock when it gets 2846 // here. So there is a chance that claim_region() will come across 2847 // a free region that's in the progress of becoming a SH or a CH 2848 // region. In the former case, it will either 2849 // a) Miss the update to the region's end, in which case it will 2850 // visit every subsequent CH region, will find their bitmaps 2851 // empty, and do nothing, or 2852 // b) Will observe the update of the region's end (in which case 2853 // it will skip the subsequent CH regions). 2854 // If it comes across a region that suddenly becomes CH, the 2855 // scenario will be similar to b). So, the race between 2856 // claim_region() and a humongous object allocation might force us 2857 // to do a bit of unnecessary work (due to some unnecessary bitmap 2858 // iterations) but it should not introduce and correctness issues. 2859 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2860 HeapWord* bottom = curr_region->bottom(); 2861 HeapWord* end = curr_region->end(); 2862 HeapWord* limit = curr_region->next_top_at_mark_start(); 2863 2864 if (verbose_low()) { 2865 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2866 "["PTR_FORMAT", "PTR_FORMAT"), " 2867 "limit = "PTR_FORMAT, 2868 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 2869 } 2870 2871 // Is the gap between reading the finger and doing the CAS too long? 2872 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2873 if (res == finger) { 2874 // we succeeded 2875 2876 // notice that _finger == end cannot be guaranteed here since, 2877 // someone else might have moved the finger even further 2878 assert(_finger >= end, "the finger should have moved forward"); 2879 2880 if (verbose_low()) { 2881 gclog_or_tty->print_cr("[%u] we were successful with region = " 2882 PTR_FORMAT, worker_id, p2i(curr_region)); 2883 } 2884 2885 if (limit > bottom) { 2886 if (verbose_low()) { 2887 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 2888 "returning it ", worker_id, p2i(curr_region)); 2889 } 2890 return curr_region; 2891 } else { 2892 assert(limit == bottom, 2893 "the region limit should be at bottom"); 2894 if (verbose_low()) { 2895 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 2896 "returning NULL", worker_id, p2i(curr_region)); 2897 } 2898 // we return NULL and the caller should try calling 2899 // claim_region() again. 
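        // To summarize the contract: claim_region() returns either a
        // region whose [bottom, ntams) interval still needs to be
        // scanned, or NULL - which means "retry" in this empty-region
        // case, or "nothing left to claim" once the finger has reached
        // _heap_end.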
2900 return NULL; 2901 } 2902 } else { 2903 assert(_finger > finger, "the finger should have moved forward"); 2904 if (verbose_low()) { 2905 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2906 "global finger = "PTR_FORMAT", " 2907 "our finger = "PTR_FORMAT, 2908 worker_id, p2i(_finger), p2i(finger)); 2909 } 2910 2911 // read it again 2912 finger = _finger; 2913 } 2914 } 2915 2916 return NULL; 2917 } 2918 2919 #ifndef PRODUCT 2920 enum VerifyNoCSetOopsPhase { 2921 VerifyNoCSetOopsStack, 2922 VerifyNoCSetOopsQueues, 2923 VerifyNoCSetOopsSATBCompleted, 2924 VerifyNoCSetOopsSATBThread 2925 }; 2926 2927 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2928 private: 2929 G1CollectedHeap* _g1h; 2930 VerifyNoCSetOopsPhase _phase; 2931 int _info; 2932 2933 const char* phase_str() { 2934 switch (_phase) { 2935 case VerifyNoCSetOopsStack: return "Stack"; 2936 case VerifyNoCSetOopsQueues: return "Queue"; 2937 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2938 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2939 default: ShouldNotReachHere(); 2940 } 2941 return NULL; 2942 } 2943 2944 void do_object_work(oop obj) { 2945 guarantee(!_g1h->obj_in_cs(obj), 2946 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2947 p2i((void*) obj), phase_str(), _info)); 2948 } 2949 2950 public: 2951 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2952 2953 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2954 _phase = phase; 2955 _info = info; 2956 } 2957 2958 virtual void do_oop(oop* p) { 2959 oop obj = oopDesc::load_decode_heap_oop(p); 2960 do_object_work(obj); 2961 } 2962 2963 virtual void do_oop(narrowOop* p) { 2964 // We should not come across narrow oops while scanning marking 2965 // stacks and SATB buffers. 2966 ShouldNotReachHere(); 2967 } 2968 2969 virtual void do_object(oop obj) { 2970 do_object_work(obj); 2971 } 2972 }; 2973 2974 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2975 bool verify_enqueued_buffers, 2976 bool verify_thread_buffers, 2977 bool verify_fingers) { 2978 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2979 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2980 return; 2981 } 2982 2983 VerifyNoCSetOopsClosure cl; 2984 2985 if (verify_stacks) { 2986 // Verify entries on the global mark stack 2987 cl.set_phase(VerifyNoCSetOopsStack); 2988 _markStack.oops_do(&cl); 2989 2990 // Verify entries on the task queues 2991 for (uint i = 0; i < _max_worker_id; i += 1) { 2992 cl.set_phase(VerifyNoCSetOopsQueues, i); 2993 CMTaskQueue* queue = _task_queues->queue(i); 2994 queue->oops_do(&cl); 2995 } 2996 } 2997 2998 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2999 3000 // Verify entries on the enqueued SATB buffers 3001 if (verify_enqueued_buffers) { 3002 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3003 satb_qs.iterate_completed_buffers_read_only(&cl); 3004 } 3005 3006 // Verify entries on the per-thread SATB buffers 3007 if (verify_thread_buffers) { 3008 cl.set_phase(VerifyNoCSetOopsSATBThread); 3009 satb_qs.iterate_thread_buffers_read_only(&cl); 3010 } 3011 3012 if (verify_fingers) { 3013 // Verify the global finger 3014 HeapWord* global_finger = finger(); 3015 if (global_finger != NULL && global_finger < _heap_end) { 3016 // The global finger always points to a heap region boundary. 
We 3017 // use heap_region_containing_raw() to get the containing region 3018 // given that the global finger could be pointing to a free region 3019 // which subsequently becomes continues humongous. If that 3020 // happens, heap_region_containing() will return the bottom of the 3021 // corresponding starts humongous region and the check below will 3022 // not hold any more. 3023 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3024 guarantee(global_finger == global_hr->bottom(), 3025 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3026 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3027 } 3028 3029 // Verify the task fingers 3030 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3031 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3032 CMTask* task = _tasks[i]; 3033 HeapWord* task_finger = task->finger(); 3034 if (task_finger != NULL && task_finger < _heap_end) { 3035 // See above note on the global finger verification. 3036 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3037 guarantee(task_finger == task_hr->bottom() || 3038 !task_hr->in_collection_set(), 3039 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3040 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3041 } 3042 } 3043 } 3044 } 3045 #endif // PRODUCT 3046 3047 // Aggregate the counting data that was constructed concurrently 3048 // with marking. 3049 class AggregateCountDataHRClosure: public HeapRegionClosure { 3050 G1CollectedHeap* _g1h; 3051 ConcurrentMark* _cm; 3052 CardTableModRefBS* _ct_bs; 3053 BitMap* _cm_card_bm; 3054 uint _max_worker_id; 3055 3056 public: 3057 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3058 BitMap* cm_card_bm, 3059 uint max_worker_id) : 3060 _g1h(g1h), _cm(g1h->concurrent_mark()), 3061 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3062 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3063 3064 bool doHeapRegion(HeapRegion* hr) { 3065 if (hr->continuesHumongous()) { 3066 // We will ignore these here and process them when their 3067 // associated "starts humongous" region is processed. 3068 // Note that we cannot rely on their associated 3069 // "starts humongous" region to have their bit set to 1 3070 // since, due to the region chunking in the parallel region 3071 // iteration, a "continues humongous" region might be visited 3072 // before its associated "starts humongous". 3073 return false; 3074 } 3075 3076 HeapWord* start = hr->bottom(); 3077 HeapWord* limit = hr->next_top_at_mark_start(); 3078 HeapWord* end = hr->end(); 3079 3080 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3081 err_msg("Preconditions not met - " 3082 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3083 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3084 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3085 3086 assert(hr->next_marked_bytes() == 0, "Precondition"); 3087 3088 if (start == limit) { 3089 // NTAMS of this region has not been set so nothing to do. 3090 return false; 3091 } 3092 3093 // 'start' should be in the heap. 
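    // It should also be card aligned: start is hr->bottom(), and region
    // boundaries are card aligned given that HeapRegion::GrainBytes is a
    // multiple of the card size.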
3094 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3095 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3096 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3097 3098 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3099 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3100 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3101 3102 // If ntams is not card aligned then we bump card bitmap index 3103 // for limit so that we get the all the cards spanned by 3104 // the object ending at ntams. 3105 // Note: if this is the last region in the heap then ntams 3106 // could be actually just beyond the end of the the heap; 3107 // limit_idx will then correspond to a (non-existent) card 3108 // that is also outside the heap. 3109 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3110 limit_idx += 1; 3111 } 3112 3113 assert(limit_idx <= end_idx, "or else use atomics"); 3114 3115 // Aggregate the "stripe" in the count data associated with hr. 3116 uint hrs_index = hr->hrs_index(); 3117 size_t marked_bytes = 0; 3118 3119 for (uint i = 0; i < _max_worker_id; i += 1) { 3120 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3121 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3122 3123 // Fetch the marked_bytes in this region for task i and 3124 // add it to the running total for this region. 3125 marked_bytes += marked_bytes_array[hrs_index]; 3126 3127 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3128 // into the global card bitmap. 3129 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3130 3131 while (scan_idx < limit_idx) { 3132 assert(task_card_bm->at(scan_idx) == true, "should be"); 3133 _cm_card_bm->set_bit(scan_idx); 3134 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3135 3136 // BitMap::get_next_one_offset() can handle the case when 3137 // its left_offset parameter is greater than its right_offset 3138 // parameter. It does, however, have an early exit if 3139 // left_offset == right_offset. So let's limit the value 3140 // passed in for left offset here. 3141 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3142 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3143 } 3144 } 3145 3146 // Update the marked bytes for this region. 
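    // At this point, for this region, we have roughly:
    //
    //   marked_bytes = sum over workers i of
    //                    count_marked_bytes_array_for(i)[hrs_index]
    //
    // and the global card bitmap holds the union of the per-worker card
    // bitmaps over [start_idx, limit_idx).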
3147 hr->add_to_marked_bytes(marked_bytes); 3148 3149 // Next heap region 3150 return false; 3151 } 3152 }; 3153 3154 class G1AggregateCountDataTask: public AbstractGangTask { 3155 protected: 3156 G1CollectedHeap* _g1h; 3157 ConcurrentMark* _cm; 3158 BitMap* _cm_card_bm; 3159 uint _max_worker_id; 3160 int _active_workers; 3161 3162 public: 3163 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3164 ConcurrentMark* cm, 3165 BitMap* cm_card_bm, 3166 uint max_worker_id, 3167 int n_workers) : 3168 AbstractGangTask("Count Aggregation"), 3169 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3170 _max_worker_id(max_worker_id), 3171 _active_workers(n_workers) { } 3172 3173 void work(uint worker_id) { 3174 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3175 3176 if (G1CollectedHeap::use_parallel_gc_threads()) { 3177 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3178 _active_workers, 3179 HeapRegion::AggregateCountClaimValue); 3180 } else { 3181 _g1h->heap_region_iterate(&cl); 3182 } 3183 } 3184 }; 3185 3186 3187 void ConcurrentMark::aggregate_count_data() { 3188 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3189 _g1h->workers()->active_workers() : 3190 1); 3191 3192 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3193 _max_worker_id, n_workers); 3194 3195 if (G1CollectedHeap::use_parallel_gc_threads()) { 3196 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3197 "sanity check"); 3198 _g1h->set_par_threads(n_workers); 3199 _g1h->workers()->run_task(&g1_par_agg_task); 3200 _g1h->set_par_threads(0); 3201 3202 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3203 "sanity check"); 3204 _g1h->reset_heap_region_claim_values(); 3205 } else { 3206 g1_par_agg_task.work(0); 3207 } 3208 } 3209 3210 // Clear the per-worker arrays used to store the per-region counting data 3211 void ConcurrentMark::clear_all_count_data() { 3212 // Clear the global card bitmap - it will be filled during 3213 // liveness count aggregation (during remark) and the 3214 // final counting task. 3215 _card_bm.clear(); 3216 3217 // Clear the global region bitmap - it will be filled as part 3218 // of the final counting task. 3219 _region_bm.clear(); 3220 3221 uint max_regions = _g1h->max_regions(); 3222 assert(_max_worker_id > 0, "uninitialized"); 3223 3224 for (uint i = 0; i < _max_worker_id; i += 1) { 3225 BitMap* task_card_bm = count_card_bitmap_for(i); 3226 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3227 3228 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3229 assert(marked_bytes_array != NULL, "uninitialized"); 3230 3231 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3232 task_card_bm->clear(); 3233 } 3234 } 3235 3236 void ConcurrentMark::print_stats() { 3237 if (verbose_stats()) { 3238 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3239 for (size_t i = 0; i < _active_tasks; ++i) { 3240 _tasks[i]->print_stats(); 3241 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3242 } 3243 } 3244 } 3245 3246 // abandon current marking iteration due to a Full GC 3247 void ConcurrentMark::abort() { 3248 // Clear all marks to force marking thread to do nothing 3249 _nextMarkBitMap->clearAll(); 3250 3251 // Note we cannot clear the previous marking bitmap here 3252 // since VerifyDuringGC verifies the objects marked during 3253 // a full GC against the previous bitmap. 
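  // The remainder of abort() then, in order: throws away the liveness
  // counting data, resets the global marking state (mark stack, etc.),
  // clears the per-task region fields, unblocks any tasks waiting on the
  // overflow barriers, and deactivates SATB buffering. The concurrent
  // mark thread will notice _has_aborted and stand down.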
3254 3255 // Clear the liveness counting data 3256 clear_all_count_data(); 3257 // Empty mark stack 3258 reset_marking_state(); 3259 for (uint i = 0; i < _max_worker_id; ++i) { 3260 _tasks[i]->clear_region_fields(); 3261 } 3262 _first_overflow_barrier_sync.abort(); 3263 _second_overflow_barrier_sync.abort(); 3264 _has_aborted = true; 3265 3266 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3267 satb_mq_set.abandon_partial_marking(); 3268 // This can be called either during or outside marking, we'll read 3269 // the expected_active value from the SATB queue set. 3270 satb_mq_set.set_active_all_threads( 3271 false, /* new active value */ 3272 satb_mq_set.is_active() /* expected_active */); 3273 3274 _g1h->trace_heap_after_concurrent_cycle(); 3275 _g1h->register_concurrent_cycle_end(); 3276 } 3277 3278 static void print_ms_time_info(const char* prefix, const char* name, 3279 NumberSeq& ns) { 3280 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3281 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3282 if (ns.num() > 0) { 3283 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3284 prefix, ns.sd(), ns.maximum()); 3285 } 3286 } 3287 3288 void ConcurrentMark::print_summary_info() { 3289 gclog_or_tty->print_cr(" Concurrent marking:"); 3290 print_ms_time_info(" ", "init marks", _init_times); 3291 print_ms_time_info(" ", "remarks", _remark_times); 3292 { 3293 print_ms_time_info(" ", "final marks", _remark_mark_times); 3294 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3295 3296 } 3297 print_ms_time_info(" ", "cleanups", _cleanup_times); 3298 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3299 _total_counting_time, 3300 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3301 (double)_cleanup_times.num() 3302 : 0.0)); 3303 if (G1ScrubRemSets) { 3304 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3305 _total_rs_scrub_time, 3306 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3307 (double)_cleanup_times.num() 3308 : 0.0)); 3309 } 3310 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3311 (_init_times.sum() + _remark_times.sum() + 3312 _cleanup_times.sum())/1000.0); 3313 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3314 "(%8.2f s marking).", 3315 cmThread()->vtime_accum(), 3316 cmThread()->vtime_mark_accum()); 3317 } 3318 3319 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3320 if (use_parallel_marking_threads()) { 3321 _parallel_workers->print_worker_threads_on(st); 3322 } 3323 } 3324 3325 void ConcurrentMark::print_on_error(outputStream* st) const { 3326 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3327 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3328 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3329 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3330 } 3331 3332 // We take a break if someone is trying to stop the world. 
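// If the SuspendibleThreadSet reports that a safepoint is pending, worker 0
// records the concurrent pause with the policy and the calling thread yields
// until the suspending operation has completed; returns true iff we yielded.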
3333 bool ConcurrentMark::do_yield_check(uint worker_id) { 3334 if (SuspendibleThreadSet::should_yield()) { 3335 if (worker_id == 0) { 3336 _g1h->g1_policy()->record_concurrent_pause(); 3337 } 3338 SuspendibleThreadSet::yield(); 3339 return true; 3340 } else { 3341 return false; 3342 } 3343 } 3344 3345 bool ConcurrentMark::containing_card_is_marked(void* p) { 3346 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3347 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3348 } 3349 3350 bool ConcurrentMark::containing_cards_are_marked(void* start, 3351 void* last) { 3352 return containing_card_is_marked(start) && 3353 containing_card_is_marked(last); 3354 } 3355 3356 #ifndef PRODUCT 3357 // for debugging purposes 3358 void ConcurrentMark::print_finger() { 3359 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3360 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3361 for (uint i = 0; i < _max_worker_id; ++i) { 3362 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3363 } 3364 gclog_or_tty->cr(); 3365 } 3366 #endif 3367 3368 void CMTask::scan_object(oop obj) { 3369 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3370 3371 if (_cm->verbose_high()) { 3372 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3373 _worker_id, p2i((void*) obj)); 3374 } 3375 3376 size_t obj_size = obj->size(); 3377 _words_scanned += obj_size; 3378 3379 obj->oop_iterate(_cm_oop_closure); 3380 statsOnly( ++_objs_scanned ); 3381 check_limits(); 3382 } 3383 3384 // Closure for iteration over bitmaps 3385 class CMBitMapClosure : public BitMapClosure { 3386 private: 3387 // the bitmap that is being iterated over 3388 CMBitMap* _nextMarkBitMap; 3389 ConcurrentMark* _cm; 3390 CMTask* _task; 3391 3392 public: 3393 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3394 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3395 3396 bool do_bit(size_t offset) { 3397 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3398 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3399 assert( addr < _cm->finger(), "invariant"); 3400 3401 statsOnly( _task->increase_objs_found_on_bitmap() ); 3402 assert(addr >= _task->finger(), "invariant"); 3403 3404 // We move that task's local finger along. 3405 _task->move_finger_to(addr); 3406 3407 _task->scan_object(oop(addr)); 3408 // we only partially drain the local queue and global stack 3409 _task->drain_local_queue(true); 3410 _task->drain_global_stack(true); 3411 3412 // if the has_aborted flag has been raised, we need to bail out of 3413 // the iteration 3414 return !_task->has_aborted(); 3415 } 3416 }; 3417 3418 // Closure for iterating over objects, currently only used for 3419 // processing SATB buffers. 
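// The closure is registered with the SATB mark queue set by
// CMTask::drain_satb_buffers() (via set_closure() / set_par_closure()) and is
// applied to every object recorded in the completed buffers, forwarding each
// one to CMTask::deal_with_reference().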
3420 class CMObjectClosure : public ObjectClosure { 3421 private: 3422 CMTask* _task; 3423 3424 public: 3425 void do_object(oop obj) { 3426 _task->deal_with_reference(obj); 3427 } 3428 3429 CMObjectClosure(CMTask* task) : _task(task) { } 3430 }; 3431 3432 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3433 ConcurrentMark* cm, 3434 CMTask* task) 3435 : _g1h(g1h), _cm(cm), _task(task) { 3436 assert(_ref_processor == NULL, "should be initialized to NULL"); 3437 3438 if (G1UseConcMarkReferenceProcessing) { 3439 _ref_processor = g1h->ref_processor_cm(); 3440 assert(_ref_processor != NULL, "should not be NULL"); 3441 } 3442 } 3443 3444 void CMTask::setup_for_region(HeapRegion* hr) { 3445 assert(hr != NULL, 3446 "claim_region() should have filtered out NULL regions"); 3447 assert(!hr->continuesHumongous(), 3448 "claim_region() should have filtered out continues humongous regions"); 3449 3450 if (_cm->verbose_low()) { 3451 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3452 _worker_id, p2i(hr)); 3453 } 3454 3455 _curr_region = hr; 3456 _finger = hr->bottom(); 3457 update_region_limit(); 3458 } 3459 3460 void CMTask::update_region_limit() { 3461 HeapRegion* hr = _curr_region; 3462 HeapWord* bottom = hr->bottom(); 3463 HeapWord* limit = hr->next_top_at_mark_start(); 3464 3465 if (limit == bottom) { 3466 if (_cm->verbose_low()) { 3467 gclog_or_tty->print_cr("[%u] found an empty region " 3468 "["PTR_FORMAT", "PTR_FORMAT")", 3469 _worker_id, p2i(bottom), p2i(limit)); 3470 } 3471 // The region was collected underneath our feet. 3472 // We set the finger to bottom to ensure that the bitmap 3473 // iteration that will follow this will not do anything. 3474 // (this is not a condition that holds when we set the region up, 3475 // as the region is not supposed to be empty in the first place) 3476 _finger = bottom; 3477 } else if (limit >= _region_limit) { 3478 assert(limit >= _finger, "peace of mind"); 3479 } else { 3480 assert(limit < _region_limit, "only way to get here"); 3481 // This can happen under some pretty unusual circumstances. An 3482 // evacuation pause empties the region underneath our feet (NTAMS 3483 // at bottom). We then do some allocation in the region (NTAMS 3484 // stays at bottom), followed by the region being used as a GC 3485 // alloc region (NTAMS will move to top() and the objects 3486 // originally below it will be grayed). All objects now marked in 3487 // the region are explicitly grayed, if below the global finger, 3488 // and we do not need in fact to scan anything else. So, we simply 3489 // set _finger to be limit to ensure that the bitmap iteration 3490 // doesn't do anything. 3491 _finger = limit; 3492 } 3493 3494 _region_limit = limit; 3495 } 3496 3497 void CMTask::giveup_current_region() { 3498 assert(_curr_region != NULL, "invariant"); 3499 if (_cm->verbose_low()) { 3500 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3501 _worker_id, p2i(_curr_region)); 3502 } 3503 clear_region_fields(); 3504 } 3505 3506 void CMTask::clear_region_fields() { 3507 // Values for these three fields that indicate that we're not 3508 // holding on to a region. 
3509 _curr_region = NULL; 3510 _finger = NULL; 3511 _region_limit = NULL; 3512 } 3513 3514 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3515 if (cm_oop_closure == NULL) { 3516 assert(_cm_oop_closure != NULL, "invariant"); 3517 } else { 3518 assert(_cm_oop_closure == NULL, "invariant"); 3519 } 3520 _cm_oop_closure = cm_oop_closure; 3521 } 3522 3523 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3524 guarantee(nextMarkBitMap != NULL, "invariant"); 3525 3526 if (_cm->verbose_low()) { 3527 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3528 } 3529 3530 _nextMarkBitMap = nextMarkBitMap; 3531 clear_region_fields(); 3532 3533 _calls = 0; 3534 _elapsed_time_ms = 0.0; 3535 _termination_time_ms = 0.0; 3536 _termination_start_time_ms = 0.0; 3537 3538 #if _MARKING_STATS_ 3539 _local_pushes = 0; 3540 _local_pops = 0; 3541 _local_max_size = 0; 3542 _objs_scanned = 0; 3543 _global_pushes = 0; 3544 _global_pops = 0; 3545 _global_max_size = 0; 3546 _global_transfers_to = 0; 3547 _global_transfers_from = 0; 3548 _regions_claimed = 0; 3549 _objs_found_on_bitmap = 0; 3550 _satb_buffers_processed = 0; 3551 _steal_attempts = 0; 3552 _steals = 0; 3553 _aborted = 0; 3554 _aborted_overflow = 0; 3555 _aborted_cm_aborted = 0; 3556 _aborted_yield = 0; 3557 _aborted_timed_out = 0; 3558 _aborted_satb = 0; 3559 _aborted_termination = 0; 3560 #endif // _MARKING_STATS_ 3561 } 3562 3563 bool CMTask::should_exit_termination() { 3564 regular_clock_call(); 3565 // This is called when we are in the termination protocol. We should 3566 // quit if, for some reason, this task wants to abort or the global 3567 // stack is not empty (this means that we can get work from it). 3568 return !_cm->mark_stack_empty() || has_aborted(); 3569 } 3570 3571 void CMTask::reached_limit() { 3572 assert(_words_scanned >= _words_scanned_limit || 3573 _refs_reached >= _refs_reached_limit , 3574 "shouldn't have been called otherwise"); 3575 regular_clock_call(); 3576 } 3577 3578 void CMTask::regular_clock_call() { 3579 if (has_aborted()) return; 3580 3581 // First, we need to recalculate the words scanned and refs reached 3582 // limits for the next clock call. 3583 recalculate_limits(); 3584 3585 // During the regular clock call we do the following 3586 3587 // (1) If an overflow has been flagged, then we abort. 3588 if (_cm->has_overflown()) { 3589 set_has_aborted(); 3590 return; 3591 } 3592 3593 // If we are not concurrent (i.e. we're doing remark) we don't need 3594 // to check anything else. The other steps are only needed during 3595 // the concurrent marking phase. 3596 if (!concurrent()) return; 3597 3598 // (2) If marking has been aborted for Full GC, then we also abort. 3599 if (_cm->has_aborted()) { 3600 set_has_aborted(); 3601 statsOnly( ++_aborted_cm_aborted ); 3602 return; 3603 } 3604 3605 double curr_time_ms = os::elapsedVTime() * 1000.0; 3606 3607 // (3) If marking stats are enabled, then we update the step history. 
3608 #if _MARKING_STATS_
3609   if (_words_scanned >= _words_scanned_limit) {
3610     ++_clock_due_to_scanning;
3611   }
3612   if (_refs_reached >= _refs_reached_limit) {
3613     ++_clock_due_to_marking;
3614   }
3615
3616   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3617   _interval_start_time_ms = curr_time_ms;
3618   _all_clock_intervals_ms.add(last_interval_ms);
3619
3620   if (_cm->verbose_medium()) {
3621     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3622                            "scanned = %d%s, refs reached = %d%s",
3623                            _worker_id, last_interval_ms,
3624                            _words_scanned,
3625                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3626                            _refs_reached,
3627                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3628   }
3629 #endif // _MARKING_STATS_
3630
3631   // (4) We check whether we should yield. If we have to, then we abort.
3632   if (SuspendibleThreadSet::should_yield()) {
3633     // We should yield. To do this we abort the task. The caller is
3634     // responsible for yielding.
3635     set_has_aborted();
3636     statsOnly( ++_aborted_yield );
3637     return;
3638   }
3639
3640   // (5) We check whether we've reached our time quota. If we have,
3641   // then we abort.
3642   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3643   if (elapsed_time_ms > _time_target_ms) {
3644     set_has_aborted();
3645     _has_timed_out = true;
3646     statsOnly( ++_aborted_timed_out );
3647     return;
3648   }
3649
3650   // (6) Finally, we check whether there are enough completed SATB
3651   // buffers available for processing. If there are, we abort.
3652   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3653   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3654     if (_cm->verbose_low()) {
3655       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3656                              _worker_id);
3657     }
3658     // We do need to process SATB buffers, so we'll abort and restart
3659     // the marking task to do so.
3660     set_has_aborted();
3661     statsOnly( ++_aborted_satb );
3662     return;
3663   }
3664 }
3665
3666 void CMTask::recalculate_limits() {
3667   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3668   _words_scanned_limit = _real_words_scanned_limit;
3669
3670   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3671   _refs_reached_limit = _real_refs_reached_limit;
3672 }
3673
3674 void CMTask::decrease_limits() {
3675   // This is called when we believe that we're going to do an infrequent
3676   // operation which will increase the per-byte scanned cost (i.e. move
3677   // entries to/from the global stack). It basically tries to decrease the
3678   // scanning limit so that the clock is called earlier.
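  // For example, if the real limit is R and the corresponding period is P,
  // the limit is pulled back to R - 3P/4 (see the assignments below), so the
  // next regular_clock_call() fires after only about a quarter of the usual
  // amount of scanning work.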
3679 3680 if (_cm->verbose_medium()) { 3681 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3682 } 3683 3684 _words_scanned_limit = _real_words_scanned_limit - 3685 3 * words_scanned_period / 4; 3686 _refs_reached_limit = _real_refs_reached_limit - 3687 3 * refs_reached_period / 4; 3688 } 3689 3690 void CMTask::move_entries_to_global_stack() { 3691 // local array where we'll store the entries that will be popped 3692 // from the local queue 3693 oop buffer[global_stack_transfer_size]; 3694 3695 int n = 0; 3696 oop obj; 3697 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3698 buffer[n] = obj; 3699 ++n; 3700 } 3701 3702 if (n > 0) { 3703 // we popped at least one entry from the local queue 3704 3705 statsOnly( ++_global_transfers_to; _local_pops += n ); 3706 3707 if (!_cm->mark_stack_push(buffer, n)) { 3708 if (_cm->verbose_low()) { 3709 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3710 _worker_id); 3711 } 3712 set_has_aborted(); 3713 } else { 3714 // the transfer was successful 3715 3716 if (_cm->verbose_medium()) { 3717 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3718 _worker_id, n); 3719 } 3720 statsOnly( int tmp_size = _cm->mark_stack_size(); 3721 if (tmp_size > _global_max_size) { 3722 _global_max_size = tmp_size; 3723 } 3724 _global_pushes += n ); 3725 } 3726 } 3727 3728 // this operation was quite expensive, so decrease the limits 3729 decrease_limits(); 3730 } 3731 3732 void CMTask::get_entries_from_global_stack() { 3733 // local array where we'll store the entries that will be popped 3734 // from the global stack. 3735 oop buffer[global_stack_transfer_size]; 3736 int n; 3737 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3738 assert(n <= global_stack_transfer_size, 3739 "we should not pop more than the given limit"); 3740 if (n > 0) { 3741 // yes, we did actually pop at least one entry 3742 3743 statsOnly( ++_global_transfers_from; _global_pops += n ); 3744 if (_cm->verbose_medium()) { 3745 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3746 _worker_id, n); 3747 } 3748 for (int i = 0; i < n; ++i) { 3749 bool success = _task_queue->push(buffer[i]); 3750 // We only call this when the local queue is empty or under a 3751 // given target limit. So, we do not expect this push to fail. 3752 assert(success, "invariant"); 3753 } 3754 3755 statsOnly( int tmp_size = _task_queue->size(); 3756 if (tmp_size > _local_max_size) { 3757 _local_max_size = tmp_size; 3758 } 3759 _local_pushes += n ); 3760 } 3761 3762 // this operation was quite expensive, so decrease the limits 3763 decrease_limits(); 3764 } 3765 3766 void CMTask::drain_local_queue(bool partially) { 3767 if (has_aborted()) return; 3768 3769 // Decide what the target size is, depending whether we're going to 3770 // drain it partially (so that other tasks can steal if they run out 3771 // of things to do) or totally (at the very end). 
3772   size_t target_size;
3773   if (partially) {
3774     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3775   } else {
3776     target_size = 0;
3777   }
3778
3779   if (_task_queue->size() > target_size) {
3780     if (_cm->verbose_high()) {
3781       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3782                              _worker_id, target_size);
3783     }
3784
3785     oop obj;
3786     bool ret = _task_queue->pop_local(obj);
3787     while (ret) {
3788       statsOnly( ++_local_pops );
3789
3790       if (_cm->verbose_high()) {
3791         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3792                                p2i((void*) obj));
3793       }
3794
3795       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3796       assert(!_g1h->is_on_master_free_list(
3797                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3798
3799       scan_object(obj);
3800
3801       if (_task_queue->size() <= target_size || has_aborted()) {
3802         ret = false;
3803       } else {
3804         ret = _task_queue->pop_local(obj);
3805       }
3806     }
3807
3808     if (_cm->verbose_high()) {
3809       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3810                              _worker_id, _task_queue->size());
3811     }
3812   }
3813 }
3814
3815 void CMTask::drain_global_stack(bool partially) {
3816   if (has_aborted()) return;
3817
3818   // We have a policy to drain the local queue before we attempt to
3819   // drain the global stack.
3820   assert(partially || _task_queue->size() == 0, "invariant");
3821
3822   // Decide what the target size is, depending on whether we're going to
3823   // drain it partially (so that other tasks can steal if they run out
3824   // of things to do) or totally (at the very end). Notice that,
3825   // because we move entries from the global stack in chunks or
3826   // because another task might be doing the same, we might in fact
3827   // drop below the target. But, this is not a problem.
3828   size_t target_size;
3829   if (partially) {
3830     target_size = _cm->partial_mark_stack_size_target();
3831   } else {
3832     target_size = 0;
3833   }
3834
3835   if (_cm->mark_stack_size() > target_size) {
3836     if (_cm->verbose_low()) {
3837       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3838                              _worker_id, target_size);
3839     }
3840
3841     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3842       get_entries_from_global_stack();
3843       drain_local_queue(partially);
3844     }
3845
3846     if (_cm->verbose_low()) {
3847       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3848                              _worker_id, _cm->mark_stack_size());
3849     }
3850   }
3851 }
3852
3853 // The SATB queue has several assumptions on whether to call the par or
3854 // non-par versions of the methods. This is why some of the code is
3855 // replicated. We should really get rid of the single-threaded version
3856 // of the code to simplify things.
3857 void CMTask::drain_satb_buffers() {
3858   if (has_aborted()) return;
3859
3860   // We set this so that the regular clock knows that we're in the
3861   // middle of draining buffers and doesn't set the abort flag when it
3862   // notices that SATB buffers are available for draining. It'd be
3863   // very counterproductive if it did that.
:-) 3864 _draining_satb_buffers = true; 3865 3866 CMObjectClosure oc(this); 3867 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3868 if (G1CollectedHeap::use_parallel_gc_threads()) { 3869 satb_mq_set.set_par_closure(_worker_id, &oc); 3870 } else { 3871 satb_mq_set.set_closure(&oc); 3872 } 3873 3874 // This keeps claiming and applying the closure to completed buffers 3875 // until we run out of buffers or we need to abort. 3876 if (G1CollectedHeap::use_parallel_gc_threads()) { 3877 while (!has_aborted() && 3878 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3879 if (_cm->verbose_medium()) { 3880 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3881 } 3882 statsOnly( ++_satb_buffers_processed ); 3883 regular_clock_call(); 3884 } 3885 } else { 3886 while (!has_aborted() && 3887 satb_mq_set.apply_closure_to_completed_buffer()) { 3888 if (_cm->verbose_medium()) { 3889 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3890 } 3891 statsOnly( ++_satb_buffers_processed ); 3892 regular_clock_call(); 3893 } 3894 } 3895 3896 if (!concurrent() && !has_aborted()) { 3897 // We should only do this during remark. 3898 if (G1CollectedHeap::use_parallel_gc_threads()) { 3899 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3900 } else { 3901 satb_mq_set.iterate_closure_all_threads(); 3902 } 3903 } 3904 3905 _draining_satb_buffers = false; 3906 3907 assert(has_aborted() || 3908 concurrent() || 3909 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3910 3911 if (G1CollectedHeap::use_parallel_gc_threads()) { 3912 satb_mq_set.set_par_closure(_worker_id, NULL); 3913 } else { 3914 satb_mq_set.set_closure(NULL); 3915 } 3916 3917 // again, this was a potentially expensive operation, decrease the 3918 // limits to get the regular clock call early 3919 decrease_limits(); 3920 } 3921 3922 void CMTask::print_stats() { 3923 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3924 _worker_id, _calls); 3925 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3926 _elapsed_time_ms, _termination_time_ms); 3927 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3928 _step_times_ms.num(), _step_times_ms.avg(), 3929 _step_times_ms.sd()); 3930 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3931 _step_times_ms.maximum(), _step_times_ms.sum()); 3932 3933 #if _MARKING_STATS_ 3934 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3935 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3936 _all_clock_intervals_ms.sd()); 3937 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3938 _all_clock_intervals_ms.maximum(), 3939 _all_clock_intervals_ms.sum()); 3940 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3941 _clock_due_to_scanning, _clock_due_to_marking); 3942 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3943 _objs_scanned, _objs_found_on_bitmap); 3944 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3945 _local_pushes, _local_pops, _local_max_size); 3946 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3947 _global_pushes, _global_pops, _global_max_size); 3948 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3949 _global_transfers_to,_global_transfers_from); 3950 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3951 gclog_or_tty->print_cr(" SATB 
buffers: processed = %d", _satb_buffers_processed);
3952   gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3953                          _steal_attempts, _steals);
3954   gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3955   gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3956                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3957   gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3958                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3959 #endif // _MARKING_STATS_
3960 }
3961
3962 /*****************************************************************************
3963
3964     The do_marking_step(time_target_ms, ...) method is the building
3965     block of the parallel marking framework. It can be called in parallel
3966     with other invocations of do_marking_step() on different tasks
3967     (but only one per task, obviously) and concurrently with the
3968     mutator threads, or during remark, hence it eliminates the need
3969     for two versions of the code. When called during remark, it will
3970     pick up from where the task left off during the concurrent marking
3971     phase. Interestingly, tasks are also claimable during evacuation
3972     pauses, since do_marking_step() ensures that it aborts before
3973     it needs to yield.
3974
3975     The data structures that it uses to do marking work are the
3976     following:
3977
3978     (1) Marking Bitmap. If there are gray objects that appear only
3979     on the bitmap (this happens either when dealing with an overflow
3980     or when the initial marking phase has simply marked the roots
3981     and didn't push them on the stack), then tasks claim heap
3982     regions whose bitmap they then scan to find gray objects. A
3983     global finger indicates where the end of the last claimed region
3984     is. A local finger indicates how far into the region a task has
3985     scanned. The two fingers are used to determine how to gray an
3986     object (i.e. whether simply marking it is OK, as it will be
3987     visited by a task in the future, or whether it also needs to be
3988     pushed on a stack).
3989
3990     (2) Local Queue. Each task has its own local queue, which it can
3991     access reasonably efficiently. Other tasks can steal from
3992     it when they run out of work. Throughout the marking phase, a
3993     task attempts to keep its local queue short but not totally
3994     empty, so that entries are available for stealing by other
3995     tasks. Only when there is no more work will a task totally
3996     drain its local queue.
3997
3998     (3) Global Mark Stack. This handles local queue overflow. During
3999     marking only sets of entries are moved between it and the local
4000     queues, as access to it requires a mutex and more fine-grained
4001     interaction with it might cause contention. If it
4002     overflows, then the marking phase should restart and iterate
4003     over the bitmap to identify gray objects. Throughout the marking
4004     phase, tasks attempt to keep the global mark stack at a small
4005     length but not totally empty, so that entries are available for
4006     popping by other tasks. Only when there is no more work will tasks
4007     totally drain the global mark stack.
4008
4009     (4) SATB Buffer Queue. This is where completed SATB buffers are
4010     made available. Buffers are regularly removed from this queue
4011     and scanned for roots, so that the queue doesn't get too
4012     long. During remark, all completed buffers are processed, as
4013     well as the filled-in parts of any uncompleted buffers.
4014
4015     The do_marking_step() method tries to abort when the time target
4016     has been reached. There are a few other cases when the
4017     do_marking_step() method also aborts:
4018
4019     (1) When the marking phase has been aborted (after a Full GC).
4020
4021     (2) When a global overflow (on the global stack) has been
4022     triggered. Before the task aborts, it will actually sync up with
4023     the other tasks to ensure that all the marking data structures
4024     (local queues, stacks, fingers etc.) are re-initialized so that
4025     when do_marking_step() completes, the marking phase can
4026     immediately restart.
4027
4028     (3) When enough completed SATB buffers are available. The
4029     do_marking_step() method only tries to drain SATB buffers right
4030     at the beginning. So, if enough buffers are available, the
4031     marking step aborts and the SATB buffers are processed at
4032     the beginning of the next invocation.
4033
4034     (4) To yield. When we have to yield, we abort and yield
4035     right at the end of do_marking_step(). This saves us from a lot
4036     of hassle, as by yielding we might allow a Full GC. If this
4037     happens then objects will be compacted underneath our feet, the
4038     heap might shrink, etc. We save checking for this by just
4039     aborting and doing the yield right at the end.
4040
4041     From the above it follows that the do_marking_step() method should
4042     be called in a loop (or, otherwise, regularly) until it completes.
4043
4044     If a marking step completes without its has_aborted() flag being
4045     true, it means it has completed the current marking phase (and
4046     also all other marking tasks have done so and have all synced up).
4047
4048     A method called regular_clock_call() is invoked "regularly" (in
4049     sub-ms intervals) throughout marking. It is this clock method that
4050     checks all the abort conditions which were mentioned above and
4051     decides when the task should abort. A work-based scheme is used to
4052     trigger this clock method: when the number of object words the
4053     marking phase has scanned or the number of references the marking
4054     phase has visited reaches a given limit. Additional calls to
4055     the clock method have been planted in a few other strategic places
4056     too. The initial reason for the clock method was to avoid calling
4057     vtime too regularly, as it is quite expensive. So, once it was in
4058     place, it was natural to piggy-back all the other conditions on it
4059     too and not constantly check them throughout the code.
4060
4061     If do_termination is true then do_marking_step will enter its
4062     termination protocol.
4063
4064     The value of is_serial must be true when do_marking_step is being
4065     called serially (i.e. by the VMThread) and do_marking_step should
4066     skip any synchronization in the termination and overflow code.
4067     Examples include the serial remark code and the serial reference
4068     processing closures.
4069
4070     The value of is_serial must be false when do_marking_step is
4071     being called by any of the worker threads in a work gang.
4072     Examples include the concurrent marking code (CMMarkingTask),
4073     the MT remark code, and the MT reference processing closures.
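
    Purely as an illustration (the real callers listed above add their
    own yield, sleep and overflow handling around it), the calling
    pattern described here boils down to something like:

      do {
        task->do_marking_step(target_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
        // if the step aborted, yield / sync up before retrying
      } while (task->has_aborted());  // and marking itself was not aborted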
4074
4075  *****************************************************************************/
4076
4077 void CMTask::do_marking_step(double time_target_ms,
4078                              bool do_termination,
4079                              bool is_serial) {
4080   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4081   assert(concurrent() == _cm->concurrent(), "they should be the same");
4082
4083   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4084   assert(_task_queues != NULL, "invariant");
4085   assert(_task_queue != NULL, "invariant");
4086   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4087
4088   assert(!_claimed,
4089          "only one thread should claim this task at any one time");
4090
4091   // OK, this doesn't safeguard against all possible scenarios, as it is
4092   // possible for two threads to set the _claimed flag at the same
4093   // time. But it is only for debugging purposes anyway and it will
4094   // catch most problems.
4095   _claimed = true;
4096
4097   _start_time_ms = os::elapsedVTime() * 1000.0;
4098   statsOnly( _interval_start_time_ms = _start_time_ms );
4099
4100   // If do_stealing is true then do_marking_step will attempt to
4101   // steal work from the other CMTasks. It only makes sense to
4102   // enable stealing when the termination protocol is enabled
4103   // and do_marking_step() is not being called serially.
4104   bool do_stealing = do_termination && !is_serial;
4105
4106   double diff_prediction_ms =
4107     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4108   _time_target_ms = time_target_ms - diff_prediction_ms;
4109
4110   // set up the variables that are used in the work-based scheme to
4111   // call the regular clock method
4112   _words_scanned = 0;
4113   _refs_reached = 0;
4114   recalculate_limits();
4115
4116   // clear all flags
4117   clear_has_aborted();
4118   _has_timed_out = false;
4119   _draining_satb_buffers = false;
4120
4121   ++_calls;
4122
4123   if (_cm->verbose_low()) {
4124     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4125                            "target = %1.2lfms >>>>>>>>>>",
4126                            _worker_id, _calls, _time_target_ms);
4127   }
4128
4129   // Set up the bitmap and oop closures. Anything that uses them is
4130   // eventually called from this method, so it is OK to allocate these
4131   // statically.
4132   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4133   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4134   set_cm_oop_closure(&cm_oop_closure);
4135
4136   if (_cm->has_overflown()) {
4137     // This can happen if the mark stack overflows during a GC pause
4138     // and this task, after a yield point, restarts. We have to abort
4139     // as we need to get into the overflow protocol which happens
4140     // right at the end of this task.
4141     set_has_aborted();
4142   }
4143
4144   // First drain any available SATB buffers. After this, we will not
4145   // look at SATB buffers before the next invocation of this method.
4146   // If enough completed SATB buffers are queued up, the regular clock
4147   // will abort this task so that it restarts.
4148   drain_satb_buffers();
4149   // ...then partially drain the local queue and the global stack
4150   drain_local_queue(true);
4151   drain_global_stack(true);
4152
4153   do {
4154     if (!has_aborted() && _curr_region != NULL) {
4155       // This means that we're already holding on to a region.
4156       assert(_finger != NULL, "if region is not NULL, then the finger "
4157              "should not be NULL either");
4158
4159       // We might have restarted this task after an evacuation pause
4160       // which might have evacuated the region we're holding on to
4161       // underneath our feet.
Let's read its limit again to make sure 4162 // that we do not iterate over a region of the heap that 4163 // contains garbage (update_region_limit() will also move 4164 // _finger to the start of the region if it is found empty). 4165 update_region_limit(); 4166 // We will start from _finger not from the start of the region, 4167 // as we might be restarting this task after aborting half-way 4168 // through scanning this region. In this case, _finger points to 4169 // the address where we last found a marked object. If this is a 4170 // fresh region, _finger points to start(). 4171 MemRegion mr = MemRegion(_finger, _region_limit); 4172 4173 if (_cm->verbose_low()) { 4174 gclog_or_tty->print_cr("[%u] we're scanning part " 4175 "["PTR_FORMAT", "PTR_FORMAT") " 4176 "of region "HR_FORMAT, 4177 _worker_id, p2i(_finger), p2i(_region_limit), 4178 HR_FORMAT_PARAMS(_curr_region)); 4179 } 4180 4181 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4182 "humongous regions should go around loop once only"); 4183 4184 // Some special cases: 4185 // If the memory region is empty, we can just give up the region. 4186 // If the current region is humongous then we only need to check 4187 // the bitmap for the bit associated with the start of the object, 4188 // scan the object if it's live, and give up the region. 4189 // Otherwise, let's iterate over the bitmap of the part of the region 4190 // that is left. 4191 // If the iteration is successful, give up the region. 4192 if (mr.is_empty()) { 4193 giveup_current_region(); 4194 regular_clock_call(); 4195 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4196 if (_nextMarkBitMap->isMarked(mr.start())) { 4197 // The object is marked - apply the closure 4198 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4199 bitmap_closure.do_bit(offset); 4200 } 4201 // Even if this task aborted while scanning the humongous object 4202 // we can (and should) give up the current region. 4203 giveup_current_region(); 4204 regular_clock_call(); 4205 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4206 giveup_current_region(); 4207 regular_clock_call(); 4208 } else { 4209 assert(has_aborted(), "currently the only way to do so"); 4210 // The only way to abort the bitmap iteration is to return 4211 // false from the do_bit() method. However, inside the 4212 // do_bit() method we move the _finger to point to the 4213 // object currently being looked at. So, if we bail out, we 4214 // have definitely set _finger to something non-null. 4215 assert(_finger != NULL, "invariant"); 4216 4217 // Region iteration was actually aborted. So now _finger 4218 // points to the address of the object we last scanned. If we 4219 // leave it there, when we restart this task, we will rescan 4220 // the object. It is easy to avoid this. We move the finger by 4221 // enough to point to the next possible object header (the 4222 // bitmap knows by how much we need to move it as it knows its 4223 // granularity). 4224 assert(_finger < _region_limit, "invariant"); 4225 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4226 // Check if bitmap iteration was aborted while scanning the last object 4227 if (new_finger >= _region_limit) { 4228 giveup_current_region(); 4229 } else { 4230 move_finger_to(new_finger); 4231 } 4232 } 4233 } 4234 // At this point we have either completed iterating over the 4235 // region we were holding on to, or we have aborted. 
4236 4237 // We then partially drain the local queue and the global stack. 4238 // (Do we really need this?) 4239 drain_local_queue(true); 4240 drain_global_stack(true); 4241 4242 // Read the note on the claim_region() method on why it might 4243 // return NULL with potentially more regions available for 4244 // claiming and why we have to check out_of_regions() to determine 4245 // whether we're done or not. 4246 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4247 // We are going to try to claim a new region. We should have 4248 // given up on the previous one. 4249 // Separated the asserts so that we know which one fires. 4250 assert(_curr_region == NULL, "invariant"); 4251 assert(_finger == NULL, "invariant"); 4252 assert(_region_limit == NULL, "invariant"); 4253 if (_cm->verbose_low()) { 4254 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4255 } 4256 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4257 if (claimed_region != NULL) { 4258 // Yes, we managed to claim one 4259 statsOnly( ++_regions_claimed ); 4260 4261 if (_cm->verbose_low()) { 4262 gclog_or_tty->print_cr("[%u] we successfully claimed " 4263 "region "PTR_FORMAT, 4264 _worker_id, p2i(claimed_region)); 4265 } 4266 4267 setup_for_region(claimed_region); 4268 assert(_curr_region == claimed_region, "invariant"); 4269 } 4270 // It is important to call the regular clock here. It might take 4271 // a while to claim a region if, for example, we hit a large 4272 // block of empty regions. So we need to call the regular clock 4273 // method once round the loop to make sure it's called 4274 // frequently enough. 4275 regular_clock_call(); 4276 } 4277 4278 if (!has_aborted() && _curr_region == NULL) { 4279 assert(_cm->out_of_regions(), 4280 "at this point we should be out of regions"); 4281 } 4282 } while ( _curr_region != NULL && !has_aborted()); 4283 4284 if (!has_aborted()) { 4285 // We cannot check whether the global stack is empty, since other 4286 // tasks might be pushing objects to it concurrently. 4287 assert(_cm->out_of_regions(), 4288 "at this point we should be out of regions"); 4289 4290 if (_cm->verbose_low()) { 4291 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4292 } 4293 4294 // Try to reduce the number of available SATB buffers so that 4295 // remark has less work to do. 4296 drain_satb_buffers(); 4297 } 4298 4299 // Since we've done everything else, we can now totally drain the 4300 // local queue and global stack. 4301 drain_local_queue(false); 4302 drain_global_stack(false); 4303 4304 // Attempt at work stealing from other task's queues. 4305 if (do_stealing && !has_aborted()) { 4306 // We have not aborted. This means that we have finished all that 4307 // we could. Let's try to do some stealing... 4308 4309 // We cannot check whether the global stack is empty, since other 4310 // tasks might be pushing objects to it concurrently. 
4311 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4312 "only way to reach here"); 4313 4314 if (_cm->verbose_low()) { 4315 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4316 } 4317 4318 while (!has_aborted()) { 4319 oop obj; 4320 statsOnly( ++_steal_attempts ); 4321 4322 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4323 if (_cm->verbose_medium()) { 4324 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4325 _worker_id, p2i((void*) obj)); 4326 } 4327 4328 statsOnly( ++_steals ); 4329 4330 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4331 "any stolen object should be marked"); 4332 scan_object(obj); 4333 4334 // And since we're towards the end, let's totally drain the 4335 // local queue and global stack. 4336 drain_local_queue(false); 4337 drain_global_stack(false); 4338 } else { 4339 break; 4340 } 4341 } 4342 } 4343 4344 // If we are about to wrap up and go into termination, check if we 4345 // should raise the overflow flag. 4346 if (do_termination && !has_aborted()) { 4347 if (_cm->force_overflow()->should_force()) { 4348 _cm->set_has_overflown(); 4349 regular_clock_call(); 4350 } 4351 } 4352 4353 // We still haven't aborted. Now, let's try to get into the 4354 // termination protocol. 4355 if (do_termination && !has_aborted()) { 4356 // We cannot check whether the global stack is empty, since other 4357 // tasks might be concurrently pushing objects on it. 4358 // Separated the asserts so that we know which one fires. 4359 assert(_cm->out_of_regions(), "only way to reach here"); 4360 assert(_task_queue->size() == 0, "only way to reach here"); 4361 4362 if (_cm->verbose_low()) { 4363 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4364 } 4365 4366 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4367 4368 // The CMTask class also extends the TerminatorTerminator class, 4369 // hence its should_exit_termination() method will also decide 4370 // whether to exit the termination protocol or not. 4371 bool finished = (is_serial || 4372 _cm->terminator()->offer_termination(this)); 4373 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4374 _termination_time_ms += 4375 termination_end_time_ms - _termination_start_time_ms; 4376 4377 if (finished) { 4378 // We're all done. 4379 4380 if (_worker_id == 0) { 4381 // let's allow task 0 to do this 4382 if (concurrent()) { 4383 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4384 // we need to set this to false before the next 4385 // safepoint. This way we ensure that the marking phase 4386 // doesn't observe any more heap expansions. 4387 _cm->clear_concurrent_marking_in_progress(); 4388 } 4389 } 4390 4391 // We can now guarantee that the global stack is empty, since 4392 // all other tasks have finished. We separated the guarantees so 4393 // that, if a condition is false, we can immediately find out 4394 // which one. 4395 guarantee(_cm->out_of_regions(), "only way to reach here"); 4396 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4397 guarantee(_task_queue->size() == 0, "only way to reach here"); 4398 guarantee(!_cm->has_overflown(), "only way to reach here"); 4399 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4400 4401 if (_cm->verbose_low()) { 4402 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4403 } 4404 } else { 4405 // Apparently there's more work to do. Let's abort this task. It 4406 // will restart it and we can hopefully find more things to do. 
4407 4408 if (_cm->verbose_low()) { 4409 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4410 _worker_id); 4411 } 4412 4413 set_has_aborted(); 4414 statsOnly( ++_aborted_termination ); 4415 } 4416 } 4417 4418 // Mainly for debugging purposes to make sure that a pointer to the 4419 // closure which was statically allocated in this frame doesn't 4420 // escape it by accident. 4421 set_cm_oop_closure(NULL); 4422 double end_time_ms = os::elapsedVTime() * 1000.0; 4423 double elapsed_time_ms = end_time_ms - _start_time_ms; 4424 // Update the step history. 4425 _step_times_ms.add(elapsed_time_ms); 4426 4427 if (has_aborted()) { 4428 // The task was aborted for some reason. 4429 4430 statsOnly( ++_aborted ); 4431 4432 if (_has_timed_out) { 4433 double diff_ms = elapsed_time_ms - _time_target_ms; 4434 // Keep statistics of how well we did with respect to hitting 4435 // our target only if we actually timed out (if we aborted for 4436 // other reasons, then the results might get skewed). 4437 _marking_step_diffs_ms.add(diff_ms); 4438 } 4439 4440 if (_cm->has_overflown()) { 4441 // This is the interesting one. We aborted because a global 4442 // overflow was raised. This means we have to restart the 4443 // marking phase and start iterating over regions. However, in 4444 // order to do this we have to make sure that all tasks stop 4445 // what they are doing and re-initialize in a safe manner. We 4446 // will achieve this with the use of two barrier sync points. 4447 4448 if (_cm->verbose_low()) { 4449 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4450 } 4451 4452 if (!is_serial) { 4453 // We only need to enter the sync barrier if being called 4454 // from a parallel context 4455 _cm->enter_first_sync_barrier(_worker_id); 4456 4457 // When we exit this sync barrier we know that all tasks have 4458 // stopped doing marking work. So, it's now safe to 4459 // re-initialize our data structures. At the end of this method, 4460 // task 0 will clear the global data structures. 4461 } 4462 4463 statsOnly( ++_aborted_overflow ); 4464 4465 // We clear the local state of this task... 4466 clear_region_fields(); 4467 4468 if (!is_serial) { 4469 // ...and enter the second barrier. 4470 _cm->enter_second_sync_barrier(_worker_id); 4471 } 4472 // At this point, if we're during the concurrent phase of 4473 // marking, everything has been re-initialized and we're 4474 // ready to restart. 
4475 } 4476 4477 if (_cm->verbose_low()) { 4478 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4479 "elapsed = %1.2lfms <<<<<<<<<<", 4480 _worker_id, _time_target_ms, elapsed_time_ms); 4481 if (_cm->has_aborted()) { 4482 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4483 _worker_id); 4484 } 4485 } 4486 } else { 4487 if (_cm->verbose_low()) { 4488 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4489 "elapsed = %1.2lfms <<<<<<<<<<", 4490 _worker_id, _time_target_ms, elapsed_time_ms); 4491 } 4492 } 4493 4494 _claimed = false; 4495 } 4496 4497 CMTask::CMTask(uint worker_id, 4498 ConcurrentMark* cm, 4499 size_t* marked_bytes, 4500 BitMap* card_bm, 4501 CMTaskQueue* task_queue, 4502 CMTaskQueueSet* task_queues) 4503 : _g1h(G1CollectedHeap::heap()), 4504 _worker_id(worker_id), _cm(cm), 4505 _claimed(false), 4506 _nextMarkBitMap(NULL), _hash_seed(17), 4507 _task_queue(task_queue), 4508 _task_queues(task_queues), 4509 _cm_oop_closure(NULL), 4510 _marked_bytes_array(marked_bytes), 4511 _card_bm(card_bm) { 4512 guarantee(task_queue != NULL, "invariant"); 4513 guarantee(task_queues != NULL, "invariant"); 4514 4515 statsOnly( _clock_due_to_scanning = 0; 4516 _clock_due_to_marking = 0 ); 4517 4518 _marking_step_diffs_ms.add(0.5); 4519 } 4520 4521 // These are formatting macros that are used below to ensure 4522 // consistent formatting. The *_H_* versions are used to format the 4523 // header for a particular value and they should be kept consistent 4524 // with the corresponding macro. Also note that most of the macros add 4525 // the necessary white space (as a prefix) which makes them a bit 4526 // easier to compose. 4527 4528 // All the output lines are prefixed with this string to be able to 4529 // identify them easily in a large log file. 4530 #define G1PPRL_LINE_PREFIX "###" 4531 4532 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4533 #ifdef _LP64 4534 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4535 #else // _LP64 4536 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4537 #endif // _LP64 4538 4539 // For per-region info 4540 #define G1PPRL_TYPE_FORMAT " %-4s" 4541 #define G1PPRL_TYPE_H_FORMAT " %4s" 4542 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4543 #define G1PPRL_BYTE_H_FORMAT " %9s" 4544 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4545 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4546 4547 // For summary info 4548 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4549 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4550 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4551 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4552 4553 G1PrintRegionLivenessInfoClosure:: 4554 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4555 : _out(out), 4556 _total_used_bytes(0), _total_capacity_bytes(0), 4557 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4558 _hum_used_bytes(0), _hum_capacity_bytes(0), 4559 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4560 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4561 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4562 MemRegion g1_committed = g1h->g1_committed(); 4563 MemRegion g1_reserved = g1h->g1_reserved(); 4564 double now = os::elapsedTime(); 4565 4566 // Print the header of the output. 
4567   _out->cr();
4568   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4569   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4570                  G1PPRL_SUM_ADDR_FORMAT("committed")
4571                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4572                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4573                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4574                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4575                  HeapRegion::GrainBytes);
4576   _out->print_cr(G1PPRL_LINE_PREFIX);
4577   _out->print_cr(G1PPRL_LINE_PREFIX
4578                  G1PPRL_TYPE_H_FORMAT
4579                  G1PPRL_ADDR_BASE_H_FORMAT
4580                  G1PPRL_BYTE_H_FORMAT
4581                  G1PPRL_BYTE_H_FORMAT
4582                  G1PPRL_BYTE_H_FORMAT
4583                  G1PPRL_DOUBLE_H_FORMAT
4584                  G1PPRL_BYTE_H_FORMAT
4585                  G1PPRL_BYTE_H_FORMAT,
4586                  "type", "address-range",
4587                  "used", "prev-live", "next-live", "gc-eff",
4588                  "remset", "code-roots");
4589   _out->print_cr(G1PPRL_LINE_PREFIX
4590                  G1PPRL_TYPE_H_FORMAT
4591                  G1PPRL_ADDR_BASE_H_FORMAT
4592                  G1PPRL_BYTE_H_FORMAT
4593                  G1PPRL_BYTE_H_FORMAT
4594                  G1PPRL_BYTE_H_FORMAT
4595                  G1PPRL_DOUBLE_H_FORMAT
4596                  G1PPRL_BYTE_H_FORMAT
4597                  G1PPRL_BYTE_H_FORMAT,
4598                  "", "",
4599                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4600                  "(bytes)", "(bytes)");
4601 }
4602
4603 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4604 // the corresponding value for a region in a humongous region
4605 // series (either the region size, or what's left if the _hum_* field
4606 // is < the region size), and updates the _hum_* field accordingly.
4607 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4608   size_t bytes = 0;
4609   // The > 0 check is to deal with the prev and next live bytes which
4610   // could be 0.
4611   if (*hum_bytes > 0) {
4612     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4613     *hum_bytes -= bytes;
4614   }
4615   return bytes;
4616 }
4617
4618 // It deduces the values for a region in a humongous region series
4619 // from the _hum_* fields and updates those accordingly. It assumes
4620 // that the _hum_* fields have already been set up from the "starts
4621 // humongous" region and that we visit the regions in address order.
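// For example (illustrative figures only): a humongous object that uses
// 2.5M spread over three 1M regions is reported as 1M, 1M and 0.5M of used
// space for the three regions, in that order; capacity and the prev/next
// live counts are split by the same mechanism.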
4622 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4623 size_t* capacity_bytes, 4624 size_t* prev_live_bytes, 4625 size_t* next_live_bytes) { 4626 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4627 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4628 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4629 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4630 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4631 } 4632 4633 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4634 const char* type = ""; 4635 HeapWord* bottom = r->bottom(); 4636 HeapWord* end = r->end(); 4637 size_t capacity_bytes = r->capacity(); 4638 size_t used_bytes = r->used(); 4639 size_t prev_live_bytes = r->live_bytes(); 4640 size_t next_live_bytes = r->next_live_bytes(); 4641 double gc_eff = r->gc_efficiency(); 4642 size_t remset_bytes = r->rem_set()->mem_size(); 4643 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 4644 4645 if (r->used() == 0) { 4646 type = "FREE"; 4647 } else if (r->is_survivor()) { 4648 type = "SURV"; 4649 } else if (r->is_young()) { 4650 type = "EDEN"; 4651 } else if (r->startsHumongous()) { 4652 type = "HUMS"; 4653 4654 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4655 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4656 "they should have been zeroed after the last time we used them"); 4657 // Set up the _hum_* fields. 4658 _hum_capacity_bytes = capacity_bytes; 4659 _hum_used_bytes = used_bytes; 4660 _hum_prev_live_bytes = prev_live_bytes; 4661 _hum_next_live_bytes = next_live_bytes; 4662 get_hum_bytes(&used_bytes, &capacity_bytes, 4663 &prev_live_bytes, &next_live_bytes); 4664 end = bottom + HeapRegion::GrainWords; 4665 } else if (r->continuesHumongous()) { 4666 type = "HUMC"; 4667 get_hum_bytes(&used_bytes, &capacity_bytes, 4668 &prev_live_bytes, &next_live_bytes); 4669 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4670 } else { 4671 type = "OLD"; 4672 } 4673 4674 _total_used_bytes += used_bytes; 4675 _total_capacity_bytes += capacity_bytes; 4676 _total_prev_live_bytes += prev_live_bytes; 4677 _total_next_live_bytes += next_live_bytes; 4678 _total_remset_bytes += remset_bytes; 4679 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4680 4681 // Print a line for this particular region. 4682 _out->print_cr(G1PPRL_LINE_PREFIX 4683 G1PPRL_TYPE_FORMAT 4684 G1PPRL_ADDR_BASE_FORMAT 4685 G1PPRL_BYTE_FORMAT 4686 G1PPRL_BYTE_FORMAT 4687 G1PPRL_BYTE_FORMAT 4688 G1PPRL_DOUBLE_FORMAT 4689 G1PPRL_BYTE_FORMAT 4690 G1PPRL_BYTE_FORMAT, 4691 type, p2i(bottom), p2i(end), 4692 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4693 remset_bytes, strong_code_roots_bytes); 4694 4695 return false; 4696 } 4697 4698 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4699 // add static memory usages to remembered set sizes 4700 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4701 // Print the footer of the output. 
4702 _out->print_cr(G1PPRL_LINE_PREFIX); 4703 _out->print_cr(G1PPRL_LINE_PREFIX 4704 " SUMMARY" 4705 G1PPRL_SUM_MB_FORMAT("capacity") 4706 G1PPRL_SUM_MB_PERC_FORMAT("used") 4707 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4708 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4709 G1PPRL_SUM_MB_FORMAT("remset") 4710 G1PPRL_SUM_MB_FORMAT("code-roots"), 4711 bytes_to_mb(_total_capacity_bytes), 4712 bytes_to_mb(_total_used_bytes), 4713 perc(_total_used_bytes, _total_capacity_bytes), 4714 bytes_to_mb(_total_prev_live_bytes), 4715 perc(_total_prev_live_bytes, _total_capacity_bytes), 4716 bytes_to_mb(_total_next_live_bytes), 4717 perc(_total_next_live_bytes, _total_capacity_bytes), 4718 bytes_to_mb(_total_remset_bytes), 4719 bytes_to_mb(_total_strong_code_roots_bytes)); 4720 _out->cr(); 4721 }
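
// An illustrative, schematic sketch (not literal output) of the listing that
// G1PrintRegionLivenessInfoClosure produces, based on the format macros above:
//
//   ### PHASE <phase-name> @ <elapsed-seconds>
//   ### HEAP  committed: <from>-<to>  reserved: <from>-<to>  region-size: <bytes>
//   ###
//   ###  type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
//   ###  <one line per heap region>
//   ###
//   ###  SUMMARY  capacity: <MB>  used: <MB> / <%>  prev-live: <MB> / <%>  next-live: <MB> / <%>  remset: <MB>  code-roots: <MB>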