/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
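  // The bitmap keeps one bit per (1 << _shifter) heap words, so align addr
  // up to that granularity before converting it to a bit offset.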
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
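  // Sizing note: brs was reserved with (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
  // bytes, i.e. one bit per (1 << _shifter) heap words. With _shifter == 0 (one bit per
  // heap word, the usual case on a 64-bit VM with 8-byte object alignment) a 1 GB heap
  // needs roughly 16 MB of backing store per marking bitmap.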
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
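  // Unlike the CAS loops in par_push() and par_adjoin_arr(), this path holds
  // ParGCRareEvent_lock, so simply bumping _index reserves n contiguous slots
  // for this caller before they are filled in below.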
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
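  // Double-checked locking: the unsynchronized read of _next_survivor below is
  // only a fast-path check; it is re-read under RootRegionScan_lock before the
  // list is advanced, so two claimers cannot hand out the same region.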
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
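      // scale_parallel_threads() returns MAX2((n + 2) / 4, 1U); for example,
      // ParallelGCThreads == 8 gives 2 marking threads and
      // ParallelGCThreads == 4 gives 1.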
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
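    // Only checked when MarkStackSize was set on the command line; the two
    // cases below (MarkStackSizeMax at its default, or both flags on the
    // command line) both require MarkStackSize to lie in [1, MarkStackSizeMax].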
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
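// It walks the next marking bitmap for a region, recording the marked bytes
// and setting bits in the expected region/card bitmaps, which
// VerifyLiveObjectDataHRClosure below then compares against the actual ones.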
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1592 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1593 1594 bool expected = _exp_region_bm->at(index); 1595 bool actual = _region_bm->at(index); 1596 if (expected && !actual) { 1597 if (_verbose) { 1598 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1599 "expected: %s, actual: %s", 1600 hr->hrs_index(), 1601 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1602 } 1603 failures += 1; 1604 } 1605 1606 // Verify that the card bit maps for the cards spanned by the current 1607 // region match. We have an error if we have a set bit in the expected 1608 // bit map and the corresponding bit in the actual bitmap is not set. 1609 1610 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1611 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1612 1613 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1614 expected = _exp_card_bm->at(i); 1615 actual = _card_bm->at(i); 1616 1617 if (expected && !actual) { 1618 if (_verbose) { 1619 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1620 "expected: %s, actual: %s", 1621 hr->hrs_index(), i, 1622 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1623 } 1624 failures += 1; 1625 } 1626 } 1627 1628 if (failures > 0 && _verbose) { 1629 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1630 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1631 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1632 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1633 } 1634 1635 _failures += failures; 1636 1637 // We could stop iteration over the heap when we 1638 // find the first violating region by returning true. 1639 return false; 1640 } 1641 }; 1642 1643 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1644 protected: 1645 G1CollectedHeap* _g1h; 1646 ConcurrentMark* _cm; 1647 BitMap* _actual_region_bm; 1648 BitMap* _actual_card_bm; 1649 1650 uint _n_workers; 1651 1652 BitMap* _expected_region_bm; 1653 BitMap* _expected_card_bm; 1654 1655 int _failures; 1656 bool _verbose; 1657 1658 public: 1659 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1660 BitMap* region_bm, BitMap* card_bm, 1661 BitMap* expected_region_bm, BitMap* expected_card_bm) 1662 : AbstractGangTask("G1 verify final counting"), 1663 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1664 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1665 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1666 _failures(0), _verbose(false), 1667 _n_workers(0) { 1668 assert(VerifyDuringGC, "don't call this otherwise"); 1669 1670 // Use the value already set as the number of active threads 1671 // in the call to run_task(). 
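    // The caller side looks roughly like this (see ConcurrentMark::cleanup()):
    //
    //   g1h->set_par_threads((int)n_workers);
    //   g1h->workers()->run_task(&g1_par_verify_task);  // work(worker_id) per worker
    //   g1h->set_par_threads(0);
    //
    // so workers()->active_workers() is already meaningful by the time
    // this constructor runs.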
1672 if (G1CollectedHeap::use_parallel_gc_threads()) { 1673 assert( _g1h->workers()->active_workers() > 0, 1674 "Should have been previously set"); 1675 _n_workers = _g1h->workers()->active_workers(); 1676 } else { 1677 _n_workers = 1; 1678 } 1679 1680 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1681 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1682 1683 _verbose = _cm->verbose_medium(); 1684 } 1685 1686 void work(uint worker_id) { 1687 assert(worker_id < _n_workers, "invariant"); 1688 1689 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1690 _actual_region_bm, _actual_card_bm, 1691 _expected_region_bm, 1692 _expected_card_bm, 1693 _verbose); 1694 1695 if (G1CollectedHeap::use_parallel_gc_threads()) { 1696 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1697 worker_id, 1698 _n_workers, 1699 HeapRegion::VerifyCountClaimValue); 1700 } else { 1701 _g1h->heap_region_iterate(&verify_cl); 1702 } 1703 1704 Atomic::add(verify_cl.failures(), &_failures); 1705 } 1706 1707 int failures() const { return _failures; } 1708 }; 1709 1710 // Closure that finalizes the liveness counting data. 1711 // Used during the cleanup pause. 1712 // Sets the bits corresponding to the interval [NTAMS, top] 1713 // (which contains the implicitly live objects) in the 1714 // card liveness bitmap. Also sets the bit for each region, 1715 // containing live data, in the region liveness bitmap. 1716 1717 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1718 public: 1719 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1720 BitMap* region_bm, 1721 BitMap* card_bm) : 1722 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1723 1724 bool doHeapRegion(HeapRegion* hr) { 1725 1726 if (hr->continuesHumongous()) { 1727 // We will ignore these here and process them when their 1728 // associated "starts humongous" region is processed (see 1729 // set_bit_for_heap_region()). Note that we cannot rely on their 1730 // associated "starts humongous" region to have their bit set to 1731 // 1 since, due to the region chunking in the parallel region 1732 // iteration, a "continues humongous" region might be visited 1733 // before its associated "starts humongous". 1734 return false; 1735 } 1736 1737 HeapWord* ntams = hr->next_top_at_mark_start(); 1738 HeapWord* top = hr->top(); 1739 1740 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1741 1742 // Mark the allocated-since-marking portion... 1743 if (ntams < top) { 1744 // This definitely means the region has live objects. 1745 set_bit_for_region(hr); 1746 1747 // Now set the bits in the card bitmap for [ntams, top) 1748 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1749 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1750 1751 // Note: if we're looking at the last region in heap - top 1752 // could be actually just beyond the end of the heap; end_idx 1753 // will then correspond to a (non-existent) card that is also 1754 // just beyond the heap. 
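      // As an illustrative example with the usual 512 byte cards: if top
      // falls 128 bytes into the card with index 9, card_bitmap_index_for(top)
      // returns 9 even though that card is only partially covered; bumping
      // end_idx to 10 makes the half-open range [start_idx, 10) include
      // the partial card as well.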
1755 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1756 // end of object is not card aligned - increment to cover 1757 // all the cards spanned by the object 1758 end_idx += 1; 1759 } 1760 1761 assert(end_idx <= _card_bm->size(), 1762 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1763 end_idx, _card_bm->size())); 1764 assert(start_idx < _card_bm->size(), 1765 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1766 start_idx, _card_bm->size())); 1767 1768 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1769 } 1770 1771 // Set the bit for the region if it contains live data 1772 if (hr->next_marked_bytes() > 0) { 1773 set_bit_for_region(hr); 1774 } 1775 1776 return false; 1777 } 1778 }; 1779 1780 class G1ParFinalCountTask: public AbstractGangTask { 1781 protected: 1782 G1CollectedHeap* _g1h; 1783 ConcurrentMark* _cm; 1784 BitMap* _actual_region_bm; 1785 BitMap* _actual_card_bm; 1786 1787 uint _n_workers; 1788 1789 public: 1790 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1791 : AbstractGangTask("G1 final counting"), 1792 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1793 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1794 _n_workers(0) { 1795 // Use the value already set as the number of active threads 1796 // in the call to run_task(). 1797 if (G1CollectedHeap::use_parallel_gc_threads()) { 1798 assert( _g1h->workers()->active_workers() > 0, 1799 "Should have been previously set"); 1800 _n_workers = _g1h->workers()->active_workers(); 1801 } else { 1802 _n_workers = 1; 1803 } 1804 } 1805 1806 void work(uint worker_id) { 1807 assert(worker_id < _n_workers, "invariant"); 1808 1809 FinalCountDataUpdateClosure final_update_cl(_g1h, 1810 _actual_region_bm, 1811 _actual_card_bm); 1812 1813 if (G1CollectedHeap::use_parallel_gc_threads()) { 1814 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1815 worker_id, 1816 _n_workers, 1817 HeapRegion::FinalCountClaimValue); 1818 } else { 1819 _g1h->heap_region_iterate(&final_update_cl); 1820 } 1821 } 1822 }; 1823 1824 class G1ParNoteEndTask; 1825 1826 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1827 G1CollectedHeap* _g1; 1828 size_t _max_live_bytes; 1829 uint _regions_claimed; 1830 size_t _freed_bytes; 1831 FreeRegionList* _local_cleanup_list; 1832 HeapRegionSetCount _old_regions_removed; 1833 HeapRegionSetCount _humongous_regions_removed; 1834 HRRSCleanupTask* _hrrs_cleanup_task; 1835 double _claimed_region_time; 1836 double _max_region_time; 1837 1838 public: 1839 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1840 FreeRegionList* local_cleanup_list, 1841 HRRSCleanupTask* hrrs_cleanup_task) : 1842 _g1(g1), 1843 _max_live_bytes(0), _regions_claimed(0), 1844 _freed_bytes(0), 1845 _claimed_region_time(0.0), _max_region_time(0.0), 1846 _local_cleanup_list(local_cleanup_list), 1847 _old_regions_removed(), 1848 _humongous_regions_removed(), 1849 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1850 1851 size_t freed_bytes() { return _freed_bytes; } 1852 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1853 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1854 1855 bool doHeapRegion(HeapRegion *hr) { 1856 if (hr->continuesHumongous()) { 1857 return false; 1858 } 1859 // We use a claim value of zero here because all regions 1860 // were claimed with value 1 in the FinalCount task. 
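    // A note on the claim values used by the parallel phases in this file
    // (FinalCount, NoteEnd, ScrubRemSet, VerifyCount, AggregateCount):
    // a worker claims a region by atomically moving the region's claim
    // value to the phase's value, so each region is handled at most once
    // per phase, and check_heap_region_claim_values() can assert afterwards
    // that no region was missed. Conceptually:
    //
    //   if (hr->claimHeapRegion(claim_value)) {
    //     // this worker is the one that processes hr in this phase
    //   }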
1861 _g1->reset_gc_time_stamps(hr); 1862 double start = os::elapsedTime(); 1863 _regions_claimed++; 1864 hr->note_end_of_marking(); 1865 _max_live_bytes += hr->max_live_bytes(); 1866 1867 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1868 _freed_bytes += hr->used(); 1869 hr->set_containing_set(NULL); 1870 if (hr->isHumongous()) { 1871 assert(hr->startsHumongous(), "we should only see starts humongous"); 1872 _humongous_regions_removed.increment(1u, hr->capacity()); 1873 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1874 } else { 1875 _old_regions_removed.increment(1u, hr->capacity()); 1876 _g1->free_region(hr, _local_cleanup_list, true); 1877 } 1878 } else { 1879 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1880 } 1881 1882 double region_time = (os::elapsedTime() - start); 1883 _claimed_region_time += region_time; 1884 if (region_time > _max_region_time) { 1885 _max_region_time = region_time; 1886 } 1887 return false; 1888 } 1889 1890 size_t max_live_bytes() { return _max_live_bytes; } 1891 uint regions_claimed() { return _regions_claimed; } 1892 double claimed_region_time_sec() { return _claimed_region_time; } 1893 double max_region_time_sec() { return _max_region_time; } 1894 }; 1895 1896 class G1ParNoteEndTask: public AbstractGangTask { 1897 friend class G1NoteEndOfConcMarkClosure; 1898 1899 protected: 1900 G1CollectedHeap* _g1h; 1901 size_t _max_live_bytes; 1902 size_t _freed_bytes; 1903 FreeRegionList* _cleanup_list; 1904 1905 public: 1906 G1ParNoteEndTask(G1CollectedHeap* g1h, 1907 FreeRegionList* cleanup_list) : 1908 AbstractGangTask("G1 note end"), _g1h(g1h), 1909 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1910 1911 void work(uint worker_id) { 1912 double start = os::elapsedTime(); 1913 FreeRegionList local_cleanup_list("Local Cleanup List"); 1914 HRRSCleanupTask hrrs_cleanup_task; 1915 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1916 &hrrs_cleanup_task); 1917 if (G1CollectedHeap::use_parallel_gc_threads()) { 1918 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1919 _g1h->workers()->active_workers(), 1920 HeapRegion::NoteEndClaimValue); 1921 } else { 1922 _g1h->heap_region_iterate(&g1_note_end); 1923 } 1924 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1925 1926 // Now update the lists 1927 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1928 { 1929 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1930 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1931 _max_live_bytes += g1_note_end.max_live_bytes(); 1932 _freed_bytes += g1_note_end.freed_bytes(); 1933 1934 // If we iterate over the global cleanup list at the end of 1935 // cleanup to do this printing we will not guarantee to only 1936 // generate output for the newly-reclaimed regions (the list 1937 // might not be empty at the beginning of cleanup; we might 1938 // still be working on its previous contents). So we do the 1939 // printing here, before we append the new regions to the global 1940 // cleanup list. 
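      // The flow per worker is roughly:
      //
      //   freed regions      -> local_cleanup_list   (lock free, per worker)
      //   local_cleanup_list -> _cleanup_list        (under ParGCRareEvent_lock)
      //
      // and later, concurrently, ConcurrentMark::completeCleanup() drains
      // _cleanup_list into the secondary free list a few regions at a time.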
1941 1942 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1943 if (hr_printer->is_active()) { 1944 FreeRegionListIterator iter(&local_cleanup_list); 1945 while (iter.more_available()) { 1946 HeapRegion* hr = iter.get_next(); 1947 hr_printer->cleanup(hr); 1948 } 1949 } 1950 1951 _cleanup_list->add_ordered(&local_cleanup_list); 1952 assert(local_cleanup_list.is_empty(), "post-condition"); 1953 1954 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1955 } 1956 } 1957 size_t max_live_bytes() { return _max_live_bytes; } 1958 size_t freed_bytes() { return _freed_bytes; } 1959 }; 1960 1961 class G1ParScrubRemSetTask: public AbstractGangTask { 1962 protected: 1963 G1RemSet* _g1rs; 1964 BitMap* _region_bm; 1965 BitMap* _card_bm; 1966 public: 1967 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1968 BitMap* region_bm, BitMap* card_bm) : 1969 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1970 _region_bm(region_bm), _card_bm(card_bm) { } 1971 1972 void work(uint worker_id) { 1973 if (G1CollectedHeap::use_parallel_gc_threads()) { 1974 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1975 HeapRegion::ScrubRemSetClaimValue); 1976 } else { 1977 _g1rs->scrub(_region_bm, _card_bm); 1978 } 1979 } 1980 1981 }; 1982 1983 void ConcurrentMark::cleanup() { 1984 // world is stopped at this checkpoint 1985 assert(SafepointSynchronize::is_at_safepoint(), 1986 "world should be stopped"); 1987 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1988 1989 // If a full collection has happened, we shouldn't do this. 1990 if (has_aborted()) { 1991 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1992 return; 1993 } 1994 1995 g1h->verify_region_sets_optional(); 1996 1997 if (VerifyDuringGC) { 1998 HandleMark hm; // handle scope 1999 Universe::heap()->prepare_for_verify(); 2000 Universe::verify(VerifyOption_G1UsePrevMarking, 2001 " VerifyDuringGC:(before)"); 2002 } 2003 g1h->check_bitmaps("Cleanup Start"); 2004 2005 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2006 g1p->record_concurrent_mark_cleanup_start(); 2007 2008 double start = os::elapsedTime(); 2009 2010 HeapRegionRemSet::reset_for_cleanup_tasks(); 2011 2012 uint n_workers; 2013 2014 // Do counting once more with the world stopped for good measure. 2015 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2016 2017 if (G1CollectedHeap::use_parallel_gc_threads()) { 2018 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2019 "sanity check"); 2020 2021 g1h->set_par_threads(); 2022 n_workers = g1h->n_par_threads(); 2023 assert(g1h->n_par_threads() == n_workers, 2024 "Should not have been reset"); 2025 g1h->workers()->run_task(&g1_par_count_task); 2026 // Done with the parallel phase so reset to 0. 2027 g1h->set_par_threads(0); 2028 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2030 "sanity check"); 2031 } else { 2032 n_workers = 1; 2033 g1_par_count_task.work(0); 2034 } 2035 2036 if (VerifyDuringGC) { 2037 // Verify that the counting data accumulated during marking matches 2038 // that calculated by walking the marking bitmap. 
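    // The expected values are recomputed from scratch: temporary region
    // and card bitmaps are filled by re-walking the next mark bitmap
    // (CalcLiveObjectsClosure) and then compared bit for bit against
    // _region_bm and _card_bm (VerifyLiveObjectDataHRClosure).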
2039 2040 // Bitmaps to hold expected values 2041 BitMap expected_region_bm(_region_bm.size(), true); 2042 BitMap expected_card_bm(_card_bm.size(), true); 2043 2044 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2045 &_region_bm, 2046 &_card_bm, 2047 &expected_region_bm, 2048 &expected_card_bm); 2049 2050 if (G1CollectedHeap::use_parallel_gc_threads()) { 2051 g1h->set_par_threads((int)n_workers); 2052 g1h->workers()->run_task(&g1_par_verify_task); 2053 // Done with the parallel phase so reset to 0. 2054 g1h->set_par_threads(0); 2055 2056 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2057 "sanity check"); 2058 } else { 2059 g1_par_verify_task.work(0); 2060 } 2061 2062 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2063 } 2064 2065 size_t start_used_bytes = g1h->used(); 2066 g1h->set_marking_complete(); 2067 2068 double count_end = os::elapsedTime(); 2069 double this_final_counting_time = (count_end - start); 2070 _total_counting_time += this_final_counting_time; 2071 2072 if (G1PrintRegionLivenessInfo) { 2073 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2074 _g1h->heap_region_iterate(&cl); 2075 } 2076 2077 // Install newly created mark bitMap as "prev". 2078 swapMarkBitMaps(); 2079 2080 g1h->reset_gc_time_stamp(); 2081 2082 // Note end of marking in all heap regions. 2083 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2084 if (G1CollectedHeap::use_parallel_gc_threads()) { 2085 g1h->set_par_threads((int)n_workers); 2086 g1h->workers()->run_task(&g1_par_note_end_task); 2087 g1h->set_par_threads(0); 2088 2089 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2090 "sanity check"); 2091 } else { 2092 g1_par_note_end_task.work(0); 2093 } 2094 g1h->check_gc_time_stamps(); 2095 2096 if (!cleanup_list_is_empty()) { 2097 // The cleanup list is not empty, so we'll have to process it 2098 // concurrently. Notify anyone else that might be wanting free 2099 // regions that there will be more free regions coming soon. 2100 g1h->set_free_regions_coming(); 2101 } 2102 2103 // call below, since it affects the metric by which we sort the heap 2104 // regions. 2105 if (G1ScrubRemSets) { 2106 double rs_scrub_start = os::elapsedTime(); 2107 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2108 if (G1CollectedHeap::use_parallel_gc_threads()) { 2109 g1h->set_par_threads((int)n_workers); 2110 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2111 g1h->set_par_threads(0); 2112 2113 assert(g1h->check_heap_region_claim_values( 2114 HeapRegion::ScrubRemSetClaimValue), 2115 "sanity check"); 2116 } else { 2117 g1_par_scrub_rs_task.work(0); 2118 } 2119 2120 double rs_scrub_end = os::elapsedTime(); 2121 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2122 _total_rs_scrub_time += this_rs_scrub_time; 2123 } 2124 2125 // this will also free any regions totally full of garbage objects, 2126 // and sort the regions. 2127 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2128 2129 // Statistics. 2130 double end = os::elapsedTime(); 2131 _cleanup_times.add((end - start) * 1000.0); 2132 2133 if (G1Log::fine()) { 2134 g1h->print_size_transition(gclog_or_tty, 2135 start_used_bytes, 2136 g1h->used(), 2137 g1h->capacity()); 2138 } 2139 2140 // Clean up will have freed any regions completely full of garbage. 2141 // Update the soft reference policy with the new heap occupancy. 
2142 Universe::update_heap_info_at_gc(); 2143 2144 // We need to make this be a "collection" so any collection pause that 2145 // races with it goes around and waits for completeCleanup to finish. 2146 g1h->increment_total_collections(); 2147 2148 // We reclaimed old regions so we should calculate the sizes to make 2149 // sure we update the old gen/space data. 2150 g1h->g1mm()->update_sizes(); 2151 2152 if (VerifyDuringGC) { 2153 HandleMark hm; // handle scope 2154 Universe::heap()->prepare_for_verify(); 2155 Universe::verify(VerifyOption_G1UsePrevMarking, 2156 " VerifyDuringGC:(after)"); 2157 } 2158 g1h->check_bitmaps("Cleanup End"); 2159 2160 g1h->verify_region_sets_optional(); 2161 g1h->trace_heap_after_concurrent_cycle(); 2162 } 2163 2164 void ConcurrentMark::completeCleanup() { 2165 if (has_aborted()) return; 2166 2167 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2168 2169 _cleanup_list.verify_optional(); 2170 FreeRegionList tmp_free_list("Tmp Free List"); 2171 2172 if (G1ConcRegionFreeingVerbose) { 2173 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2174 "cleanup list has %u entries", 2175 _cleanup_list.length()); 2176 } 2177 2178 // Noone else should be accessing the _cleanup_list at this point, 2179 // so it's not necessary to take any locks 2180 while (!_cleanup_list.is_empty()) { 2181 HeapRegion* hr = _cleanup_list.remove_head(); 2182 assert(hr != NULL, "Got NULL from a non-empty list"); 2183 hr->par_clear(); 2184 tmp_free_list.add_ordered(hr); 2185 2186 // Instead of adding one region at a time to the secondary_free_list, 2187 // we accumulate them in the local list and move them a few at a 2188 // time. This also cuts down on the number of notify_all() calls 2189 // we do during this process. We'll also append the local list when 2190 // _cleanup_list is empty (which means we just removed the last 2191 // region from the _cleanup_list). 2192 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2193 _cleanup_list.is_empty()) { 2194 if (G1ConcRegionFreeingVerbose) { 2195 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2196 "appending %u entries to the secondary_free_list, " 2197 "cleanup list still has %u entries", 2198 tmp_free_list.length(), 2199 _cleanup_list.length()); 2200 } 2201 2202 { 2203 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2204 g1h->secondary_free_list_add(&tmp_free_list); 2205 SecondaryFreeList_lock->notify_all(); 2206 } 2207 2208 if (G1StressConcRegionFreeing) { 2209 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2210 os::sleep(Thread::current(), (jlong) 1, false); 2211 } 2212 } 2213 } 2214 } 2215 assert(tmp_free_list.is_empty(), "post-condition"); 2216 } 2217 2218 // Supporting Object and Oop closures for reference discovery 2219 // and processing in during marking 2220 2221 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2222 HeapWord* addr = (HeapWord*)obj; 2223 return addr != NULL && 2224 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2225 } 2226 2227 // 'Keep Alive' oop closure used by both serial parallel reference processing. 2228 // Uses the CMTask associated with a worker thread (for serial reference 2229 // processing the CMTask for worker 0 is used) to preserve (mark) and 2230 // trace referent objects. 2231 // 2232 // Using the CMTask and embedded local queues avoids having the worker 2233 // threads operating on the global mark stack. 
This reduces the risk 2234 // of overflowing the stack - which we would rather avoid at this late 2235 // state. Also using the tasks' local queues removes the potential 2236 // of the workers interfering with each other that could occur if 2237 // operating on the global stack. 2238 2239 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2240 ConcurrentMark* _cm; 2241 CMTask* _task; 2242 int _ref_counter_limit; 2243 int _ref_counter; 2244 bool _is_serial; 2245 public: 2246 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2247 _cm(cm), _task(task), _is_serial(is_serial), 2248 _ref_counter_limit(G1RefProcDrainInterval) { 2249 assert(_ref_counter_limit > 0, "sanity"); 2250 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2251 _ref_counter = _ref_counter_limit; 2252 } 2253 2254 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2255 virtual void do_oop( oop* p) { do_oop_work(p); } 2256 2257 template <class T> void do_oop_work(T* p) { 2258 if (!_cm->has_overflown()) { 2259 oop obj = oopDesc::load_decode_heap_oop(p); 2260 if (_cm->verbose_high()) { 2261 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2262 "*"PTR_FORMAT" = "PTR_FORMAT, 2263 _task->worker_id(), p2i(p), p2i((void*) obj)); 2264 } 2265 2266 _task->deal_with_reference(obj); 2267 _ref_counter--; 2268 2269 if (_ref_counter == 0) { 2270 // We have dealt with _ref_counter_limit references, pushing them 2271 // and objects reachable from them on to the local stack (and 2272 // possibly the global stack). Call CMTask::do_marking_step() to 2273 // process these entries. 2274 // 2275 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2276 // there's nothing more to do (i.e. we're done with the entries that 2277 // were pushed as a result of the CMTask::deal_with_reference() calls 2278 // above) or we overflow. 2279 // 2280 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2281 // flag while there may still be some work to do. (See the comment at 2282 // the beginning of CMTask::do_marking_step() for those conditions - 2283 // one of which is reaching the specified time target.) It is only 2284 // when CMTask::do_marking_step() returns without setting the 2285 // has_aborted() flag that the marking step has completed. 2286 do { 2287 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2288 _task->do_marking_step(mark_step_duration_ms, 2289 false /* do_termination */, 2290 _is_serial); 2291 } while (_task->has_aborted() && !_cm->has_overflown()); 2292 _ref_counter = _ref_counter_limit; 2293 } 2294 } else { 2295 if (_cm->verbose_high()) { 2296 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2297 } 2298 } 2299 } 2300 }; 2301 2302 // 'Drain' oop closure used by both serial and parallel reference processing. 2303 // Uses the CMTask associated with a given worker thread (for serial 2304 // reference processing the CMtask for worker 0 is used). Calls the 2305 // do_marking_step routine, with an unbelievably large timeout value, 2306 // to drain the marking data structures of the remaining entries 2307 // added by the 'keep alive' oop closure above. 
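// Taken together, the two closures split the work roughly as follows
// (both loop while the task aborts without a mark stack overflow):
//
//   keep alive: do_marking_step(G1ConcMarkStepDurationMillis,
//                               false /* do_termination */, is_serial)
//               every G1RefProcDrainInterval references
//   drain:      do_marking_step(1000000000.0 /* no real time limit */,
//                               true  /* do_termination */, is_serial)
//               once the reference lists have been processed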
2308 2309 class G1CMDrainMarkingStackClosure: public VoidClosure { 2310 ConcurrentMark* _cm; 2311 CMTask* _task; 2312 bool _is_serial; 2313 public: 2314 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2315 _cm(cm), _task(task), _is_serial(is_serial) { 2316 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2317 } 2318 2319 void do_void() { 2320 do { 2321 if (_cm->verbose_high()) { 2322 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2323 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2324 } 2325 2326 // We call CMTask::do_marking_step() to completely drain the local 2327 // and global marking stacks of entries pushed by the 'keep alive' 2328 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2329 // 2330 // CMTask::do_marking_step() is called in a loop, which we'll exit 2331 // if there's nothing more to do (i.e. we've completely drained the 2332 // entries that were pushed as a a result of applying the 'keep alive' 2333 // closure to the entries on the discovered ref lists) or we overflow 2334 // the global marking stack. 2335 // 2336 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2337 // flag while there may still be some work to do. (See the comment at 2338 // the beginning of CMTask::do_marking_step() for those conditions - 2339 // one of which is reaching the specified time target.) It is only 2340 // when CMTask::do_marking_step() returns without setting the 2341 // has_aborted() flag that the marking step has completed. 2342 2343 _task->do_marking_step(1000000000.0 /* something very large */, 2344 true /* do_termination */, 2345 _is_serial); 2346 } while (_task->has_aborted() && !_cm->has_overflown()); 2347 } 2348 }; 2349 2350 // Implementation of AbstractRefProcTaskExecutor for parallel 2351 // reference processing at the end of G1 concurrent marking 2352 2353 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2354 private: 2355 G1CollectedHeap* _g1h; 2356 ConcurrentMark* _cm; 2357 WorkGang* _workers; 2358 int _active_workers; 2359 2360 public: 2361 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2362 ConcurrentMark* cm, 2363 WorkGang* workers, 2364 int n_workers) : 2365 _g1h(g1h), _cm(cm), 2366 _workers(workers), _active_workers(n_workers) { } 2367 2368 // Executes the given task using concurrent marking worker threads. 
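  // The reference processor drives these through the
  // AbstractRefProcTaskExecutor interface, roughly:
  //
  //   rp->process_discovered_references(..., executor, ...)
  //     -> executor->execute(proc_task)
  //       -> the work gang runs G1CMRefProcTaskProxy::work(worker_id)
  //         -> proc_task.work(worker_id, is_alive, keep_alive, drain)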
2369 virtual void execute(ProcessTask& task); 2370 virtual void execute(EnqueueTask& task); 2371 }; 2372 2373 class G1CMRefProcTaskProxy: public AbstractGangTask { 2374 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2375 ProcessTask& _proc_task; 2376 G1CollectedHeap* _g1h; 2377 ConcurrentMark* _cm; 2378 2379 public: 2380 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2381 G1CollectedHeap* g1h, 2382 ConcurrentMark* cm) : 2383 AbstractGangTask("Process reference objects in parallel"), 2384 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2385 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2386 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2387 } 2388 2389 virtual void work(uint worker_id) { 2390 CMTask* task = _cm->task(worker_id); 2391 G1CMIsAliveClosure g1_is_alive(_g1h); 2392 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2393 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2394 2395 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2396 } 2397 }; 2398 2399 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2400 assert(_workers != NULL, "Need parallel worker threads."); 2401 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2402 2403 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2404 2405 // We need to reset the concurrency level before each 2406 // proxy task execution, so that the termination protocol 2407 // and overflow handling in CMTask::do_marking_step() knows 2408 // how many workers to wait for. 2409 _cm->set_concurrency(_active_workers); 2410 _g1h->set_par_threads(_active_workers); 2411 _workers->run_task(&proc_task_proxy); 2412 _g1h->set_par_threads(0); 2413 } 2414 2415 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2416 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2417 EnqueueTask& _enq_task; 2418 2419 public: 2420 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2421 AbstractGangTask("Enqueue reference objects in parallel"), 2422 _enq_task(enq_task) { } 2423 2424 virtual void work(uint worker_id) { 2425 _enq_task.work(worker_id); 2426 } 2427 }; 2428 2429 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2430 assert(_workers != NULL, "Need parallel worker threads."); 2431 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2432 2433 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2434 2435 // Not strictly necessary but... 2436 // 2437 // We need to reset the concurrency level before each 2438 // proxy task execution, so that the termination protocol 2439 // and overflow handling in CMTask::do_marking_step() knows 2440 // how many workers to wait for. 2441 _cm->set_concurrency(_active_workers); 2442 _g1h->set_par_threads(_active_workers); 2443 _workers->run_task(&enq_task_proxy); 2444 _g1h->set_par_threads(0); 2445 } 2446 2447 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2448 if (has_overflown()) { 2449 // Skip processing the discovered references if we have 2450 // overflown the global marking stack. Reference objects 2451 // only get discovered once so it is OK to not 2452 // de-populate the discovered reference lists. We could have, 2453 // but the only benefit would be that, when marking restarts, 2454 // less reference objects are discovered. 2455 return; 2456 } 2457 2458 ResourceMark rm; 2459 HandleMark hm; 2460 2461 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2462 2463 // Is alive closure. 
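  // As implemented in G1CMIsAliveClosure::do_object_b() above, the
  // predicate is roughly:
  //
  //   obj != NULL && (!is_in_g1_reserved(obj) || !is_obj_ill(obj))
  //
  // i.e. anything outside the G1 reserved space, or anything the marking
  // information does not consider dead, is treated as alive.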
2464 G1CMIsAliveClosure g1_is_alive(g1h); 2465 2466 // Inner scope to exclude the cleaning of the string and symbol 2467 // tables from the displayed time. 2468 { 2469 if (G1Log::finer()) { 2470 gclog_or_tty->put(' '); 2471 } 2472 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm()); 2473 2474 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2475 2476 // See the comment in G1CollectedHeap::ref_processing_init() 2477 // about how reference processing currently works in G1. 2478 2479 // Set the soft reference policy 2480 rp->setup_policy(clear_all_soft_refs); 2481 assert(_markStack.isEmpty(), "mark stack should be empty"); 2482 2483 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2484 // in serial reference processing. Note these closures are also 2485 // used for serially processing (by the the current thread) the 2486 // JNI references during parallel reference processing. 2487 // 2488 // These closures do not need to synchronize with the worker 2489 // threads involved in parallel reference processing as these 2490 // instances are executed serially by the current thread (e.g. 2491 // reference processing is not multi-threaded and is thus 2492 // performed by the current thread instead of a gang worker). 2493 // 2494 // The gang tasks involved in parallel reference processing create 2495 // their own instances of these closures, which do their own 2496 // synchronization among themselves. 2497 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2498 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2499 2500 // We need at least one active thread. If reference processing 2501 // is not multi-threaded we use the current (VMThread) thread, 2502 // otherwise we use the work gang from the G1CollectedHeap and 2503 // we utilize all the worker threads we can. 2504 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2505 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2506 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2507 2508 // Parallel processing task executor. 2509 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2510 g1h->workers(), active_workers); 2511 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2512 2513 // Set the concurrency level. The phase was already set prior to 2514 // executing the remark task. 2515 set_concurrency(active_workers); 2516 2517 // Set the degree of MT processing here. If the discovery was done MT, 2518 // the number of threads involved during discovery could differ from 2519 // the number of active workers. This is OK as long as the discovered 2520 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2521 rp->set_active_mt_degree(active_workers); 2522 2523 // Process the weak references. 2524 const ReferenceProcessorStats& stats = 2525 rp->process_discovered_references(&g1_is_alive, 2526 &g1_keep_alive, 2527 &g1_drain_mark_stack, 2528 executor, 2529 g1h->gc_timer_cm()); 2530 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2531 2532 // The do_oop work routines of the keep_alive and drain_marking_stack 2533 // oop closures will set the has_overflown flag if we overflow the 2534 // global marking stack. 
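    // If that happened, the code below just records the fact; the early
    // return from weakRefsWork() further down then skips the string and
    // symbol table unlinking, and marking will be restarted rather than
    // completed from this remark pause.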
2535 2536 assert(_markStack.overflow() || _markStack.isEmpty(), 2537 "mark stack should be empty (unless it overflowed)"); 2538 2539 if (_markStack.overflow()) { 2540 // This should have been done already when we tried to push an 2541 // entry on to the global mark stack. But let's do it again. 2542 set_has_overflown(); 2543 } 2544 2545 assert(rp->num_q() == active_workers, "why not"); 2546 2547 rp->enqueue_discovered_references(executor); 2548 2549 rp->verify_no_references_recorded(); 2550 assert(!rp->discovery_enabled(), "Post condition"); 2551 } 2552 2553 if (has_overflown()) { 2554 // We can not trust g1_is_alive if the marking stack overflowed 2555 return; 2556 } 2557 2558 g1h->unlink_string_and_symbol_table(&g1_is_alive, 2559 /* process_strings */ false, // currently strings are always roots 2560 /* process_symbols */ true); 2561 } 2562 2563 void ConcurrentMark::swapMarkBitMaps() { 2564 CMBitMapRO* temp = _prevMarkBitMap; 2565 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2566 _nextMarkBitMap = (CMBitMap*) temp; 2567 } 2568 2569 class CMRemarkTask: public AbstractGangTask { 2570 private: 2571 ConcurrentMark* _cm; 2572 bool _is_serial; 2573 public: 2574 void work(uint worker_id) { 2575 // Since all available tasks are actually started, we should 2576 // only proceed if we're supposed to be active. 2577 if (worker_id < _cm->active_tasks()) { 2578 CMTask* task = _cm->task(worker_id); 2579 task->record_start_time(); 2580 do { 2581 task->do_marking_step(1000000000.0 /* something very large */, 2582 true /* do_termination */, 2583 _is_serial); 2584 } while (task->has_aborted() && !_cm->has_overflown()); 2585 // If we overflow, then we do not want to restart. We instead 2586 // want to abort remark and do concurrent marking again. 2587 task->record_end_time(); 2588 } 2589 } 2590 2591 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2592 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2593 _cm->terminator()->reset_for_reuse(active_workers); 2594 } 2595 }; 2596 2597 void ConcurrentMark::checkpointRootsFinalWork() { 2598 ResourceMark rm; 2599 HandleMark hm; 2600 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2601 2602 g1h->ensure_parsability(false); 2603 2604 if (G1CollectedHeap::use_parallel_gc_threads()) { 2605 G1CollectedHeap::StrongRootsScope srs(g1h); 2606 // this is remark, so we'll use up all active threads 2607 uint active_workers = g1h->workers()->active_workers(); 2608 if (active_workers == 0) { 2609 assert(active_workers > 0, "Should have been set earlier"); 2610 active_workers = (uint) ParallelGCThreads; 2611 g1h->workers()->set_active_workers(active_workers); 2612 } 2613 set_concurrency_and_phase(active_workers, false /* concurrent */); 2614 // Leave _parallel_marking_threads at it's 2615 // value originally calculated in the ConcurrentMark 2616 // constructor and pass values of the active workers 2617 // through the gang in the task. 2618 2619 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2620 // We will start all available threads, even if we decide that the 2621 // active_workers will be fewer. The extra ones will just bail out 2622 // immediately. 
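    // "Bail out" simply means the guard at the top of CMRemarkTask::work():
    //
    //   if (worker_id < _cm->active_tasks()) {
    //     ... do_marking_step(...) in a loop ...
    //   }   // otherwise return without doing anything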
2623 g1h->set_par_threads(active_workers); 2624 g1h->workers()->run_task(&remarkTask); 2625 g1h->set_par_threads(0); 2626 } else { 2627 G1CollectedHeap::StrongRootsScope srs(g1h); 2628 uint active_workers = 1; 2629 set_concurrency_and_phase(active_workers, false /* concurrent */); 2630 2631 // Note - if there's no work gang then the VMThread will be 2632 // the thread to execute the remark - serially. We have 2633 // to pass true for the is_serial parameter so that 2634 // CMTask::do_marking_step() doesn't enter the sync 2635 // barriers in the event of an overflow. Doing so will 2636 // cause an assert that the current thread is not a 2637 // concurrent GC thread. 2638 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2639 remarkTask.work(0); 2640 } 2641 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2642 guarantee(has_overflown() || 2643 satb_mq_set.completed_buffers_num() == 0, 2644 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2645 BOOL_TO_STR(has_overflown()), 2646 satb_mq_set.completed_buffers_num())); 2647 2648 print_stats(); 2649 } 2650 2651 #ifndef PRODUCT 2652 2653 class PrintReachableOopClosure: public OopClosure { 2654 private: 2655 G1CollectedHeap* _g1h; 2656 outputStream* _out; 2657 VerifyOption _vo; 2658 bool _all; 2659 2660 public: 2661 PrintReachableOopClosure(outputStream* out, 2662 VerifyOption vo, 2663 bool all) : 2664 _g1h(G1CollectedHeap::heap()), 2665 _out(out), _vo(vo), _all(all) { } 2666 2667 void do_oop(narrowOop* p) { do_oop_work(p); } 2668 void do_oop( oop* p) { do_oop_work(p); } 2669 2670 template <class T> void do_oop_work(T* p) { 2671 oop obj = oopDesc::load_decode_heap_oop(p); 2672 const char* str = NULL; 2673 const char* str2 = ""; 2674 2675 if (obj == NULL) { 2676 str = ""; 2677 } else if (!_g1h->is_in_g1_reserved(obj)) { 2678 str = " O"; 2679 } else { 2680 HeapRegion* hr = _g1h->heap_region_containing(obj); 2681 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2682 bool marked = _g1h->is_marked(obj, _vo); 2683 2684 if (over_tams) { 2685 str = " >"; 2686 if (marked) { 2687 str2 = " AND MARKED"; 2688 } 2689 } else if (marked) { 2690 str = " M"; 2691 } else { 2692 str = " NOT"; 2693 } 2694 } 2695 2696 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2697 p2i(p), p2i((void*) obj), str, str2); 2698 } 2699 }; 2700 2701 class PrintReachableObjectClosure : public ObjectClosure { 2702 private: 2703 G1CollectedHeap* _g1h; 2704 outputStream* _out; 2705 VerifyOption _vo; 2706 bool _all; 2707 HeapRegion* _hr; 2708 2709 public: 2710 PrintReachableObjectClosure(outputStream* out, 2711 VerifyOption vo, 2712 bool all, 2713 HeapRegion* hr) : 2714 _g1h(G1CollectedHeap::heap()), 2715 _out(out), _vo(vo), _all(all), _hr(hr) { } 2716 2717 void do_object(oop o) { 2718 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2719 bool marked = _g1h->is_marked(o, _vo); 2720 bool print_it = _all || over_tams || marked; 2721 2722 if (print_it) { 2723 _out->print_cr(" "PTR_FORMAT"%s", 2724 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2725 PrintReachableOopClosure oopCl(_out, _vo, _all); 2726 o->oop_iterate_no_header(&oopCl); 2727 } 2728 } 2729 }; 2730 2731 class PrintReachableRegionClosure : public HeapRegionClosure { 2732 private: 2733 G1CollectedHeap* _g1h; 2734 outputStream* _out; 2735 VerifyOption _vo; 2736 bool _all; 2737 2738 public: 2739 bool doHeapRegion(HeapRegion* hr) { 2740 HeapWord* b = hr->bottom(); 2741 HeapWord* e = hr->end(); 2742 HeapWord* t = hr->top(); 2743 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2744 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2745 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2746 _out->cr(); 2747 2748 HeapWord* from = b; 2749 HeapWord* to = t; 2750 2751 if (to > from) { 2752 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2753 _out->cr(); 2754 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2755 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2756 _out->cr(); 2757 } 2758 2759 return false; 2760 } 2761 2762 PrintReachableRegionClosure(outputStream* out, 2763 VerifyOption vo, 2764 bool all) : 2765 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2766 }; 2767 2768 void ConcurrentMark::print_reachable(const char* str, 2769 VerifyOption vo, 2770 bool all) { 2771 gclog_or_tty->cr(); 2772 gclog_or_tty->print_cr("== Doing heap dump... "); 2773 2774 if (G1PrintReachableBaseFile == NULL) { 2775 gclog_or_tty->print_cr(" #### error: no base file defined"); 2776 return; 2777 } 2778 2779 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2780 (JVM_MAXPATHLEN - 1)) { 2781 gclog_or_tty->print_cr(" #### error: file name too long"); 2782 return; 2783 } 2784 2785 char file_name[JVM_MAXPATHLEN]; 2786 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2787 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2788 2789 fileStream fout(file_name); 2790 if (!fout.is_open()) { 2791 gclog_or_tty->print_cr(" #### error: could not open file"); 2792 return; 2793 } 2794 2795 outputStream* out = &fout; 2796 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2797 out->cr(); 2798 2799 out->print_cr("--- ITERATING OVER REGIONS"); 2800 out->cr(); 2801 PrintReachableRegionClosure rcl(out, vo, all); 2802 _g1h->heap_region_iterate(&rcl); 2803 out->cr(); 2804 2805 gclog_or_tty->print_cr(" done"); 2806 gclog_or_tty->flush(); 2807 } 2808 2809 #endif // PRODUCT 2810 2811 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2812 // Note we are overriding the read-only view of the prev map here, via 2813 // the cast. 2814 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2815 } 2816 2817 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2818 _nextMarkBitMap->clearRange(mr); 2819 } 2820 2821 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2822 clearRangePrevBitmap(mr); 2823 clearRangeNextBitmap(mr); 2824 } 2825 2826 HeapRegion* 2827 ConcurrentMark::claim_region(uint worker_id) { 2828 // "checkpoint" the finger 2829 HeapWord* finger = _finger; 2830 2831 // _heap_end will not change underneath our feet; it only changes at 2832 // yield points. 2833 while (finger < _heap_end) { 2834 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2835 2836 // Note on how this code handles humongous regions. In the 2837 // normal case the finger will reach the start of a "starts 2838 // humongous" (SH) region. 
Its end will either be the end of the 2839 // last "continues humongous" (CH) region in the sequence, or the 2840 // standard end of the SH region (if the SH is the only region in 2841 // the sequence). That way claim_region() will skip over the CH 2842 // regions. However, there is a subtle race between a CM thread 2843 // executing this method and a mutator thread doing a humongous 2844 // object allocation. The two are not mutually exclusive as the CM 2845 // thread does not need to hold the Heap_lock when it gets 2846 // here. So there is a chance that claim_region() will come across 2847 // a free region that's in the progress of becoming a SH or a CH 2848 // region. In the former case, it will either 2849 // a) Miss the update to the region's end, in which case it will 2850 // visit every subsequent CH region, will find their bitmaps 2851 // empty, and do nothing, or 2852 // b) Will observe the update of the region's end (in which case 2853 // it will skip the subsequent CH regions). 2854 // If it comes across a region that suddenly becomes CH, the 2855 // scenario will be similar to b). So, the race between 2856 // claim_region() and a humongous object allocation might force us 2857 // to do a bit of unnecessary work (due to some unnecessary bitmap 2858 // iterations) but it should not introduce and correctness issues. 2859 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2860 HeapWord* bottom = curr_region->bottom(); 2861 HeapWord* end = curr_region->end(); 2862 HeapWord* limit = curr_region->next_top_at_mark_start(); 2863 2864 if (verbose_low()) { 2865 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 2866 "["PTR_FORMAT", "PTR_FORMAT"), " 2867 "limit = "PTR_FORMAT, 2868 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 2869 } 2870 2871 // Is the gap between reading the finger and doing the CAS too long? 2872 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2873 if (res == finger) { 2874 // we succeeded 2875 2876 // notice that _finger == end cannot be guaranteed here since, 2877 // someone else might have moved the finger even further 2878 assert(_finger >= end, "the finger should have moved forward"); 2879 2880 if (verbose_low()) { 2881 gclog_or_tty->print_cr("[%u] we were successful with region = " 2882 PTR_FORMAT, worker_id, p2i(curr_region)); 2883 } 2884 2885 if (limit > bottom) { 2886 if (verbose_low()) { 2887 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 2888 "returning it ", worker_id, p2i(curr_region)); 2889 } 2890 return curr_region; 2891 } else { 2892 assert(limit == bottom, 2893 "the region limit should be at bottom"); 2894 if (verbose_low()) { 2895 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 2896 "returning NULL", worker_id, p2i(curr_region)); 2897 } 2898 // we return NULL and the caller should try calling 2899 // claim_region() again. 
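        // To summarize the contract: claim_region() returns either a
        // region whose [bottom, ntams) interval still needs to be
        // scanned, or NULL - which means "retry" in this empty-region
        // case, or "nothing left to claim" once the finger has reached
        // _heap_end.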
2900 return NULL; 2901 } 2902 } else { 2903 assert(_finger > finger, "the finger should have moved forward"); 2904 if (verbose_low()) { 2905 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2906 "global finger = "PTR_FORMAT", " 2907 "our finger = "PTR_FORMAT, 2908 worker_id, p2i(_finger), p2i(finger)); 2909 } 2910 2911 // read it again 2912 finger = _finger; 2913 } 2914 } 2915 2916 return NULL; 2917 } 2918 2919 #ifndef PRODUCT 2920 enum VerifyNoCSetOopsPhase { 2921 VerifyNoCSetOopsStack, 2922 VerifyNoCSetOopsQueues, 2923 VerifyNoCSetOopsSATBCompleted, 2924 VerifyNoCSetOopsSATBThread 2925 }; 2926 2927 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2928 private: 2929 G1CollectedHeap* _g1h; 2930 VerifyNoCSetOopsPhase _phase; 2931 int _info; 2932 2933 const char* phase_str() { 2934 switch (_phase) { 2935 case VerifyNoCSetOopsStack: return "Stack"; 2936 case VerifyNoCSetOopsQueues: return "Queue"; 2937 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2938 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2939 default: ShouldNotReachHere(); 2940 } 2941 return NULL; 2942 } 2943 2944 void do_object_work(oop obj) { 2945 guarantee(!_g1h->obj_in_cs(obj), 2946 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2947 p2i((void*) obj), phase_str(), _info)); 2948 } 2949 2950 public: 2951 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2952 2953 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2954 _phase = phase; 2955 _info = info; 2956 } 2957 2958 virtual void do_oop(oop* p) { 2959 oop obj = oopDesc::load_decode_heap_oop(p); 2960 do_object_work(obj); 2961 } 2962 2963 virtual void do_oop(narrowOop* p) { 2964 // We should not come across narrow oops while scanning marking 2965 // stacks and SATB buffers. 2966 ShouldNotReachHere(); 2967 } 2968 2969 virtual void do_object(oop obj) { 2970 do_object_work(obj); 2971 } 2972 }; 2973 2974 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2975 bool verify_enqueued_buffers, 2976 bool verify_thread_buffers, 2977 bool verify_fingers) { 2978 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2979 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2980 return; 2981 } 2982 2983 VerifyNoCSetOopsClosure cl; 2984 2985 if (verify_stacks) { 2986 // Verify entries on the global mark stack 2987 cl.set_phase(VerifyNoCSetOopsStack); 2988 _markStack.oops_do(&cl); 2989 2990 // Verify entries on the task queues 2991 for (uint i = 0; i < _max_worker_id; i += 1) { 2992 cl.set_phase(VerifyNoCSetOopsQueues, i); 2993 CMTaskQueue* queue = _task_queues->queue(i); 2994 queue->oops_do(&cl); 2995 } 2996 } 2997 2998 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2999 3000 // Verify entries on the enqueued SATB buffers 3001 if (verify_enqueued_buffers) { 3002 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3003 satb_qs.iterate_completed_buffers_read_only(&cl); 3004 } 3005 3006 // Verify entries on the per-thread SATB buffers 3007 if (verify_thread_buffers) { 3008 cl.set_phase(VerifyNoCSetOopsSATBThread); 3009 satb_qs.iterate_thread_buffers_read_only(&cl); 3010 } 3011 3012 if (verify_fingers) { 3013 // Verify the global finger 3014 HeapWord* global_finger = finger(); 3015 if (global_finger != NULL && global_finger < _heap_end) { 3016 // The global finger always points to a heap region boundary. 
We 3017 // use heap_region_containing_raw() to get the containing region 3018 // given that the global finger could be pointing to a free region 3019 // which subsequently becomes continues humongous. If that 3020 // happens, heap_region_containing() will return the bottom of the 3021 // corresponding starts humongous region and the check below will 3022 // not hold any more. 3023 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3024 guarantee(global_finger == global_hr->bottom(), 3025 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3026 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3027 } 3028 3029 // Verify the task fingers 3030 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3031 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3032 CMTask* task = _tasks[i]; 3033 HeapWord* task_finger = task->finger(); 3034 if (task_finger != NULL && task_finger < _heap_end) { 3035 // See above note on the global finger verification. 3036 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3037 guarantee(task_finger == task_hr->bottom() || 3038 !task_hr->in_collection_set(), 3039 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3040 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3041 } 3042 } 3043 } 3044 } 3045 #endif // PRODUCT 3046 3047 // Aggregate the counting data that was constructed concurrently 3048 // with marking. 3049 class AggregateCountDataHRClosure: public HeapRegionClosure { 3050 G1CollectedHeap* _g1h; 3051 ConcurrentMark* _cm; 3052 CardTableModRefBS* _ct_bs; 3053 BitMap* _cm_card_bm; 3054 uint _max_worker_id; 3055 3056 public: 3057 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3058 BitMap* cm_card_bm, 3059 uint max_worker_id) : 3060 _g1h(g1h), _cm(g1h->concurrent_mark()), 3061 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3062 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3063 3064 bool doHeapRegion(HeapRegion* hr) { 3065 if (hr->continuesHumongous()) { 3066 // We will ignore these here and process them when their 3067 // associated "starts humongous" region is processed. 3068 // Note that we cannot rely on their associated 3069 // "starts humongous" region to have their bit set to 1 3070 // since, due to the region chunking in the parallel region 3071 // iteration, a "continues humongous" region might be visited 3072 // before its associated "starts humongous". 3073 return false; 3074 } 3075 3076 HeapWord* start = hr->bottom(); 3077 HeapWord* limit = hr->next_top_at_mark_start(); 3078 HeapWord* end = hr->end(); 3079 3080 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3081 err_msg("Preconditions not met - " 3082 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3083 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3084 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3085 3086 assert(hr->next_marked_bytes() == 0, "Precondition"); 3087 3088 if (start == limit) { 3089 // NTAMS of this region has not been set so nothing to do. 3090 return false; 3091 } 3092 3093 // 'start' should be in the heap. 
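    // It should also be card aligned: start is hr->bottom(), and region
    // boundaries are card aligned given that HeapRegion::GrainBytes is a
    // multiple of the card size.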
3094 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3095 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3096 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3097 3098 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3099 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3100 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3101 3102 // If ntams is not card aligned then we bump card bitmap index 3103 // for limit so that we get the all the cards spanned by 3104 // the object ending at ntams. 3105 // Note: if this is the last region in the heap then ntams 3106 // could be actually just beyond the end of the the heap; 3107 // limit_idx will then correspond to a (non-existent) card 3108 // that is also outside the heap. 3109 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3110 limit_idx += 1; 3111 } 3112 3113 assert(limit_idx <= end_idx, "or else use atomics"); 3114 3115 // Aggregate the "stripe" in the count data associated with hr. 3116 uint hrs_index = hr->hrs_index(); 3117 size_t marked_bytes = 0; 3118 3119 for (uint i = 0; i < _max_worker_id; i += 1) { 3120 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3121 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3122 3123 // Fetch the marked_bytes in this region for task i and 3124 // add it to the running total for this region. 3125 marked_bytes += marked_bytes_array[hrs_index]; 3126 3127 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3128 // into the global card bitmap. 3129 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3130 3131 while (scan_idx < limit_idx) { 3132 assert(task_card_bm->at(scan_idx) == true, "should be"); 3133 _cm_card_bm->set_bit(scan_idx); 3134 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3135 3136 // BitMap::get_next_one_offset() can handle the case when 3137 // its left_offset parameter is greater than its right_offset 3138 // parameter. It does, however, have an early exit if 3139 // left_offset == right_offset. So let's limit the value 3140 // passed in for left offset here. 3141 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3142 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3143 } 3144 } 3145 3146 // Update the marked bytes for this region. 
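    // At this point, for this region, we have roughly:
    //
    //   marked_bytes = sum over workers i of
    //                    count_marked_bytes_array_for(i)[hrs_index]
    //
    // and the global card bitmap holds the union of the per-worker card
    // bitmaps over [start_idx, limit_idx).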
3147 hr->add_to_marked_bytes(marked_bytes); 3148 3149 // Next heap region 3150 return false; 3151 } 3152 }; 3153 3154 class G1AggregateCountDataTask: public AbstractGangTask { 3155 protected: 3156 G1CollectedHeap* _g1h; 3157 ConcurrentMark* _cm; 3158 BitMap* _cm_card_bm; 3159 uint _max_worker_id; 3160 int _active_workers; 3161 3162 public: 3163 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3164 ConcurrentMark* cm, 3165 BitMap* cm_card_bm, 3166 uint max_worker_id, 3167 int n_workers) : 3168 AbstractGangTask("Count Aggregation"), 3169 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3170 _max_worker_id(max_worker_id), 3171 _active_workers(n_workers) { } 3172 3173 void work(uint worker_id) { 3174 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3175 3176 if (G1CollectedHeap::use_parallel_gc_threads()) { 3177 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3178 _active_workers, 3179 HeapRegion::AggregateCountClaimValue); 3180 } else { 3181 _g1h->heap_region_iterate(&cl); 3182 } 3183 } 3184 }; 3185 3186 3187 void ConcurrentMark::aggregate_count_data() { 3188 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3189 _g1h->workers()->active_workers() : 3190 1); 3191 3192 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3193 _max_worker_id, n_workers); 3194 3195 if (G1CollectedHeap::use_parallel_gc_threads()) { 3196 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3197 "sanity check"); 3198 _g1h->set_par_threads(n_workers); 3199 _g1h->workers()->run_task(&g1_par_agg_task); 3200 _g1h->set_par_threads(0); 3201 3202 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3203 "sanity check"); 3204 _g1h->reset_heap_region_claim_values(); 3205 } else { 3206 g1_par_agg_task.work(0); 3207 } 3208 } 3209 3210 // Clear the per-worker arrays used to store the per-region counting data 3211 void ConcurrentMark::clear_all_count_data() { 3212 // Clear the global card bitmap - it will be filled during 3213 // liveness count aggregation (during remark) and the 3214 // final counting task. 3215 _card_bm.clear(); 3216 3217 // Clear the global region bitmap - it will be filled as part 3218 // of the final counting task. 3219 _region_bm.clear(); 3220 3221 uint max_regions = _g1h->max_regions(); 3222 assert(_max_worker_id > 0, "uninitialized"); 3223 3224 for (uint i = 0; i < _max_worker_id; i += 1) { 3225 BitMap* task_card_bm = count_card_bitmap_for(i); 3226 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3227 3228 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3229 assert(marked_bytes_array != NULL, "uninitialized"); 3230 3231 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3232 task_card_bm->clear(); 3233 } 3234 } 3235 3236 void ConcurrentMark::print_stats() { 3237 if (verbose_stats()) { 3238 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3239 for (size_t i = 0; i < _active_tasks; ++i) { 3240 _tasks[i]->print_stats(); 3241 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3242 } 3243 } 3244 } 3245 3246 // abandon current marking iteration due to a Full GC 3247 void ConcurrentMark::abort() { 3248 // Clear all marks to force marking thread to do nothing 3249 _nextMarkBitMap->clearAll(); 3250 3251 // Note we cannot clear the previous marking bitmap here 3252 // since VerifyDuringGC verifies the objects marked during 3253 // a full GC against the previous bitmap. 
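  // The remainder of abort() then, in order: throws away the liveness
  // counting data, resets the global marking state (mark stack, etc.),
  // clears the per-task region fields, unblocks any tasks waiting on the
  // overflow barriers, and deactivates SATB buffering. The concurrent
  // mark thread will notice _has_aborted and stand down.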
3254 3255 // Clear the liveness counting data 3256 clear_all_count_data(); 3257 // Empty mark stack 3258 reset_marking_state(); 3259 for (uint i = 0; i < _max_worker_id; ++i) { 3260 _tasks[i]->clear_region_fields(); 3261 } 3262 _first_overflow_barrier_sync.abort(); 3263 _second_overflow_barrier_sync.abort(); 3264 _has_aborted = true; 3265 3266 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3267 satb_mq_set.abandon_partial_marking(); 3268 // This can be called either during or outside marking, we'll read 3269 // the expected_active value from the SATB queue set. 3270 satb_mq_set.set_active_all_threads( 3271 false, /* new active value */ 3272 satb_mq_set.is_active() /* expected_active */); 3273 3274 _g1h->trace_heap_after_concurrent_cycle(); 3275 _g1h->register_concurrent_cycle_end(); 3276 } 3277 3278 static void print_ms_time_info(const char* prefix, const char* name, 3279 NumberSeq& ns) { 3280 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3281 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3282 if (ns.num() > 0) { 3283 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3284 prefix, ns.sd(), ns.maximum()); 3285 } 3286 } 3287 3288 void ConcurrentMark::print_summary_info() { 3289 gclog_or_tty->print_cr(" Concurrent marking:"); 3290 print_ms_time_info(" ", "init marks", _init_times); 3291 print_ms_time_info(" ", "remarks", _remark_times); 3292 { 3293 print_ms_time_info(" ", "final marks", _remark_mark_times); 3294 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3295 3296 } 3297 print_ms_time_info(" ", "cleanups", _cleanup_times); 3298 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3299 _total_counting_time, 3300 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3301 (double)_cleanup_times.num() 3302 : 0.0)); 3303 if (G1ScrubRemSets) { 3304 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3305 _total_rs_scrub_time, 3306 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3307 (double)_cleanup_times.num() 3308 : 0.0)); 3309 } 3310 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3311 (_init_times.sum() + _remark_times.sum() + 3312 _cleanup_times.sum())/1000.0); 3313 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3314 "(%8.2f s marking).", 3315 cmThread()->vtime_accum(), 3316 cmThread()->vtime_mark_accum()); 3317 } 3318 3319 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3320 if (use_parallel_marking_threads()) { 3321 _parallel_workers->print_worker_threads_on(st); 3322 } 3323 } 3324 3325 void ConcurrentMark::print_on_error(outputStream* st) const { 3326 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3327 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3328 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3329 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3330 } 3331 3332 // We take a break if someone is trying to stop the world. 
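// If the SuspendibleThreadSet reports that a safepoint is pending, worker 0
// records the concurrent pause with the policy and the calling thread yields
// until the suspending operation has completed; returns true iff we yielded.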
3333 bool ConcurrentMark::do_yield_check(uint worker_id) { 3334 if (SuspendibleThreadSet::should_yield()) { 3335 if (worker_id == 0) { 3336 _g1h->g1_policy()->record_concurrent_pause(); 3337 } 3338 SuspendibleThreadSet::yield(); 3339 return true; 3340 } else { 3341 return false; 3342 } 3343 } 3344 3345 bool ConcurrentMark::containing_card_is_marked(void* p) { 3346 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3347 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3348 } 3349 3350 bool ConcurrentMark::containing_cards_are_marked(void* start, 3351 void* last) { 3352 return containing_card_is_marked(start) && 3353 containing_card_is_marked(last); 3354 } 3355 3356 #ifndef PRODUCT 3357 // for debugging purposes 3358 void ConcurrentMark::print_finger() { 3359 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3360 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3361 for (uint i = 0; i < _max_worker_id; ++i) { 3362 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3363 } 3364 gclog_or_tty->cr(); 3365 } 3366 #endif 3367 3368 void CMTask::scan_object(oop obj) { 3369 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3370 3371 if (_cm->verbose_high()) { 3372 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3373 _worker_id, p2i((void*) obj)); 3374 } 3375 3376 size_t obj_size = obj->size(); 3377 _words_scanned += obj_size; 3378 3379 obj->oop_iterate(_cm_oop_closure); 3380 statsOnly( ++_objs_scanned ); 3381 check_limits(); 3382 } 3383 3384 // Closure for iteration over bitmaps 3385 class CMBitMapClosure : public BitMapClosure { 3386 private: 3387 // the bitmap that is being iterated over 3388 CMBitMap* _nextMarkBitMap; 3389 ConcurrentMark* _cm; 3390 CMTask* _task; 3391 3392 public: 3393 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3394 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3395 3396 bool do_bit(size_t offset) { 3397 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3398 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3399 assert( addr < _cm->finger(), "invariant"); 3400 3401 statsOnly( _task->increase_objs_found_on_bitmap() ); 3402 assert(addr >= _task->finger(), "invariant"); 3403 3404 // We move that task's local finger along. 3405 _task->move_finger_to(addr); 3406 3407 _task->scan_object(oop(addr)); 3408 // we only partially drain the local queue and global stack 3409 _task->drain_local_queue(true); 3410 _task->drain_global_stack(true); 3411 3412 // if the has_aborted flag has been raised, we need to bail out of 3413 // the iteration 3414 return !_task->has_aborted(); 3415 } 3416 }; 3417 3418 // Closure for iterating over objects, currently only used for 3419 // processing SATB buffers. 
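// The closure is registered with the SATB mark queue set by
// CMTask::drain_satb_buffers() (via set_closure() / set_par_closure()) and is
// applied to every object recorded in the completed buffers, forwarding each
// one to CMTask::deal_with_reference().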
3420 class CMObjectClosure : public ObjectClosure { 3421 private: 3422 CMTask* _task; 3423 3424 public: 3425 void do_object(oop obj) { 3426 _task->deal_with_reference(obj); 3427 } 3428 3429 CMObjectClosure(CMTask* task) : _task(task) { } 3430 }; 3431 3432 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3433 ConcurrentMark* cm, 3434 CMTask* task) 3435 : _g1h(g1h), _cm(cm), _task(task) { 3436 assert(_ref_processor == NULL, "should be initialized to NULL"); 3437 3438 if (G1UseConcMarkReferenceProcessing) { 3439 _ref_processor = g1h->ref_processor_cm(); 3440 assert(_ref_processor != NULL, "should not be NULL"); 3441 } 3442 } 3443 3444 void CMTask::setup_for_region(HeapRegion* hr) { 3445 assert(hr != NULL, 3446 "claim_region() should have filtered out NULL regions"); 3447 assert(!hr->continuesHumongous(), 3448 "claim_region() should have filtered out continues humongous regions"); 3449 3450 if (_cm->verbose_low()) { 3451 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT, 3452 _worker_id, p2i(hr)); 3453 } 3454 3455 _curr_region = hr; 3456 _finger = hr->bottom(); 3457 update_region_limit(); 3458 } 3459 3460 void CMTask::update_region_limit() { 3461 HeapRegion* hr = _curr_region; 3462 HeapWord* bottom = hr->bottom(); 3463 HeapWord* limit = hr->next_top_at_mark_start(); 3464 3465 if (limit == bottom) { 3466 if (_cm->verbose_low()) { 3467 gclog_or_tty->print_cr("[%u] found an empty region " 3468 "["PTR_FORMAT", "PTR_FORMAT")", 3469 _worker_id, p2i(bottom), p2i(limit)); 3470 } 3471 // The region was collected underneath our feet. 3472 // We set the finger to bottom to ensure that the bitmap 3473 // iteration that will follow this will not do anything. 3474 // (this is not a condition that holds when we set the region up, 3475 // as the region is not supposed to be empty in the first place) 3476 _finger = bottom; 3477 } else if (limit >= _region_limit) { 3478 assert(limit >= _finger, "peace of mind"); 3479 } else { 3480 assert(limit < _region_limit, "only way to get here"); 3481 // This can happen under some pretty unusual circumstances. An 3482 // evacuation pause empties the region underneath our feet (NTAMS 3483 // at bottom). We then do some allocation in the region (NTAMS 3484 // stays at bottom), followed by the region being used as a GC 3485 // alloc region (NTAMS will move to top() and the objects 3486 // originally below it will be grayed). All objects now marked in 3487 // the region are explicitly grayed, if below the global finger, 3488 // and we do not need in fact to scan anything else. So, we simply 3489 // set _finger to be limit to ensure that the bitmap iteration 3490 // doesn't do anything. 3491 _finger = limit; 3492 } 3493 3494 _region_limit = limit; 3495 } 3496 3497 void CMTask::giveup_current_region() { 3498 assert(_curr_region != NULL, "invariant"); 3499 if (_cm->verbose_low()) { 3500 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT, 3501 _worker_id, p2i(_curr_region)); 3502 } 3503 clear_region_fields(); 3504 } 3505 3506 void CMTask::clear_region_fields() { 3507 // Values for these three fields that indicate that we're not 3508 // holding on to a region. 
3509 _curr_region = NULL; 3510 _finger = NULL; 3511 _region_limit = NULL; 3512 } 3513 3514 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3515 if (cm_oop_closure == NULL) { 3516 assert(_cm_oop_closure != NULL, "invariant"); 3517 } else { 3518 assert(_cm_oop_closure == NULL, "invariant"); 3519 } 3520 _cm_oop_closure = cm_oop_closure; 3521 } 3522 3523 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3524 guarantee(nextMarkBitMap != NULL, "invariant"); 3525 3526 if (_cm->verbose_low()) { 3527 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3528 } 3529 3530 _nextMarkBitMap = nextMarkBitMap; 3531 clear_region_fields(); 3532 3533 _calls = 0; 3534 _elapsed_time_ms = 0.0; 3535 _termination_time_ms = 0.0; 3536 _termination_start_time_ms = 0.0; 3537 3538 #if _MARKING_STATS_ 3539 _local_pushes = 0; 3540 _local_pops = 0; 3541 _local_max_size = 0; 3542 _objs_scanned = 0; 3543 _global_pushes = 0; 3544 _global_pops = 0; 3545 _global_max_size = 0; 3546 _global_transfers_to = 0; 3547 _global_transfers_from = 0; 3548 _regions_claimed = 0; 3549 _objs_found_on_bitmap = 0; 3550 _satb_buffers_processed = 0; 3551 _steal_attempts = 0; 3552 _steals = 0; 3553 _aborted = 0; 3554 _aborted_overflow = 0; 3555 _aborted_cm_aborted = 0; 3556 _aborted_yield = 0; 3557 _aborted_timed_out = 0; 3558 _aborted_satb = 0; 3559 _aborted_termination = 0; 3560 #endif // _MARKING_STATS_ 3561 } 3562 3563 bool CMTask::should_exit_termination() { 3564 regular_clock_call(); 3565 // This is called when we are in the termination protocol. We should 3566 // quit if, for some reason, this task wants to abort or the global 3567 // stack is not empty (this means that we can get work from it). 3568 return !_cm->mark_stack_empty() || has_aborted(); 3569 } 3570 3571 void CMTask::reached_limit() { 3572 assert(_words_scanned >= _words_scanned_limit || 3573 _refs_reached >= _refs_reached_limit , 3574 "shouldn't have been called otherwise"); 3575 regular_clock_call(); 3576 } 3577 3578 void CMTask::regular_clock_call() { 3579 if (has_aborted()) return; 3580 3581 // First, we need to recalculate the words scanned and refs reached 3582 // limits for the next clock call. 3583 recalculate_limits(); 3584 3585 // During the regular clock call we do the following 3586 3587 // (1) If an overflow has been flagged, then we abort. 3588 if (_cm->has_overflown()) { 3589 set_has_aborted(); 3590 return; 3591 } 3592 3593 // If we are not concurrent (i.e. we're doing remark) we don't need 3594 // to check anything else. The other steps are only needed during 3595 // the concurrent marking phase. 3596 if (!concurrent()) return; 3597 3598 // (2) If marking has been aborted for Full GC, then we also abort. 3599 if (_cm->has_aborted()) { 3600 set_has_aborted(); 3601 statsOnly( ++_aborted_cm_aborted ); 3602 return; 3603 } 3604 3605 double curr_time_ms = os::elapsedVTime() * 1000.0; 3606 3607 // (3) If marking stats are enabled, then we update the step history. 
3608 #if _MARKING_STATS_
3609   if (_words_scanned >= _words_scanned_limit) {
3610     ++_clock_due_to_scanning;
3611   }
3612   if (_refs_reached >= _refs_reached_limit) {
3613     ++_clock_due_to_marking;
3614   }
3615
3616   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3617   _interval_start_time_ms = curr_time_ms;
3618   _all_clock_intervals_ms.add(last_interval_ms);
3619
3620   if (_cm->verbose_medium()) {
3621     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3622                            "scanned = %d%s, refs reached = %d%s",
3623                            _worker_id, last_interval_ms,
3624                            _words_scanned,
3625                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3626                            _refs_reached,
3627                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3628   }
3629 #endif // _MARKING_STATS_
3630
3631   // (4) We check whether we should yield. If we have to, then we abort.
3632   if (SuspendibleThreadSet::should_yield()) {
3633     // We should yield. To do this we abort the task. The caller is
3634     // responsible for yielding.
3635     set_has_aborted();
3636     statsOnly( ++_aborted_yield );
3637     return;
3638   }
3639
3640   // (5) We check whether we've reached our time quota. If we have,
3641   // then we abort.
3642   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3643   if (elapsed_time_ms > _time_target_ms) {
3644     set_has_aborted();
3645     _has_timed_out = true;
3646     statsOnly( ++_aborted_timed_out );
3647     return;
3648   }
3649
3650   // (6) Finally, we check whether there are enough completed SATB
3651   // buffers available for processing. If there are, we abort.
3652   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3653   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3654     if (_cm->verbose_low()) {
3655       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3656                              _worker_id);
3657     }
3658     // We do need to process SATB buffers, so we'll abort and restart
3659     // the marking task to do so.
3660     set_has_aborted();
3661     statsOnly( ++_aborted_satb );
3662     return;
3663   }
3664 }
3665
3666 void CMTask::recalculate_limits() {
3667   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3668   _words_scanned_limit = _real_words_scanned_limit;
3669
3670   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3671   _refs_reached_limit = _real_refs_reached_limit;
3672 }
3673
3674 void CMTask::decrease_limits() {
3675   // This is called when we believe that we're going to do an infrequent
3676   // operation which will increase the per-byte scanned cost (i.e. move
3677   // entries to/from the global stack). It basically tries to decrease the
3678   // scanning limit so that the clock is called earlier.
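  // For example, if the real limit is R and the corresponding period is P,
  // the limit is pulled back to R - 3P/4 (see the assignments below), so the
  // next regular_clock_call() fires after only about a quarter of the usual
  // amount of scanning work.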
3679 3680 if (_cm->verbose_medium()) { 3681 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id); 3682 } 3683 3684 _words_scanned_limit = _real_words_scanned_limit - 3685 3 * words_scanned_period / 4; 3686 _refs_reached_limit = _real_refs_reached_limit - 3687 3 * refs_reached_period / 4; 3688 } 3689 3690 void CMTask::move_entries_to_global_stack() { 3691 // local array where we'll store the entries that will be popped 3692 // from the local queue 3693 oop buffer[global_stack_transfer_size]; 3694 3695 int n = 0; 3696 oop obj; 3697 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3698 buffer[n] = obj; 3699 ++n; 3700 } 3701 3702 if (n > 0) { 3703 // we popped at least one entry from the local queue 3704 3705 statsOnly( ++_global_transfers_to; _local_pops += n ); 3706 3707 if (!_cm->mark_stack_push(buffer, n)) { 3708 if (_cm->verbose_low()) { 3709 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow", 3710 _worker_id); 3711 } 3712 set_has_aborted(); 3713 } else { 3714 // the transfer was successful 3715 3716 if (_cm->verbose_medium()) { 3717 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack", 3718 _worker_id, n); 3719 } 3720 statsOnly( int tmp_size = _cm->mark_stack_size(); 3721 if (tmp_size > _global_max_size) { 3722 _global_max_size = tmp_size; 3723 } 3724 _global_pushes += n ); 3725 } 3726 } 3727 3728 // this operation was quite expensive, so decrease the limits 3729 decrease_limits(); 3730 } 3731 3732 void CMTask::get_entries_from_global_stack() { 3733 // local array where we'll store the entries that will be popped 3734 // from the global stack. 3735 oop buffer[global_stack_transfer_size]; 3736 int n; 3737 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3738 assert(n <= global_stack_transfer_size, 3739 "we should not pop more than the given limit"); 3740 if (n > 0) { 3741 // yes, we did actually pop at least one entry 3742 3743 statsOnly( ++_global_transfers_from; _global_pops += n ); 3744 if (_cm->verbose_medium()) { 3745 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack", 3746 _worker_id, n); 3747 } 3748 for (int i = 0; i < n; ++i) { 3749 bool success = _task_queue->push(buffer[i]); 3750 // We only call this when the local queue is empty or under a 3751 // given target limit. So, we do not expect this push to fail. 3752 assert(success, "invariant"); 3753 } 3754 3755 statsOnly( int tmp_size = _task_queue->size(); 3756 if (tmp_size > _local_max_size) { 3757 _local_max_size = tmp_size; 3758 } 3759 _local_pushes += n ); 3760 } 3761 3762 // this operation was quite expensive, so decrease the limits 3763 decrease_limits(); 3764 } 3765 3766 void CMTask::drain_local_queue(bool partially) { 3767 if (has_aborted()) return; 3768 3769 // Decide what the target size is, depending whether we're going to 3770 // drain it partially (so that other tasks can steal if they run out 3771 // of things to do) or totally (at the very end). 
3772   size_t target_size;
3773   if (partially) {
3774     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3775   } else {
3776     target_size = 0;
3777   }
3778
3779   if (_task_queue->size() > target_size) {
3780     if (_cm->verbose_high()) {
3781       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3782                              _worker_id, target_size);
3783     }
3784
3785     oop obj;
3786     bool ret = _task_queue->pop_local(obj);
3787     while (ret) {
3788       statsOnly( ++_local_pops );
3789
3790       if (_cm->verbose_high()) {
3791         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3792                                p2i((void*) obj));
3793       }
3794
3795       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3796       assert(!_g1h->is_on_master_free_list(
3797                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3798
3799       scan_object(obj);
3800
3801       if (_task_queue->size() <= target_size || has_aborted()) {
3802         ret = false;
3803       } else {
3804         ret = _task_queue->pop_local(obj);
3805       }
3806     }
3807
3808     if (_cm->verbose_high()) {
3809       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3810                              _worker_id, _task_queue->size());
3811     }
3812   }
3813 }
3814
3815 void CMTask::drain_global_stack(bool partially) {
3816   if (has_aborted()) return;
3817
3818   // We have a policy to drain the local queue before we attempt to
3819   // drain the global stack.
3820   assert(partially || _task_queue->size() == 0, "invariant");
3821
3822   // Decide what the target size is, depending on whether we're going to
3823   // drain it partially (so that other tasks can steal if they run out
3824   // of things to do) or totally (at the very end). Notice that,
3825   // because we move entries from the global stack in chunks or
3826   // because another task might be doing the same, we might in fact
3827   // drop below the target. But, this is not a problem.
3828   size_t target_size;
3829   if (partially) {
3830     target_size = _cm->partial_mark_stack_size_target();
3831   } else {
3832     target_size = 0;
3833   }
3834
3835   if (_cm->mark_stack_size() > target_size) {
3836     if (_cm->verbose_low()) {
3837       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3838                              _worker_id, target_size);
3839     }
3840
3841     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3842       get_entries_from_global_stack();
3843       drain_local_queue(partially);
3844     }
3845
3846     if (_cm->verbose_low()) {
3847       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3848                              _worker_id, _cm->mark_stack_size());
3849     }
3850   }
3851 }
3852
3853 // The SATB queue has several assumptions on whether to call the par or
3854 // non-par versions of the methods. This is why some of the code is
3855 // replicated. We should really get rid of the single-threaded version
3856 // of the code to simplify things.
3857 void CMTask::drain_satb_buffers() {
3858   if (has_aborted()) return;
3859
3860   // We set this so that the regular clock knows that we're in the
3861   // middle of draining buffers and doesn't set the abort flag when it
3862   // notices that SATB buffers are available for draining. It'd be
3863   // very counterproductive if it did that.
:-) 3864 _draining_satb_buffers = true; 3865 3866 CMObjectClosure oc(this); 3867 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3868 if (G1CollectedHeap::use_parallel_gc_threads()) { 3869 satb_mq_set.set_par_closure(_worker_id, &oc); 3870 } else { 3871 satb_mq_set.set_closure(&oc); 3872 } 3873 3874 // This keeps claiming and applying the closure to completed buffers 3875 // until we run out of buffers or we need to abort. 3876 if (G1CollectedHeap::use_parallel_gc_threads()) { 3877 while (!has_aborted() && 3878 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 3879 if (_cm->verbose_medium()) { 3880 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3881 } 3882 statsOnly( ++_satb_buffers_processed ); 3883 regular_clock_call(); 3884 } 3885 } else { 3886 while (!has_aborted() && 3887 satb_mq_set.apply_closure_to_completed_buffer()) { 3888 if (_cm->verbose_medium()) { 3889 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 3890 } 3891 statsOnly( ++_satb_buffers_processed ); 3892 regular_clock_call(); 3893 } 3894 } 3895 3896 if (!concurrent() && !has_aborted()) { 3897 // We should only do this during remark. 3898 if (G1CollectedHeap::use_parallel_gc_threads()) { 3899 satb_mq_set.par_iterate_closure_all_threads(_worker_id); 3900 } else { 3901 satb_mq_set.iterate_closure_all_threads(); 3902 } 3903 } 3904 3905 _draining_satb_buffers = false; 3906 3907 assert(has_aborted() || 3908 concurrent() || 3909 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3910 3911 if (G1CollectedHeap::use_parallel_gc_threads()) { 3912 satb_mq_set.set_par_closure(_worker_id, NULL); 3913 } else { 3914 satb_mq_set.set_closure(NULL); 3915 } 3916 3917 // again, this was a potentially expensive operation, decrease the 3918 // limits to get the regular clock call early 3919 decrease_limits(); 3920 } 3921 3922 void CMTask::print_stats() { 3923 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3924 _worker_id, _calls); 3925 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3926 _elapsed_time_ms, _termination_time_ms); 3927 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3928 _step_times_ms.num(), _step_times_ms.avg(), 3929 _step_times_ms.sd()); 3930 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3931 _step_times_ms.maximum(), _step_times_ms.sum()); 3932 3933 #if _MARKING_STATS_ 3934 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3935 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3936 _all_clock_intervals_ms.sd()); 3937 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3938 _all_clock_intervals_ms.maximum(), 3939 _all_clock_intervals_ms.sum()); 3940 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3941 _clock_due_to_scanning, _clock_due_to_marking); 3942 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3943 _objs_scanned, _objs_found_on_bitmap); 3944 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3945 _local_pushes, _local_pops, _local_max_size); 3946 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3947 _global_pushes, _global_pops, _global_max_size); 3948 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3949 _global_transfers_to,_global_transfers_from); 3950 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3951 gclog_or_tty->print_cr(" SATB 
buffers: processed = %d", _satb_buffers_processed);
3952   gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3953                          _steal_attempts, _steals);
3954   gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3955   gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3956                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3957   gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3958                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3959 #endif // _MARKING_STATS_
3960 }
3961
3962 /*****************************************************************************
3963
3964     The do_marking_step(time_target_ms, ...) method is the building
3965     block of the parallel marking framework. It can be called in parallel
3966     with other invocations of do_marking_step() on different tasks
3967     (but only one per task, obviously) and concurrently with the
3968     mutator threads, or during remark, hence it eliminates the need
3969     for two versions of the code. When called during remark, it will
3970     pick up from where the task left off during the concurrent marking
3971     phase. Interestingly, tasks are also claimable during evacuation
3972     pauses, since do_marking_step() ensures that it aborts before
3973     it needs to yield.
3974
3975     The data structures that it uses to do marking work are the
3976     following:
3977
3978     (1) Marking Bitmap. If there are gray objects that appear only
3979     on the bitmap (this happens either when dealing with an overflow
3980     or when the initial marking phase has simply marked the roots
3981     and didn't push them on the stack), then tasks claim heap
3982     regions whose bitmap they then scan to find gray objects. A
3983     global finger indicates where the end of the last claimed region
3984     is. A local finger indicates how far into the region a task has
3985     scanned. The two fingers are used to determine how to gray an
3986     object (i.e. whether simply marking it is OK, as it will be
3987     visited by a task in the future, or whether it also needs to be
3988     pushed on a stack).
3989
3990     (2) Local Queue. Each task has its own local queue, which it can
3991     access reasonably efficiently. Other tasks can steal from
3992     it when they run out of work. Throughout the marking phase, a
3993     task attempts to keep its local queue short but not totally
3994     empty, so that entries are available for stealing by other
3995     tasks. Only when there is no more work will a task totally
3996     drain its local queue.
3997
3998     (3) Global Mark Stack. This handles local queue overflow. During
3999     marking only sets of entries are moved between it and the local
4000     queues, as access to it requires a mutex and more fine-grained
4001     interaction with it might cause contention. If it
4002     overflows, then the marking phase should restart and iterate
4003     over the bitmap to identify gray objects. Throughout the marking
4004     phase, tasks attempt to keep the global mark stack at a small
4005     length but not totally empty, so that entries are available for
4006     popping by other tasks. Only when there is no more work will tasks
4007     totally drain the global mark stack.
4008
4009     (4) SATB Buffer Queue. This is where completed SATB buffers are
4010     made available. Buffers are regularly removed from this queue
4011     and scanned for roots, so that the queue doesn't get too
4012     long. During remark, all completed buffers are processed, as
4013     well as the filled-in parts of any uncompleted buffers.
4014
4015     The do_marking_step() method tries to abort when the time target
4016     has been reached. There are a few other cases when the
4017     do_marking_step() method also aborts:
4018
4019     (1) When the marking phase has been aborted (after a Full GC).
4020
4021     (2) When a global overflow (on the global stack) has been
4022     triggered. Before the task aborts, it will actually sync up with
4023     the other tasks to ensure that all the marking data structures
4024     (local queues, stacks, fingers etc.) are re-initialized so that
4025     when do_marking_step() completes, the marking phase can
4026     immediately restart.
4027
4028     (3) When enough completed SATB buffers are available. The
4029     do_marking_step() method only tries to drain SATB buffers right
4030     at the beginning. So, if enough buffers are available, the
4031     marking step aborts and the SATB buffers are processed at
4032     the beginning of the next invocation.
4033
4034     (4) To yield. When we have to yield, we abort and yield
4035     right at the end of do_marking_step(). This saves us from a lot
4036     of hassle, as by yielding we might allow a Full GC. If this
4037     happens then objects will be compacted underneath our feet, the
4038     heap might shrink, etc. We save checking for this by just
4039     aborting and doing the yield right at the end.
4040
4041     From the above it follows that the do_marking_step() method should
4042     be called in a loop (or, otherwise, regularly) until it completes.
4043
4044     If a marking step completes without its has_aborted() flag being
4045     true, it means it has completed the current marking phase (and
4046     also all other marking tasks have done so and have all synced up).
4047
4048     A method called regular_clock_call() is invoked "regularly" (in
4049     sub-ms intervals) throughout marking. It is this clock method that
4050     checks all the abort conditions which were mentioned above and
4051     decides when the task should abort. A work-based scheme is used to
4052     trigger this clock method: when the number of object words the
4053     marking phase has scanned or the number of references the marking
4054     phase has visited reaches a given limit. Additional calls to
4055     the clock method have been planted in a few other strategic places
4056     too. The initial reason for the clock method was to avoid calling
4057     vtime too regularly, as it is quite expensive. So, once it was in
4058     place, it was natural to piggy-back all the other conditions on it
4059     too and not constantly check them throughout the code.
4060
4061     If do_termination is true then do_marking_step will enter its
4062     termination protocol.
4063
4064     The value of is_serial must be true when do_marking_step is being
4065     called serially (i.e. by the VMThread) and do_marking_step should
4066     skip any synchronization in the termination and overflow code.
4067     Examples include the serial remark code and the serial reference
4068     processing closures.
4069
4070     The value of is_serial must be false when do_marking_step is
4071     being called by any of the worker threads in a work gang.
4072     Examples include the concurrent marking code (CMMarkingTask),
4073     the MT remark code, and the MT reference processing closures.
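
    Purely as an illustration (the real callers listed above add their
    own yield, sleep and overflow handling around it), the calling
    pattern described here boils down to something like:

      do {
        task->do_marking_step(target_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
        // if the step aborted, yield / sync up before retrying
      } while (task->has_aborted());  // and marking itself was not aborted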
4074
4075  *****************************************************************************/
4076
4077 void CMTask::do_marking_step(double time_target_ms,
4078                              bool do_termination,
4079                              bool is_serial) {
4080   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4081   assert(concurrent() == _cm->concurrent(), "they should be the same");
4082
4083   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4084   assert(_task_queues != NULL, "invariant");
4085   assert(_task_queue != NULL, "invariant");
4086   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4087
4088   assert(!_claimed,
4089          "only one thread should claim this task at any one time");
4090
4091   // OK, this doesn't safeguard against all possible scenarios, as it is
4092   // possible for two threads to set the _claimed flag at the same
4093   // time. But it is only for debugging purposes anyway and it will
4094   // catch most problems.
4095   _claimed = true;
4096
4097   _start_time_ms = os::elapsedVTime() * 1000.0;
4098   statsOnly( _interval_start_time_ms = _start_time_ms );
4099
4100   // If do_stealing is true then do_marking_step will attempt to
4101   // steal work from the other CMTasks. It only makes sense to
4102   // enable stealing when the termination protocol is enabled
4103   // and do_marking_step() is not being called serially.
4104   bool do_stealing = do_termination && !is_serial;
4105
4106   double diff_prediction_ms =
4107     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4108   _time_target_ms = time_target_ms - diff_prediction_ms;
4109
4110   // set up the variables that are used in the work-based scheme to
4111   // call the regular clock method
4112   _words_scanned = 0;
4113   _refs_reached = 0;
4114   recalculate_limits();
4115
4116   // clear all flags
4117   clear_has_aborted();
4118   _has_timed_out = false;
4119   _draining_satb_buffers = false;
4120
4121   ++_calls;
4122
4123   if (_cm->verbose_low()) {
4124     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4125                            "target = %1.2lfms >>>>>>>>>>",
4126                            _worker_id, _calls, _time_target_ms);
4127   }
4128
4129   // Set up the bitmap and oop closures. Anything that uses them is
4130   // eventually called from this method, so it is OK to allocate these
4131   // statically.
4132   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4133   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4134   set_cm_oop_closure(&cm_oop_closure);
4135
4136   if (_cm->has_overflown()) {
4137     // This can happen if the mark stack overflows during a GC pause
4138     // and this task, after a yield point, restarts. We have to abort
4139     // as we need to get into the overflow protocol which happens
4140     // right at the end of this task.
4141     set_has_aborted();
4142   }
4143
4144   // First drain any available SATB buffers. After this, we will not
4145   // look at SATB buffers before the next invocation of this method.
4146   // If enough completed SATB buffers are queued up, the regular clock
4147   // will abort this task so that it restarts.
4148   drain_satb_buffers();
4149   // ...then partially drain the local queue and the global stack
4150   drain_local_queue(true);
4151   drain_global_stack(true);
4152
4153   do {
4154     if (!has_aborted() && _curr_region != NULL) {
4155       // This means that we're already holding on to a region.
4156       assert(_finger != NULL, "if region is not NULL, then the finger "
4157              "should not be NULL either");
4158
4159       // We might have restarted this task after an evacuation pause
4160       // which might have evacuated the region we're holding on to
4161       // underneath our feet.
Let's read its limit again to make sure 4162 // that we do not iterate over a region of the heap that 4163 // contains garbage (update_region_limit() will also move 4164 // _finger to the start of the region if it is found empty). 4165 update_region_limit(); 4166 // We will start from _finger not from the start of the region, 4167 // as we might be restarting this task after aborting half-way 4168 // through scanning this region. In this case, _finger points to 4169 // the address where we last found a marked object. If this is a 4170 // fresh region, _finger points to start(). 4171 MemRegion mr = MemRegion(_finger, _region_limit); 4172 4173 if (_cm->verbose_low()) { 4174 gclog_or_tty->print_cr("[%u] we're scanning part " 4175 "["PTR_FORMAT", "PTR_FORMAT") " 4176 "of region "HR_FORMAT, 4177 _worker_id, p2i(_finger), p2i(_region_limit), 4178 HR_FORMAT_PARAMS(_curr_region)); 4179 } 4180 4181 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4182 "humongous regions should go around loop once only"); 4183 4184 // Some special cases: 4185 // If the memory region is empty, we can just give up the region. 4186 // If the current region is humongous then we only need to check 4187 // the bitmap for the bit associated with the start of the object, 4188 // scan the object if it's live, and give up the region. 4189 // Otherwise, let's iterate over the bitmap of the part of the region 4190 // that is left. 4191 // If the iteration is successful, give up the region. 4192 if (mr.is_empty()) { 4193 giveup_current_region(); 4194 regular_clock_call(); 4195 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4196 if (_nextMarkBitMap->isMarked(mr.start())) { 4197 // The object is marked - apply the closure 4198 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4199 bitmap_closure.do_bit(offset); 4200 } 4201 // Even if this task aborted while scanning the humongous object 4202 // we can (and should) give up the current region. 4203 giveup_current_region(); 4204 regular_clock_call(); 4205 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4206 giveup_current_region(); 4207 regular_clock_call(); 4208 } else { 4209 assert(has_aborted(), "currently the only way to do so"); 4210 // The only way to abort the bitmap iteration is to return 4211 // false from the do_bit() method. However, inside the 4212 // do_bit() method we move the _finger to point to the 4213 // object currently being looked at. So, if we bail out, we 4214 // have definitely set _finger to something non-null. 4215 assert(_finger != NULL, "invariant"); 4216 4217 // Region iteration was actually aborted. So now _finger 4218 // points to the address of the object we last scanned. If we 4219 // leave it there, when we restart this task, we will rescan 4220 // the object. It is easy to avoid this. We move the finger by 4221 // enough to point to the next possible object header (the 4222 // bitmap knows by how much we need to move it as it knows its 4223 // granularity). 4224 assert(_finger < _region_limit, "invariant"); 4225 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4226 // Check if bitmap iteration was aborted while scanning the last object 4227 if (new_finger >= _region_limit) { 4228 giveup_current_region(); 4229 } else { 4230 move_finger_to(new_finger); 4231 } 4232 } 4233 } 4234 // At this point we have either completed iterating over the 4235 // region we were holding on to, or we have aborted. 
4236 4237 // We then partially drain the local queue and the global stack. 4238 // (Do we really need this?) 4239 drain_local_queue(true); 4240 drain_global_stack(true); 4241 4242 // Read the note on the claim_region() method on why it might 4243 // return NULL with potentially more regions available for 4244 // claiming and why we have to check out_of_regions() to determine 4245 // whether we're done or not. 4246 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4247 // We are going to try to claim a new region. We should have 4248 // given up on the previous one. 4249 // Separated the asserts so that we know which one fires. 4250 assert(_curr_region == NULL, "invariant"); 4251 assert(_finger == NULL, "invariant"); 4252 assert(_region_limit == NULL, "invariant"); 4253 if (_cm->verbose_low()) { 4254 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id); 4255 } 4256 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 4257 if (claimed_region != NULL) { 4258 // Yes, we managed to claim one 4259 statsOnly( ++_regions_claimed ); 4260 4261 if (_cm->verbose_low()) { 4262 gclog_or_tty->print_cr("[%u] we successfully claimed " 4263 "region "PTR_FORMAT, 4264 _worker_id, p2i(claimed_region)); 4265 } 4266 4267 setup_for_region(claimed_region); 4268 assert(_curr_region == claimed_region, "invariant"); 4269 } 4270 // It is important to call the regular clock here. It might take 4271 // a while to claim a region if, for example, we hit a large 4272 // block of empty regions. So we need to call the regular clock 4273 // method once round the loop to make sure it's called 4274 // frequently enough. 4275 regular_clock_call(); 4276 } 4277 4278 if (!has_aborted() && _curr_region == NULL) { 4279 assert(_cm->out_of_regions(), 4280 "at this point we should be out of regions"); 4281 } 4282 } while ( _curr_region != NULL && !has_aborted()); 4283 4284 if (!has_aborted()) { 4285 // We cannot check whether the global stack is empty, since other 4286 // tasks might be pushing objects to it concurrently. 4287 assert(_cm->out_of_regions(), 4288 "at this point we should be out of regions"); 4289 4290 if (_cm->verbose_low()) { 4291 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id); 4292 } 4293 4294 // Try to reduce the number of available SATB buffers so that 4295 // remark has less work to do. 4296 drain_satb_buffers(); 4297 } 4298 4299 // Since we've done everything else, we can now totally drain the 4300 // local queue and global stack. 4301 drain_local_queue(false); 4302 drain_global_stack(false); 4303 4304 // Attempt at work stealing from other task's queues. 4305 if (do_stealing && !has_aborted()) { 4306 // We have not aborted. This means that we have finished all that 4307 // we could. Let's try to do some stealing... 4308 4309 // We cannot check whether the global stack is empty, since other 4310 // tasks might be pushing objects to it concurrently. 
4311 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4312 "only way to reach here"); 4313 4314 if (_cm->verbose_low()) { 4315 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4316 } 4317 4318 while (!has_aborted()) { 4319 oop obj; 4320 statsOnly( ++_steal_attempts ); 4321 4322 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4323 if (_cm->verbose_medium()) { 4324 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4325 _worker_id, p2i((void*) obj)); 4326 } 4327 4328 statsOnly( ++_steals ); 4329 4330 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4331 "any stolen object should be marked"); 4332 scan_object(obj); 4333 4334 // And since we're towards the end, let's totally drain the 4335 // local queue and global stack. 4336 drain_local_queue(false); 4337 drain_global_stack(false); 4338 } else { 4339 break; 4340 } 4341 } 4342 } 4343 4344 // If we are about to wrap up and go into termination, check if we 4345 // should raise the overflow flag. 4346 if (do_termination && !has_aborted()) { 4347 if (_cm->force_overflow()->should_force()) { 4348 _cm->set_has_overflown(); 4349 regular_clock_call(); 4350 } 4351 } 4352 4353 // We still haven't aborted. Now, let's try to get into the 4354 // termination protocol. 4355 if (do_termination && !has_aborted()) { 4356 // We cannot check whether the global stack is empty, since other 4357 // tasks might be concurrently pushing objects on it. 4358 // Separated the asserts so that we know which one fires. 4359 assert(_cm->out_of_regions(), "only way to reach here"); 4360 assert(_task_queue->size() == 0, "only way to reach here"); 4361 4362 if (_cm->verbose_low()) { 4363 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4364 } 4365 4366 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4367 4368 // The CMTask class also extends the TerminatorTerminator class, 4369 // hence its should_exit_termination() method will also decide 4370 // whether to exit the termination protocol or not. 4371 bool finished = (is_serial || 4372 _cm->terminator()->offer_termination(this)); 4373 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4374 _termination_time_ms += 4375 termination_end_time_ms - _termination_start_time_ms; 4376 4377 if (finished) { 4378 // We're all done. 4379 4380 if (_worker_id == 0) { 4381 // let's allow task 0 to do this 4382 if (concurrent()) { 4383 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4384 // we need to set this to false before the next 4385 // safepoint. This way we ensure that the marking phase 4386 // doesn't observe any more heap expansions. 4387 _cm->clear_concurrent_marking_in_progress(); 4388 } 4389 } 4390 4391 // We can now guarantee that the global stack is empty, since 4392 // all other tasks have finished. We separated the guarantees so 4393 // that, if a condition is false, we can immediately find out 4394 // which one. 4395 guarantee(_cm->out_of_regions(), "only way to reach here"); 4396 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4397 guarantee(_task_queue->size() == 0, "only way to reach here"); 4398 guarantee(!_cm->has_overflown(), "only way to reach here"); 4399 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4400 4401 if (_cm->verbose_low()) { 4402 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4403 } 4404 } else { 4405 // Apparently there's more work to do. Let's abort this task. It 4406 // will restart it and we can hopefully find more things to do. 
4407 4408 if (_cm->verbose_low()) { 4409 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4410 _worker_id); 4411 } 4412 4413 set_has_aborted(); 4414 statsOnly( ++_aborted_termination ); 4415 } 4416 } 4417 4418 // Mainly for debugging purposes to make sure that a pointer to the 4419 // closure which was statically allocated in this frame doesn't 4420 // escape it by accident. 4421 set_cm_oop_closure(NULL); 4422 double end_time_ms = os::elapsedVTime() * 1000.0; 4423 double elapsed_time_ms = end_time_ms - _start_time_ms; 4424 // Update the step history. 4425 _step_times_ms.add(elapsed_time_ms); 4426 4427 if (has_aborted()) { 4428 // The task was aborted for some reason. 4429 4430 statsOnly( ++_aborted ); 4431 4432 if (_has_timed_out) { 4433 double diff_ms = elapsed_time_ms - _time_target_ms; 4434 // Keep statistics of how well we did with respect to hitting 4435 // our target only if we actually timed out (if we aborted for 4436 // other reasons, then the results might get skewed). 4437 _marking_step_diffs_ms.add(diff_ms); 4438 } 4439 4440 if (_cm->has_overflown()) { 4441 // This is the interesting one. We aborted because a global 4442 // overflow was raised. This means we have to restart the 4443 // marking phase and start iterating over regions. However, in 4444 // order to do this we have to make sure that all tasks stop 4445 // what they are doing and re-initialize in a safe manner. We 4446 // will achieve this with the use of two barrier sync points. 4447 4448 if (_cm->verbose_low()) { 4449 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4450 } 4451 4452 if (!is_serial) { 4453 // We only need to enter the sync barrier if being called 4454 // from a parallel context 4455 _cm->enter_first_sync_barrier(_worker_id); 4456 4457 // When we exit this sync barrier we know that all tasks have 4458 // stopped doing marking work. So, it's now safe to 4459 // re-initialize our data structures. At the end of this method, 4460 // task 0 will clear the global data structures. 4461 } 4462 4463 statsOnly( ++_aborted_overflow ); 4464 4465 // We clear the local state of this task... 4466 clear_region_fields(); 4467 4468 if (!is_serial) { 4469 // ...and enter the second barrier. 4470 _cm->enter_second_sync_barrier(_worker_id); 4471 } 4472 // At this point, if we're during the concurrent phase of 4473 // marking, everything has been re-initialized and we're 4474 // ready to restart. 
4475 } 4476 4477 if (_cm->verbose_low()) { 4478 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4479 "elapsed = %1.2lfms <<<<<<<<<<", 4480 _worker_id, _time_target_ms, elapsed_time_ms); 4481 if (_cm->has_aborted()) { 4482 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4483 _worker_id); 4484 } 4485 } 4486 } else { 4487 if (_cm->verbose_low()) { 4488 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4489 "elapsed = %1.2lfms <<<<<<<<<<", 4490 _worker_id, _time_target_ms, elapsed_time_ms); 4491 } 4492 } 4493 4494 _claimed = false; 4495 } 4496 4497 CMTask::CMTask(uint worker_id, 4498 ConcurrentMark* cm, 4499 size_t* marked_bytes, 4500 BitMap* card_bm, 4501 CMTaskQueue* task_queue, 4502 CMTaskQueueSet* task_queues) 4503 : _g1h(G1CollectedHeap::heap()), 4504 _worker_id(worker_id), _cm(cm), 4505 _claimed(false), 4506 _nextMarkBitMap(NULL), _hash_seed(17), 4507 _task_queue(task_queue), 4508 _task_queues(task_queues), 4509 _cm_oop_closure(NULL), 4510 _marked_bytes_array(marked_bytes), 4511 _card_bm(card_bm) { 4512 guarantee(task_queue != NULL, "invariant"); 4513 guarantee(task_queues != NULL, "invariant"); 4514 4515 statsOnly( _clock_due_to_scanning = 0; 4516 _clock_due_to_marking = 0 ); 4517 4518 _marking_step_diffs_ms.add(0.5); 4519 } 4520 4521 // These are formatting macros that are used below to ensure 4522 // consistent formatting. The *_H_* versions are used to format the 4523 // header for a particular value and they should be kept consistent 4524 // with the corresponding macro. Also note that most of the macros add 4525 // the necessary white space (as a prefix) which makes them a bit 4526 // easier to compose. 4527 4528 // All the output lines are prefixed with this string to be able to 4529 // identify them easily in a large log file. 4530 #define G1PPRL_LINE_PREFIX "###" 4531 4532 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4533 #ifdef _LP64 4534 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4535 #else // _LP64 4536 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4537 #endif // _LP64 4538 4539 // For per-region info 4540 #define G1PPRL_TYPE_FORMAT " %-4s" 4541 #define G1PPRL_TYPE_H_FORMAT " %4s" 4542 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4543 #define G1PPRL_BYTE_H_FORMAT " %9s" 4544 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4545 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4546 4547 // For summary info 4548 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4549 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4550 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4551 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4552 4553 G1PrintRegionLivenessInfoClosure:: 4554 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4555 : _out(out), 4556 _total_used_bytes(0), _total_capacity_bytes(0), 4557 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4558 _hum_used_bytes(0), _hum_capacity_bytes(0), 4559 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4560 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4561 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4562 MemRegion g1_committed = g1h->g1_committed(); 4563 MemRegion g1_reserved = g1h->g1_reserved(); 4564 double now = os::elapsedTime(); 4565 4566 // Print the header of the output. 
4567   _out->cr();
4568   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4569   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4570                  G1PPRL_SUM_ADDR_FORMAT("committed")
4571                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4572                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4573                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4574                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4575                  HeapRegion::GrainBytes);
4576   _out->print_cr(G1PPRL_LINE_PREFIX);
4577   _out->print_cr(G1PPRL_LINE_PREFIX
4578                  G1PPRL_TYPE_H_FORMAT
4579                  G1PPRL_ADDR_BASE_H_FORMAT
4580                  G1PPRL_BYTE_H_FORMAT
4581                  G1PPRL_BYTE_H_FORMAT
4582                  G1PPRL_BYTE_H_FORMAT
4583                  G1PPRL_DOUBLE_H_FORMAT
4584                  G1PPRL_BYTE_H_FORMAT
4585                  G1PPRL_BYTE_H_FORMAT,
4586                  "type", "address-range",
4587                  "used", "prev-live", "next-live", "gc-eff",
4588                  "remset", "code-roots");
4589   _out->print_cr(G1PPRL_LINE_PREFIX
4590                  G1PPRL_TYPE_H_FORMAT
4591                  G1PPRL_ADDR_BASE_H_FORMAT
4592                  G1PPRL_BYTE_H_FORMAT
4593                  G1PPRL_BYTE_H_FORMAT
4594                  G1PPRL_BYTE_H_FORMAT
4595                  G1PPRL_DOUBLE_H_FORMAT
4596                  G1PPRL_BYTE_H_FORMAT
4597                  G1PPRL_BYTE_H_FORMAT,
4598                  "", "",
4599                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4600                  "(bytes)", "(bytes)");
4601 }
4602
4603 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4604 // the corresponding value for a region in a humongous region
4605 // series (either the region size, or what's left if the _hum_* field
4606 // is < the region size), and updates the _hum_* field accordingly.
4607 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4608   size_t bytes = 0;
4609   // The > 0 check is to deal with the prev and next live bytes which
4610   // could be 0.
4611   if (*hum_bytes > 0) {
4612     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4613     *hum_bytes -= bytes;
4614   }
4615   return bytes;
4616 }
4617
4618 // It deduces the values for a region in a humongous region series
4619 // from the _hum_* fields and updates those accordingly. It assumes
4620 // that the _hum_* fields have already been set up from the "starts
4621 // humongous" region and that we visit the regions in address order.
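// For example (illustrative figures only): a humongous object that uses
// 2.5M spread over three 1M regions is reported as 1M, 1M and 0.5M of used
// space for the three regions, in that order; capacity and the prev/next
// live counts are split by the same mechanism.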
4622 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4623 size_t* capacity_bytes, 4624 size_t* prev_live_bytes, 4625 size_t* next_live_bytes) { 4626 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4627 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4628 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4629 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4630 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4631 } 4632 4633 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4634 const char* type = ""; 4635 HeapWord* bottom = r->bottom(); 4636 HeapWord* end = r->end(); 4637 size_t capacity_bytes = r->capacity(); 4638 size_t used_bytes = r->used(); 4639 size_t prev_live_bytes = r->live_bytes(); 4640 size_t next_live_bytes = r->next_live_bytes(); 4641 double gc_eff = r->gc_efficiency(); 4642 size_t remset_bytes = r->rem_set()->mem_size(); 4643 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 4644 4645 if (r->used() == 0) { 4646 type = "FREE"; 4647 } else if (r->is_survivor()) { 4648 type = "SURV"; 4649 } else if (r->is_young()) { 4650 type = "EDEN"; 4651 } else if (r->startsHumongous()) { 4652 type = "HUMS"; 4653 4654 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4655 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4656 "they should have been zeroed after the last time we used them"); 4657 // Set up the _hum_* fields. 4658 _hum_capacity_bytes = capacity_bytes; 4659 _hum_used_bytes = used_bytes; 4660 _hum_prev_live_bytes = prev_live_bytes; 4661 _hum_next_live_bytes = next_live_bytes; 4662 get_hum_bytes(&used_bytes, &capacity_bytes, 4663 &prev_live_bytes, &next_live_bytes); 4664 end = bottom + HeapRegion::GrainWords; 4665 } else if (r->continuesHumongous()) { 4666 type = "HUMC"; 4667 get_hum_bytes(&used_bytes, &capacity_bytes, 4668 &prev_live_bytes, &next_live_bytes); 4669 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4670 } else { 4671 type = "OLD"; 4672 } 4673 4674 _total_used_bytes += used_bytes; 4675 _total_capacity_bytes += capacity_bytes; 4676 _total_prev_live_bytes += prev_live_bytes; 4677 _total_next_live_bytes += next_live_bytes; 4678 _total_remset_bytes += remset_bytes; 4679 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4680 4681 // Print a line for this particular region. 4682 _out->print_cr(G1PPRL_LINE_PREFIX 4683 G1PPRL_TYPE_FORMAT 4684 G1PPRL_ADDR_BASE_FORMAT 4685 G1PPRL_BYTE_FORMAT 4686 G1PPRL_BYTE_FORMAT 4687 G1PPRL_BYTE_FORMAT 4688 G1PPRL_DOUBLE_FORMAT 4689 G1PPRL_BYTE_FORMAT 4690 G1PPRL_BYTE_FORMAT, 4691 type, p2i(bottom), p2i(end), 4692 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4693 remset_bytes, strong_code_roots_bytes); 4694 4695 return false; 4696 } 4697 4698 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4699 // add static memory usages to remembered set sizes 4700 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4701 // Print the footer of the output. 
4702 _out->print_cr(G1PPRL_LINE_PREFIX); 4703 _out->print_cr(G1PPRL_LINE_PREFIX 4704 " SUMMARY" 4705 G1PPRL_SUM_MB_FORMAT("capacity") 4706 G1PPRL_SUM_MB_PERC_FORMAT("used") 4707 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4708 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4709 G1PPRL_SUM_MB_FORMAT("remset") 4710 G1PPRL_SUM_MB_FORMAT("code-roots"), 4711 bytes_to_mb(_total_capacity_bytes), 4712 bytes_to_mb(_total_used_bytes), 4713 perc(_total_used_bytes, _total_capacity_bytes), 4714 bytes_to_mb(_total_prev_live_bytes), 4715 perc(_total_prev_live_bytes, _total_capacity_bytes), 4716 bytes_to_mb(_total_next_live_bytes), 4717 perc(_total_next_live_bytes, _total_capacity_bytes), 4718 bytes_to_mb(_total_remset_bytes), 4719 bytes_to_mb(_total_strong_code_roots_bytes)); 4720 _out->cr(); 4721 }
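
// An illustrative, schematic sketch (not literal output) of the listing that
// G1PrintRegionLivenessInfoClosure produces, based on the format macros above:
//
//   ### PHASE <phase-name> @ <elapsed-seconds>
//   ### HEAP  committed: <from>-<to>  reserved: <from>-<to>  region-size: <bytes>
//   ###
//   ###  type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
//   ###  <one line per heap region>
//   ###
//   ###  SUMMARY  capacity: <MB>  used: <MB> / <%>  prev-live: <MB> / <%>  next-live: <MB> / <%>  remset: <MB>  code-roots: <MB>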