1 /*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "classfile/symbolTable.hpp"
27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 #include "gc_implementation/g1/g1Log.hpp"
33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 #include "gc_implementation/g1/g1RemSet.hpp"
35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 #include "gc_implementation/shared/gcTimer.hpp"
40 #include "gc_implementation/shared/gcTrace.hpp"
41 #include "gc_implementation/shared/gcTraceTime.hpp"
42 #include "memory/genOopClosures.inline.hpp"
43 #include "memory/referencePolicy.hpp"
44 #include "memory/resourceArea.hpp"
45 #include "oops/oop.inline.hpp"
46 #include "runtime/handles.inline.hpp"
47 #include "runtime/java.hpp"
48 #include "runtime/prefetch.inline.hpp"
49 #include "services/memTracker.hpp"
50
51 // Concurrent marking bit map wrapper
52
53 CMBitMapRO::CMBitMapRO(int shifter) :
54 _bm(),
55 _shifter(shifter) {
56 _bmStartWord = 0;
57 _bmWordSize = 0;
58 }
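// Note (illustrative, assuming the default 8-byte HeapWord and a shifter of 0,
// i.e. one bit per minimum object alignment unit): the wrapper maps heap
// addresses to bit indices by subtracting _bmStartWord and shifting right by
// _shifter, so heapWordToOffset(addr) is (addr - _bmStartWord) >> _shifter and
// an object starting 1024 words into the covered range is represented by
// bit 1024.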
59
60 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
61 HeapWord* limit) const {
62 // First we must round addr *up* to a possible object boundary.
63 addr = (HeapWord*)align_size_up((intptr_t)addr,
64 HeapWordSize << _shifter);
65 size_t addrOffset = heapWordToOffset(addr);
66 if (limit == NULL) {
67 limit = _bmStartWord + _bmWordSize;
68 }
69 size_t limitOffset = heapWordToOffset(limit);
70 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
71 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
72 assert(nextAddr >= addr, "get_next_one postcondition");
73 assert(nextAddr == limit || isMarked(nextAddr),
74 "get_next_one postcondition");
75 return nextAddr;
76 }
77
78 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
79 HeapWord* limit) const {
80 size_t addrOffset = heapWordToOffset(addr);
81 if (limit == NULL) {
82 limit = _bmStartWord + _bmWordSize;
83 }
84 size_t limitOffset = heapWordToOffset(limit);
85 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
86 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
90 return nextAddr;
91 }
92
93 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
94 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
95 return (int) (diff >> _shifter);
96 }
97
98 #ifndef PRODUCT
99 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
100 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
101 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
102 "size inconsistency");
103 return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
104 _bmWordSize == heap_rs.size()>>LogHeapWordSize;
105 }
106 #endif
107
108 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
109 _bm.print_on_error(st, prefix);
110 }
111
112 bool CMBitMap::allocate(ReservedSpace heap_rs) {
113 _bmStartWord = (HeapWord*)(heap_rs.base());
114 _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
115 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
116 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
117 if (!brs.is_reserved()) {
118 warning("ConcurrentMark marking bit map allocation failure");
119 return false;
120 }
121 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
122 // For now we'll just commit all of the bit map up front.
123 // Later on we'll try to be more parsimonious with swap.
124 if (!_virtual_space.initialize(brs, brs.size())) {
125 warning("ConcurrentMark marking bit map backing store failure");
126 return false;
127 }
128 assert(_virtual_space.committed_size() == brs.size(),
129 "didn't reserve backing store for all of concurrent marking bit map?");
130 _bm.set_map((uintptr_t*)_virtual_space.low());
131 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
132 _bmWordSize, "inconsistency in bit map sizing");
133 _bm.set_size(_bmWordSize >> _shifter);
134 return true;
135 }
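// Worked example for the sizing above (assuming an 8-byte HeapWord and a
// shifter of 0): a 1 GB heap is 128M heap words, so the bitmap needs 128M
// bits, i.e. 16 MB of backing store. That is what
//   (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
// computes; the trailing "+ 1" just adds a byte of slack before the request is
// rounded up to the allocation granularity.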
136
137 void CMBitMap::clearAll() {
138 _bm.clear();
139 return;
140 }
141
void CMBitMap::markRange(MemRegion mr) {
  // Clamp the range to the part of the heap that this bitmap covers.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
145 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
146 ((HeapWord *) mr.end())),
147 "markRange memory region end is not card aligned");
148 // convert address range into offset range
149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 heapWordToOffset(mr.end()), true);
151 }
152
void CMBitMap::clearRange(MemRegion mr) {
  // Clamp the range to the part of the heap that this bitmap covers.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
156 // convert address range into offset range
157 _bm.at_put_range(heapWordToOffset(mr.start()),
158 heapWordToOffset(mr.end()), false);
159 }
160
161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
162 HeapWord* end_addr) {
163 HeapWord* start = getNextMarkedWordAddress(addr);
164 start = MIN2(start, end_addr);
165 HeapWord* end = getNextUnmarkedWordAddress(start);
166 end = MIN2(end, end_addr);
167 assert(start <= end, "Consistency check");
168 MemRegion mr(start, end);
169 if (!mr.is_empty()) {
170 clearRange(mr);
171 }
172 return mr;
173 }
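// Illustrative caller pattern for getAndClearMarkedRegion() (hypothetical;
// process() is a stand-in for whatever the caller does with a marked run):
//
//   MemRegion mr = bm->getAndClearMarkedRegion(addr, end_addr);
//   while (!mr.is_empty()) {
//     process(mr);
//     mr = bm->getAndClearMarkedRegion(mr.end(), end_addr);
//   }
//
// Each call returns the next maximal run of consecutive marked words at or
// after the given address, already cleared, or an empty region once nothing
// marked remains before end_addr.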
174
175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
176 _base(NULL), _cm(cm)
177 #ifdef ASSERT
178 , _drain_in_progress(false)
179 , _drain_in_progress_yields(false)
180 #endif
181 {}
182
183 bool CMMarkStack::allocate(size_t capacity) {
184 // allocate a stack of the requisite depth
185 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
186 if (!rs.is_reserved()) {
187 warning("ConcurrentMark MarkStack allocation failure");
188 return false;
189 }
190 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
191 if (!_virtual_space.initialize(rs, rs.size())) {
192 warning("ConcurrentMark MarkStack backing store failure");
193 // Release the virtual memory reserved for the marking stack
194 rs.release();
195 return false;
196 }
197 assert(_virtual_space.committed_size() == rs.size(),
198 "Didn't reserve backing store for all of ConcurrentMark stack?");
199 _base = (oop*) _virtual_space.low();
200 setEmpty();
201 _capacity = (jint) capacity;
202 _saved_index = -1;
203 _should_expand = false;
204 NOT_PRODUCT(_max_depth = 0);
205 return true;
206 }
207
208 void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
211 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
212 // Clear expansion flag
213 _should_expand = false;
214 if (_capacity == (jint) MarkStackSizeMax) {
215 if (PrintGCDetails && Verbose) {
216 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
217 }
218 return;
219 }
220 // Double capacity if possible
221 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
222 // Do not give up existing stack until we have managed to
223 // get the double capacity that we desired.
224 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
225 sizeof(oop)));
226 if (rs.is_reserved()) {
227 // Release the backing store associated with old stack
228 _virtual_space.release();
229 // Reinitialize virtual space for new stack
230 if (!_virtual_space.initialize(rs, rs.size())) {
231 fatal("Not enough swap for expanded marking stack capacity");
232 }
233 _base = (oop*)(_virtual_space.low());
234 _index = 0;
235 _capacity = new_capacity;
236 } else {
237 if (PrintGCDetails && Verbose) {
      // Failed to double the capacity; continue with the current stack.
239 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
240 SIZE_FORMAT"K to " SIZE_FORMAT"K",
241 _capacity / K, new_capacity / K);
242 }
243 }
244 }
245
246 void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
250 _should_expand = _cm->has_overflown();
251 }
252
253 CMMarkStack::~CMMarkStack() {
254 if (_base != NULL) {
255 _base = NULL;
256 _virtual_space.release();
257 }
258 }
259
260 void CMMarkStack::par_push(oop ptr) {
261 while (true) {
262 if (isFull()) {
263 _overflow = true;
264 return;
265 }
266 // Otherwise...
267 jint index = _index;
268 jint next_index = index+1;
269 jint res = Atomic::cmpxchg(next_index, &_index, index);
270 if (res == index) {
271 _base[index] = ptr;
272 // Note that we don't maintain this atomically. We could, but it
273 // doesn't seem necessary.
274 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
275 return;
276 }
277 // Otherwise, we need to try again.
278 }
279 }
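// Note on the push protocol above: a pusher first claims a slot by advancing
// _index with a compare-and-swap and only then stores the oop into the claimed
// slot; if the CAS loses a race it retries with the freshly read _index. On
// overflow the oop is not pushed at all -- _overflow is recorded instead and
// the marking code is expected to notice it and restart, so no marks are lost.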
280
281 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
282 while (true) {
283 if (isFull()) {
284 _overflow = true;
285 return;
286 }
287 // Otherwise...
288 jint index = _index;
289 jint next_index = index + n;
290 if (next_index > _capacity) {
291 _overflow = true;
292 return;
293 }
294 jint res = Atomic::cmpxchg(next_index, &_index, index);
295 if (res == index) {
296 for (int i = 0; i < n; i++) {
297 int ind = index + i;
298 assert(ind < _capacity, "By overflow test above.");
299 _base[ind] = ptr_arr[i];
300 }
301 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
302 return;
303 }
304 // Otherwise, we need to try again.
305 }
306 }
307
308 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
309 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
310 jint start = _index;
311 jint next_index = start + n;
312 if (next_index > _capacity) {
313 _overflow = true;
314 return;
315 }
316 // Otherwise.
317 _index = next_index;
318 for (int i = 0; i < n; i++) {
319 int ind = start + i;
320 assert(ind < _capacity, "By overflow test above.");
321 _base[ind] = ptr_arr[i];
322 }
323 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
324 }
325
326 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
327 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
328 jint index = _index;
329 if (index == 0) {
330 *n = 0;
331 return false;
332 } else {
333 int k = MIN2(max, index);
334 jint new_ind = index - k;
335 for (int j = 0; j < k; j++) {
336 ptr_arr[j] = _base[new_ind + j];
337 }
338 _index = new_ind;
339 *n = k;
340 return true;
341 }
342 }
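// Illustrative drain loop for the bulk-pop interface above (hypothetical;
// scan() stands in for the caller's per-oop work, the real callers live in
// CMTask):
//
//   oop buffer[64];
//   int n;
//   while (stack->par_pop_arr(buffer, 64, &n)) {
//     for (int i = 0; i < n; i++) {
//       scan(buffer[i]);
//     }
//   }
//
// par_pop_arr() returns false once the stack is empty. Note that par_push_arr()
// and par_pop_arr() serialize on ParGCRareEvent_lock, so they are expected to
// be comparatively rare operations.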
343
344 template<class OopClosureClass>
345 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
346 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
347 || SafepointSynchronize::is_at_safepoint(),
348 "Drain recursion must be yield-safe.");
349 bool res = true;
350 debug_only(_drain_in_progress = true);
351 debug_only(_drain_in_progress_yields = yield_after);
352 while (!isEmpty()) {
353 oop newOop = pop();
354 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
355 assert(newOop->is_oop(), "Expected an oop");
356 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
357 "only grey objects on this stack");
358 newOop->oop_iterate(cl);
359 if (yield_after && _cm->do_yield_check()) {
360 res = false;
361 break;
362 }
363 }
364 debug_only(_drain_in_progress = false);
365 return res;
366 }
367
368 void CMMarkStack::note_start_of_gc() {
369 assert(_saved_index == -1,
370 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
371 _saved_index = _index;
372 }
373
374 void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
380 guarantee(_saved_index == _index,
381 err_msg("saved index: %d index: %d", _saved_index, _index));
382 _saved_index = -1;
383 }
384
385 void CMMarkStack::oops_do(OopClosure* f) {
386 assert(_saved_index == _index,
387 err_msg("saved index: %d index: %d", _saved_index, _index));
388 for (int i = 0; i < _index; i += 1) {
389 f->do_oop(&_base[i]);
390 }
391 }
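// Note: note_start_of_gc() / note_end_of_gc() are intended to bracket a STW
// pause, with oops_do() running in between to let a closure visit (and, if
// needed, update) every entry currently on the stack; the guarantee in
// note_end_of_gc() then catches any accidental pushes made while the bracket
// was open.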
392
393 bool ConcurrentMark::not_yet_marked(oop obj) const {
394 return _g1h->is_obj_ill(obj);
395 }
396
397 CMRootRegions::CMRootRegions() :
398 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
399 _should_abort(false), _next_survivor(NULL) { }
400
401 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
402 _young_list = g1h->young_list();
403 _cm = cm;
404 }
405
406 void CMRootRegions::prepare_for_scan() {
407 assert(!scan_in_progress(), "pre-condition");
408
409 // Currently, only survivors can be root regions.
410 assert(_next_survivor == NULL, "pre-condition");
411 _next_survivor = _young_list->first_survivor_region();
412 _scan_in_progress = (_next_survivor != NULL);
413 _should_abort = false;
414 }
415
416 HeapRegion* CMRootRegions::claim_next() {
417 if (_should_abort) {
418 // If someone has set the should_abort flag, we return NULL to
419 // force the caller to bail out of their loop.
420 return NULL;
421 }
422
423 // Currently, only survivors can be root regions.
424 HeapRegion* res = _next_survivor;
425 if (res != NULL) {
426 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
427 // Read it again in case it changed while we were waiting for the lock.
428 res = _next_survivor;
429 if (res != NULL) {
430 if (res == _young_list->last_survivor_region()) {
431 // We just claimed the last survivor so store NULL to indicate
432 // that we're done.
433 _next_survivor = NULL;
434 } else {
435 _next_survivor = res->get_next_young_region();
436 }
437 } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
440 }
441 }
442 assert(res == NULL || res->is_survivor(), "post-condition");
443
444 return res;
445 }
446
447 void CMRootRegions::scan_finished() {
448 assert(scan_in_progress(), "pre-condition");
449
450 // Currently, only survivors can be root regions.
451 if (!_should_abort) {
452 assert(_next_survivor == NULL, "we should have claimed all survivors");
453 }
454 _next_survivor = NULL;
455
456 {
457 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
458 _scan_in_progress = false;
459 RootRegionScan_lock->notify_all();
460 }
461 }
462
463 bool CMRootRegions::wait_until_scan_finished() {
464 if (!scan_in_progress()) return false;
465
466 {
467 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
468 while (scan_in_progress()) {
469 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
470 }
471 }
472 return true;
473 }
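// Illustrative sketch of how a scanning worker uses the claiming interface
// above (the real loop is in CMRootRegionScanTask::work(); scan_region() is a
// hypothetical stand-in):
//
//   HeapRegion* hr = root_regions->claim_next();
//   while (hr != NULL) {
//     scan_region(hr);
//     hr = root_regions->claim_next();
//   }
//
// claim_next() hands out each survivor region at most once and returns NULL
// when the list is exhausted or _should_abort has been set; scan_finished()
// then wakes up anyone blocked in wait_until_scan_finished().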
474
475 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
476 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
477 #endif // _MSC_VER
478
479 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
480 return MAX2((n_par_threads + 2) / 4, 1U);
481 }
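// Worked example: with integer arithmetic the scaling above yields
// (8 + 2) / 4 = 2 marking threads for 8 parallel GC threads, while the MAX2
// keeps the result at 1 when n_par_threads is 0 or 1.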
482
483 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
484 _g1h(g1h),
485 _markBitMap1(log2_intptr(MinObjAlignment)),
486 _markBitMap2(log2_intptr(MinObjAlignment)),
487 _parallel_marking_threads(0),
488 _max_parallel_marking_threads(0),
489 _sleep_factor(0.0),
490 _marking_task_overhead(1.0),
491 _cleanup_sleep_factor(0.0),
492 _cleanup_task_overhead(1.0),
493 _cleanup_list("Cleanup List"),
494 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
495 _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
496 CardTableModRefBS::card_shift,
497 false /* in_resource_area*/),
498
499 _prevMarkBitMap(&_markBitMap1),
500 _nextMarkBitMap(&_markBitMap2),
501
502 _markStack(this),
503 // _finger set in set_non_marking_state
504
505 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
506 // _active_tasks set in set_non_marking_state
507 // _tasks set inside the constructor
508 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
509 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
510
511 _has_overflown(false),
512 _concurrent(false),
513 _has_aborted(false),
514 _restart_for_overflow(false),
515 _concurrent_marking_in_progress(false),
516
517 // _verbose_level set below
518
519 _init_times(),
520 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
521 _cleanup_times(),
522 _total_counting_time(0.0),
523 _total_rs_scrub_time(0.0),
524
525 _parallel_workers(NULL),
526
527 _count_card_bitmaps(NULL),
528 _count_marked_bytes(NULL),
529 _completed_initialization(false) {
530 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
531 if (verbose_level < no_verbose) {
532 verbose_level = no_verbose;
533 }
534 if (verbose_level > high_verbose) {
535 verbose_level = high_verbose;
536 }
537 _verbose_level = verbose_level;
538
539 if (verbose_low()) {
540 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
541 "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
542 }
543
544 if (!_markBitMap1.allocate(heap_rs)) {
545 warning("Failed to allocate first CM bit map");
546 return;
547 }
548 if (!_markBitMap2.allocate(heap_rs)) {
549 warning("Failed to allocate second CM bit map");
550 return;
551 }
552
553 // Create & start a ConcurrentMark thread.
554 _cmThread = new ConcurrentMarkThread(this);
555 assert(cmThread() != NULL, "CM Thread should have been created");
556 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
557 if (_cmThread->osthread() == NULL) {
558 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
559 }
560
561 assert(CGC_lock != NULL, "Where's the CGC_lock?");
562 assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
563 assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
564
565 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
566 satb_qs.set_buffer_size(G1SATBBufferSize);
567
568 _root_regions.init(_g1h, this);
569
570 if (ConcGCThreads > ParallelGCThreads) {
571 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
572 "than ParallelGCThreads (" UINTX_FORMAT ").",
573 ConcGCThreads, ParallelGCThreads);
574 return;
575 }
576 if (ParallelGCThreads == 0) {
577 // if we are not running with any parallel GC threads we will not
578 // spawn any marking threads either
579 _parallel_marking_threads = 0;
580 _max_parallel_marking_threads = 0;
581 _sleep_factor = 0.0;
582 _marking_task_overhead = 1.0;
583 } else {
584 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
585 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
586 // if both are set
587 _sleep_factor = 0.0;
588 _marking_task_overhead = 1.0;
589 } else if (G1MarkingOverheadPercent > 0) {
590 // We will calculate the number of parallel marking threads based
591 // on a target overhead with respect to the soft real-time goal
592 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
593 double overall_cm_overhead =
594 (double) MaxGCPauseMillis * marking_overhead /
595 (double) GCPauseIntervalMillis;
596 double cpu_ratio = 1.0 / (double) os::processor_count();
597 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
598 double marking_task_overhead =
599 overall_cm_overhead / marking_thread_num *
600 (double) os::processor_count();
601 double sleep_factor =
602 (1.0 - marking_task_overhead) / marking_task_overhead;
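      // Worked example with purely hypothetical flag values:
      // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000 and 8 processors give
      //   marking_overhead      = 0.10
      //   overall_cm_overhead   = 200 * 0.10 / 1000        = 0.02
      //   cpu_ratio             = 1 / 8                    = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125)       = 1
      //   marking_task_overhead = 0.02 / 1 * 8             = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16        = 5.25
      // i.e. the single marking thread sleeps roughly 5.25x as long as it works.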
603
604 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
605 _sleep_factor = sleep_factor;
606 _marking_task_overhead = marking_task_overhead;
607 } else {
608 // Calculate the number of parallel marking threads by scaling
609 // the number of parallel GC threads.
610 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
611 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
612 _sleep_factor = 0.0;
613 _marking_task_overhead = 1.0;
614 }
615
616 assert(ConcGCThreads > 0, "Should have been set");
617 _parallel_marking_threads = (uint) ConcGCThreads;
618 _max_parallel_marking_threads = _parallel_marking_threads;
619
620 if (parallel_marking_threads() > 1) {
621 _cleanup_task_overhead = 1.0;
622 } else {
623 _cleanup_task_overhead = marking_task_overhead();
624 }
625 _cleanup_sleep_factor =
626 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
627
628 #if 0
629 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
630 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
631 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
632 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
633 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
634 #endif
635
636 guarantee(parallel_marking_threads() > 0, "peace of mind");
637 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
638 _max_parallel_marking_threads, false, true);
639 if (_parallel_workers == NULL) {
640 vm_exit_during_initialization("Failed necessary allocation.");
641 } else {
642 _parallel_workers->initialize_workers();
643 }
644 }
645
646 if (FLAG_IS_DEFAULT(MarkStackSize)) {
647 uintx mark_stack_size =
648 MIN2(MarkStackSizeMax,
649 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
650 // Verify that the calculated value for MarkStackSize is in range.
651 // It would be nice to use the private utility routine from Arguments.
652 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
653 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
654 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
655 mark_stack_size, (uintx) 1, MarkStackSizeMax);
656 return;
657 }
658 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
659 } else {
660 // Verify MarkStackSize is in range.
661 if (FLAG_IS_CMDLINE(MarkStackSize)) {
662 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
663 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
664 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
665 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
666 MarkStackSize, (uintx) 1, MarkStackSizeMax);
667 return;
668 }
669 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
670 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
671 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
672 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
673 MarkStackSize, MarkStackSizeMax);
674 return;
675 }
676 }
677 }
678 }
679
680 if (!_markStack.allocate(MarkStackSize)) {
681 warning("Failed to allocate CM marking stack");
682 return;
683 }
684
685 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
686 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
687
688 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
689 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
690
691 BitMap::idx_t card_bm_size = _card_bm.size();
692
693 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
694 _active_tasks = _max_worker_id;
695
696 size_t max_regions = (size_t) _g1h->max_regions();
697 for (uint i = 0; i < _max_worker_id; ++i) {
698 CMTaskQueue* task_queue = new CMTaskQueue();
699 task_queue->initialize();
700 _task_queues->register_queue(i, task_queue);
701
702 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
703 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
704
705 _tasks[i] = new CMTask(i, this,
706 _count_marked_bytes[i],
707 &_count_card_bitmaps[i],
708 task_queue, _task_queues);
709
710 _accum_task_vtime[i] = 0.0;
711 }
712
713 // Calculate the card number for the bottom of the heap. Used
714 // in biasing indexes into the accounting card bitmaps.
715 _heap_bottom_card_num =
716 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
717 CardTableModRefBS::card_shift);
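  // Illustrative example (assuming the usual 512-byte card): a reserved region
  // starting at 0x0000000700000000 gives
  //   _heap_bottom_card_num = 0x700000000 >> 9 = 0x3800000
  // and an address is mapped to a counting-bitmap index by taking its card
  // number and subtracting _heap_bottom_card_num.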
718
719 // Clear all the liveness counting data
720 clear_all_count_data();
721
722 // so that the call below can read a sensible value
723 _heap_start = (HeapWord*) heap_rs.base();
724 set_non_marking_state();
725 _completed_initialization = true;
726 }
727
728 void ConcurrentMark::update_g1_committed(bool force) {
729 // If concurrent marking is not in progress, then we do not need to
730 // update _heap_end.
731 if (!concurrent_marking_in_progress() && !force) return;
732
733 MemRegion committed = _g1h->g1_committed();
734 assert(committed.start() == _heap_start, "start shouldn't change");
735 HeapWord* new_end = committed.end();
736 if (new_end > _heap_end) {
737 // The heap has been expanded.
738
739 _heap_end = new_end;
740 }
741 // Notice that the heap can also shrink. However, this only happens
742 // during a Full GC (at least currently) and the entire marking
743 // phase will bail out and the task will not be restarted. So, let's
744 // do nothing.
745 }
746
747 void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these will happen at the end of evacuation pauses, when the tasks
  // are inactive.
752 MemRegion committed = _g1h->g1_committed();
753 _heap_start = committed.start();
754 _heap_end = committed.end();
755
756 // Separated the asserts so that we know which one fires.
757 assert(_heap_start != NULL, "heap bounds should look ok");
758 assert(_heap_end != NULL, "heap bounds should look ok");
759 assert(_heap_start < _heap_end, "heap bounds should look ok");
760
761 // Reset all the marking data structures and any necessary flags
762 reset_marking_state();
763
764 if (verbose_low()) {
765 gclog_or_tty->print_cr("[global] resetting");
766 }
767
  // We reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
771 for (uint i = 0; i < _max_worker_id; ++i) {
772 _tasks[i]->reset(_nextMarkBitMap);
773 }
774
  // We need this to make sure that the flag is on during the evacuation
  // pause that has the initial mark piggy-backed on it.
777 set_concurrent_marking_in_progress();
778 }
779
780
781 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
782 _markStack.set_should_expand();
783 _markStack.setEmpty(); // Also clears the _markStack overflow flag
784 if (clear_overflow) {
785 clear_has_overflown();
786 } else {
787 assert(has_overflown(), "pre-condition");
788 }
789 _finger = _heap_start;
790
791 for (uint i = 0; i < _max_worker_id; ++i) {
792 CMTaskQueue* queue = _task_queues->queue(i);
793 queue->set_empty();
794 }
795 }
796
797 void ConcurrentMark::set_concurrency(uint active_tasks) {
798 assert(active_tasks <= _max_worker_id, "we should not have more");
799
800 _active_tasks = active_tasks;
801 // Need to update the three data structures below according to the
802 // number of active threads for this phase.
803 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
804 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
805 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
806 }
807
808 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
809 set_concurrency(active_tasks);
810
811 _concurrent = concurrent;
812 // We propagate this to all tasks, not just the active ones.
813 for (uint i = 0; i < _max_worker_id; ++i)
814 _tasks[i]->set_concurrent(concurrent);
815
816 if (concurrent) {
817 set_concurrent_marking_in_progress();
818 } else {
819 // We currently assume that the concurrent flag has been set to
820 // false before we start remark. At this point we should also be
821 // in a STW phase.
822 assert(!concurrent_marking_in_progress(), "invariant");
823 assert(out_of_regions(),
824 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
825 p2i(_finger), p2i(_heap_end)));
826 update_g1_committed(true);
827 }
828 }
829
830 void ConcurrentMark::set_non_marking_state() {
831 // We set the global marking state to some default values when we're
832 // not doing marking.
833 reset_marking_state();
834 _active_tasks = 0;
835 clear_concurrent_marking_in_progress();
836 }
837
838 ConcurrentMark::~ConcurrentMark() {
839 // The ConcurrentMark instance is never freed.
840 ShouldNotReachHere();
841 }
842
843 void ConcurrentMark::clearNextBitmap() {
844 G1CollectedHeap* g1h = G1CollectedHeap::heap();
845 G1CollectorPolicy* g1p = g1h->g1_policy();
846
  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
849 guarantee(cmThread()->during_cycle(), "invariant");
850
851 // We are finishing up the current cycle by clearing the next
852 // marking bitmap and getting it ready for the next cycle. During
853 // this time no other cycle can start. So, let's make sure that this
854 // is the case.
855 guarantee(!g1h->mark_in_progress(), "invariant");
856
857 // clear the mark bitmap (no grey objects to start with).
858 // We need to do this in chunks and offer to yield in between
859 // each chunk.
860 HeapWord* start = _nextMarkBitMap->startWord();
861 HeapWord* end = _nextMarkBitMap->endWord();
862 HeapWord* cur = start;
863 size_t chunkSize = M;
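  // Note: cur is a HeapWord*, so a chunk of M (1M) heap words covers 8 MB of
  // heap per iteration on a 64-bit VM before we offer to yield.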
864 while (cur < end) {
865 HeapWord* next = cur + chunkSize;
866 if (next > end) {
867 next = end;
868 }
869 MemRegion mr(cur,next);
870 _nextMarkBitMap->clearRange(mr);
871 cur = next;
872 do_yield_check();
873
874 // Repeat the asserts from above. We'll do them as asserts here to
875 // minimize their overhead on the product. However, we'll have
876 // them as guarantees at the beginning / end of the bitmap
877 // clearing to get some checking in the product.
878 assert(cmThread()->during_cycle(), "invariant");
879 assert(!g1h->mark_in_progress(), "invariant");
880 }
881
882 // Clear the liveness counting data
883 clear_all_count_data();
884
885 // Repeat the asserts from above.
886 guarantee(cmThread()->during_cycle(), "invariant");
887 guarantee(!g1h->mark_in_progress(), "invariant");
888 }
889
890 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
891 public:
892 bool doHeapRegion(HeapRegion* r) {
893 if (!r->continuesHumongous()) {
894 r->note_start_of_marking();
895 }
896 return false;
897 }
898 };
899
900 void ConcurrentMark::checkpointRootsInitialPre() {
901 G1CollectedHeap* g1h = G1CollectedHeap::heap();
902 G1CollectorPolicy* g1p = g1h->g1_policy();
903
904 _has_aborted = false;
905
906 #ifndef PRODUCT
907 if (G1PrintReachableAtInitialMark) {
908 print_reachable("at-cycle-start",
909 VerifyOption_G1UsePrevMarking, true /* all */);
910 }
911 #endif
912
913 // Initialize marking structures. This has to be done in a STW phase.
914 reset();
915
916 // For each region note start of marking.
917 NoteStartOfMarkHRClosure startcl;
918 g1h->heap_region_iterate(&startcl);
919 }
920
921
922 void ConcurrentMark::checkpointRootsInitialPost() {
923 G1CollectedHeap* g1h = G1CollectedHeap::heap();
924
  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count decreases at every
  // remark and we eventually stop needing to force one.
931 force_overflow_stw()->init();
932
933 // Start Concurrent Marking weak-reference discovery.
934 ReferenceProcessor* rp = g1h->ref_processor_cm();
935 // enable ("weak") refs discovery
936 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
937 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
938
939 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
942 satb_mq_set.set_active_all_threads(true, /* new active value */
943 false /* expected_active */);
944
945 _root_regions.prepare_for_scan();
946
947 // update_g1_committed() will be called at the end of an evac pause
948 // when marking is on. So, it's also called at the end of the
949 // initial-mark pause to update the heap end, if the heap expands
950 // during it. No need to call it here.
951 }
952
953 /*
954 * Notice that in the next two methods, we actually leave the STS
955 * during the barrier sync and join it immediately afterwards. If we
956 * do not do this, the following deadlock can occur: one thread could
957 * be in the barrier sync code, waiting for the other thread to also
958 * sync up, whereas another one could be trying to yield, while also
959 * waiting for the other threads to sync up too.
960 *
961 * Note, however, that this code is also used during remark and in
962 * this case we should not attempt to leave / enter the STS, otherwise
963 * we'll either hit an assert (debug / fastdebug) or deadlock
964 * (product). So we should only leave / enter the STS if we are
965 * operating concurrently.
966 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
972 */
973
974 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
975 if (verbose_low()) {
976 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
977 }
978
979 if (concurrent()) {
980 SuspendibleThreadSet::leave();
981 }
982
983 bool barrier_aborted = !_first_overflow_barrier_sync.enter();
984
985 if (concurrent()) {
986 SuspendibleThreadSet::join();
987 }
988 // at this point everyone should have synced up and not be doing any
989 // more work
990
991 if (verbose_low()) {
992 if (barrier_aborted) {
993 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
994 } else {
995 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
996 }
997 }
998
999 if (barrier_aborted) {
1000 // If the barrier aborted we ignore the overflow condition and
1001 // just abort the whole marking phase as quickly as possible.
1002 return;
1003 }
1004
1005 // If we're executing the concurrent phase of marking, reset the marking
1006 // state; otherwise the marking state is reset after reference processing,
1007 // during the remark pause.
1008 // If we reset here as a result of an overflow during the remark we will
1009 // see assertion failures from any subsequent set_concurrency_and_phase()
1010 // calls.
1011 if (concurrent()) {
    // let the task associated with worker 0 do this
1013 if (worker_id == 0) {
1014 // task 0 is responsible for clearing the global data structures
1015 // We should be here because of an overflow. During STW we should
1016 // not clear the overflow flag since we rely on it being true when
1017 // we exit this method to abort the pause and restart concurrent
1018 // marking.
1019 reset_marking_state(true /* clear_overflow */);
1020 force_overflow()->update();
1021
1022 if (G1Log::fine()) {
1023 gclog_or_tty->date_stamp(PrintGCDateStamps);
1024 gclog_or_tty->stamp(PrintGCTimeStamps);
1025 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1026 }
1027 }
1028 }
1029
  // after this, each task should reset its own data structures and
  // then go into the second barrier
1032 }
1033
1034 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1035 if (verbose_low()) {
1036 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1037 }
1038
1039 if (concurrent()) {
1040 SuspendibleThreadSet::leave();
1041 }
1042
1043 bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1044
1045 if (concurrent()) {
1046 SuspendibleThreadSet::join();
1047 }
1048 // at this point everything should be re-initialized and ready to go
1049
1050 if (verbose_low()) {
1051 if (barrier_aborted) {
1052 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1053 } else {
1054 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1055 }
1056 }
1057 }
1058
1059 #ifndef PRODUCT
1060 void ForceOverflowSettings::init() {
1061 _num_remaining = G1ConcMarkForceOverflow;
1062 _force = false;
1063 update();
1064 }
1065
1066 void ForceOverflowSettings::update() {
1067 if (_num_remaining > 0) {
1068 _num_remaining -= 1;
1069 _force = true;
1070 } else {
1071 _force = false;
1072 }
1073 }
1074
1075 bool ForceOverflowSettings::should_force() {
1076 if (_force) {
1077 _force = false;
1078 return true;
1079 } else {
1080 return false;
1081 }
1082 }
1083 #endif // !PRODUCT
1084
1085 class CMConcurrentMarkingTask: public AbstractGangTask {
1086 private:
1087 ConcurrentMark* _cm;
1088 ConcurrentMarkThread* _cmt;
1089
1090 public:
1091 void work(uint worker_id) {
1092 assert(Thread::current()->is_ConcurrentGC_thread(),
1093 "this should only be done by a conc GC thread");
1094 ResourceMark rm;
1095
1096 double start_vtime = os::elapsedVTime();
1097
1098 SuspendibleThreadSet::join();
1099
1100 assert(worker_id < _cm->active_tasks(), "invariant");
1101 CMTask* the_task = _cm->task(worker_id);
1102 the_task->record_start_time();
1103 if (!_cm->has_aborted()) {
1104 do {
1105 double start_vtime_sec = os::elapsedVTime();
1106 double start_time_sec = os::elapsedTime();
1107 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1108
1109 the_task->do_marking_step(mark_step_duration_ms,
1110 true /* do_termination */,
1111 false /* is_serial*/);
1112
1113 double end_time_sec = os::elapsedTime();
1114 double end_vtime_sec = os::elapsedVTime();
1115 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1116 double elapsed_time_sec = end_time_sec - start_time_sec;
1117 _cm->clear_has_overflown();
1118
1119 bool ret = _cm->do_yield_check(worker_id);
1120
1121 jlong sleep_time_ms;
1122 if (!_cm->has_aborted() && the_task->has_aborted()) {
1123 sleep_time_ms =
1124 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1125 SuspendibleThreadSet::leave();
1126 os::sleep(Thread::current(), sleep_time_ms, false);
1127 SuspendibleThreadSet::join();
1128 }
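        // Note (illustrative): this is where the overhead-based throttling set
        // up in the ConcurrentMark constructor takes effect. With the
        // hypothetical sleep_factor of 5.25 used in the example there, a
        // marking step that consumed 10 ms of vtime is followed by roughly a
        // 52 ms sleep, keeping the task near its target overhead.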
1129 double end_time2_sec = os::elapsedTime();
1130 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1131
1132 #if 0
1133 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1134 "overhead %1.4lf",
1135 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1136 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1137 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1138 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1139 #endif
1140 } while (!_cm->has_aborted() && the_task->has_aborted());
1141 }
1142 the_task->record_end_time();
1143 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1144
1145 SuspendibleThreadSet::leave();
1146
1147 double end_vtime = os::elapsedVTime();
1148 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1149 }
1150
1151 CMConcurrentMarkingTask(ConcurrentMark* cm,
1152 ConcurrentMarkThread* cmt) :
1153 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1154
1155 ~CMConcurrentMarkingTask() { }
1156 };
1157
1158 // Calculates the number of active workers for a concurrent
1159 // phase.
1160 uint ConcurrentMark::calc_parallel_marking_threads() {
1161 if (G1CollectedHeap::use_parallel_gc_threads()) {
1162 uint n_conc_workers = 0;
1163 if (!UseDynamicNumberOfGCThreads ||
1164 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1165 !ForceDynamicNumberOfGCThreads)) {
1166 n_conc_workers = max_parallel_marking_threads();
1167 } else {
1168 n_conc_workers =
1169 AdaptiveSizePolicy::calc_default_active_workers(
1170 max_parallel_marking_threads(),
1171 1, /* Minimum workers */
1172 parallel_marking_threads(),
1173 Threads::number_of_non_daemon_threads());
1174 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1175 // that scaling has already gone into "_max_parallel_marking_threads".
1176 }
1177 assert(n_conc_workers > 0, "Always need at least 1");
1178 return n_conc_workers;
1179 }
1180 // If we are not running with any parallel GC threads we will not
1181 // have spawned any marking threads either. Hence the number of
1182 // concurrent workers should be 0.
1183 return 0;
1184 }
1185
1186 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1187 // Currently, only survivors can be root regions.
1188 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1189 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1190
1191 const uintx interval = PrefetchScanIntervalInBytes;
1192 HeapWord* curr = hr->bottom();
1193 const HeapWord* end = hr->top();
1194 while (curr < end) {
1195 Prefetch::read(curr, interval);
1196 oop obj = oop(curr);
1197 int size = obj->oop_iterate(&cl);
1198 assert(size == obj->size(), "sanity");
1199 curr += size;
1200 }
1201 }
1202
1203 class CMRootRegionScanTask : public AbstractGangTask {
1204 private:
1205 ConcurrentMark* _cm;
1206
1207 public:
1208 CMRootRegionScanTask(ConcurrentMark* cm) :
1209 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1210
1211 void work(uint worker_id) {
1212 assert(Thread::current()->is_ConcurrentGC_thread(),
1213 "this should only be done by a conc GC thread");
1214
1215 CMRootRegions* root_regions = _cm->root_regions();
1216 HeapRegion* hr = root_regions->claim_next();
1217 while (hr != NULL) {
1218 _cm->scanRootRegion(hr, worker_id);
1219 hr = root_regions->claim_next();
1220 }
1221 }
1222 };
1223
1224 void ConcurrentMark::scanRootRegions() {
1225 // scan_in_progress() will have been set to true only if there was
1226 // at least one root region to scan. So, if it's false, we
1227 // should not attempt to do any further work.
1228 if (root_regions()->scan_in_progress()) {
1229 _parallel_marking_threads = calc_parallel_marking_threads();
1230 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1231 "Maximum number of marking threads exceeded");
1232 uint active_workers = MAX2(1U, parallel_marking_threads());
1233
1234 CMRootRegionScanTask task(this);
1235 if (use_parallel_marking_threads()) {
1236 _parallel_workers->set_active_workers((int) active_workers);
1237 _parallel_workers->run_task(&task);
1238 } else {
1239 task.work(0);
1240 }
1241
1242 // It's possible that has_aborted() is true here without actually
1243 // aborting the survivor scan earlier. This is OK as it's
1244 // mainly used for sanity checking.
1245 root_regions()->scan_finished();
1246 }
1247 }
1248
1249 void ConcurrentMark::markFromRoots() {
1250 // we might be tempted to assert that:
1251 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1252 // "inconsistent argument?");
1253 // However that wouldn't be right, because it's possible that
1254 // a safepoint is indeed in progress as a younger generation
1255 // stop-the-world GC happens even as we mark in this generation.
1256
1257 _restart_for_overflow = false;
1258 force_overflow_conc()->init();
1259
1260 // _g1h has _n_par_threads
1261 _parallel_marking_threads = calc_parallel_marking_threads();
1262 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1263 "Maximum number of marking threads exceeded");
1264
1265 uint active_workers = MAX2(1U, parallel_marking_threads());
1266
1267 // Parallel task terminator is set in "set_concurrency_and_phase()"
1268 set_concurrency_and_phase(active_workers, true /* concurrent */);
1269
1270 CMConcurrentMarkingTask markingTask(this, cmThread());
1271 if (use_parallel_marking_threads()) {
1272 _parallel_workers->set_active_workers((int)active_workers);
1273 // Don't set _n_par_threads because it affects MT in process_strong_roots()
1274 // and the decisions on that MT processing is made elsewhere.
1275 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1276 _parallel_workers->run_task(&markingTask);
1277 } else {
1278 markingTask.work(0);
1279 }
1280 print_stats();
1281 }
1282
1283 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1284 // world is stopped at this checkpoint
1285 assert(SafepointSynchronize::is_at_safepoint(),
1286 "world should be stopped");
1287
1288 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1289
1290 // If a full collection has happened, we shouldn't do this.
1291 if (has_aborted()) {
1292 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1293 return;
1294 }
1295
1296 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1297
1298 if (VerifyDuringGC) {
1299 HandleMark hm; // handle scope
1300 Universe::heap()->prepare_for_verify();
1301 Universe::verify(VerifyOption_G1UsePrevMarking,
1302 " VerifyDuringGC:(before)");
1303 }
1304 g1h->check_bitmaps("Remark Start");
1305
1306 G1CollectorPolicy* g1p = g1h->g1_policy();
1307 g1p->record_concurrent_mark_remark_start();
1308
1309 double start = os::elapsedTime();
1310
1311 checkpointRootsFinalWork();
1312
1313 double mark_work_end = os::elapsedTime();
1314
1315 weakRefsWork(clear_all_soft_refs);
1316
1317 if (has_overflown()) {
1318 // Oops. We overflowed. Restart concurrent marking.
1319 _restart_for_overflow = true;
1320 if (G1TraceMarkStackOverflow) {
1321 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1322 }
1323
1324 // Verify the heap w.r.t. the previous marking bitmap.
1325 if (VerifyDuringGC) {
1326 HandleMark hm; // handle scope
1327 Universe::heap()->prepare_for_verify();
1328 Universe::verify(VerifyOption_G1UsePrevMarking,
1329 " VerifyDuringGC:(overflow)");
1330 }
1331
1332 // Clear the marking state because we will be restarting
1333 // marking due to overflowing the global mark stack.
1334 reset_marking_state();
1335 } else {
1336 // Aggregate the per-task counting data that we have accumulated
1337 // while marking.
1338 aggregate_count_data();
1339
1340 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1341 // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1344 satb_mq_set.set_active_all_threads(false, /* new active value */
1345 true /* expected_active */);
1346
1347 if (VerifyDuringGC) {
1348 HandleMark hm; // handle scope
1349 Universe::heap()->prepare_for_verify();
1350 Universe::verify(VerifyOption_G1UseNextMarking,
1351 " VerifyDuringGC:(after)");
1352 }
1353 g1h->check_bitmaps("Remark End");
1354 assert(!restart_for_overflow(), "sanity");
1355 // Completely reset the marking state since marking completed
1356 set_non_marking_state();
1357 }
1358
1359 // Expand the marking stack, if we have to and if we can.
1360 if (_markStack.should_expand()) {
1361 _markStack.expand();
1362 }
1363
1364 // Statistics
1365 double now = os::elapsedTime();
1366 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1367 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1368 _remark_times.add((now - start) * 1000.0);
1369
1370 g1p->record_concurrent_mark_remark_end();
1371
1372 G1CMIsAliveClosure is_alive(g1h);
1373 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1374 }
1375
1376 // Base class of the closures that finalize and verify the
1377 // liveness counting data.
1378 class CMCountDataClosureBase: public HeapRegionClosure {
1379 protected:
1380 G1CollectedHeap* _g1h;
1381 ConcurrentMark* _cm;
1382 CardTableModRefBS* _ct_bs;
1383
1384 BitMap* _region_bm;
1385 BitMap* _card_bm;
1386
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit in the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits in the region bitmap that correspond to its
  // associated "continues humongous" regions.
1392 void set_bit_for_region(HeapRegion* hr) {
1393 assert(!hr->continuesHumongous(), "should have filtered those out");
1394
1395 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1396 if (!hr->startsHumongous()) {
1397 // Normal (non-humongous) case: just set the bit.
1398 _region_bm->par_at_put(index, true);
1399 } else {
1400 // Starts humongous case: calculate how many regions are part of
1401 // this humongous region and then set the bit range.
1402 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1403 _region_bm->par_at_put_range(index, end_index, true);
1404 }
1405 }
1406
1407 public:
1408 CMCountDataClosureBase(G1CollectedHeap* g1h,
1409 BitMap* region_bm, BitMap* card_bm):
1410 _g1h(g1h), _cm(g1h->concurrent_mark()),
1411 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1412 _region_bm(region_bm), _card_bm(card_bm) { }
1413 };
1414
1415 // Closure that calculates the # live objects per region. Used
1416 // for verification purposes during the cleanup pause.
1417 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1418 CMBitMapRO* _bm;
1419 size_t _region_marked_bytes;
1420
1421 public:
1422 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1423 BitMap* region_bm, BitMap* card_bm) :
1424 CMCountDataClosureBase(g1h, region_bm, card_bm),
1425 _bm(bm), _region_marked_bytes(0) { }
1426
1427 bool doHeapRegion(HeapRegion* hr) {
1428
1429 if (hr->continuesHumongous()) {
1430 // We will ignore these here and process them when their
1431 // associated "starts humongous" region is processed (see
1432 // set_bit_for_heap_region()). Note that we cannot rely on their
1433 // associated "starts humongous" region to have their bit set to
1434 // 1 since, due to the region chunking in the parallel region
1435 // iteration, a "continues humongous" region might be visited
1436 // before its associated "starts humongous".
1437 return false;
1438 }
1439
1440 HeapWord* ntams = hr->next_top_at_mark_start();
1441 HeapWord* start = hr->bottom();
1442
1443 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1444 err_msg("Preconditions not met - "
1445 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1446 p2i(start), p2i(ntams), p2i(hr->end())));
1447
1448 // Find the first marked object at or after "start".
1449 start = _bm->getNextMarkedWordAddress(start, ntams);
1450
1451 size_t marked_bytes = 0;
1452
1453 while (start < ntams) {
1454 oop obj = oop(start);
1455 int obj_sz = obj->size();
1456 HeapWord* obj_end = start + obj_sz;
1457
1458 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1459 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1460
1461 // Note: if we're looking at the last region in heap - obj_end
1462 // could be actually just beyond the end of the heap; end_idx
1463 // will then correspond to a (non-existent) card that is also
1464 // just beyond the heap.
1465 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1466 // end of object is not card aligned - increment to cover
1467 // all the cards spanned by the object
1468 end_idx += 1;
1469 }
1470
1471 // Set the bits in the card BM for the cards spanned by this object.
1472 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1473
1474 // Add the size of this object to the number of marked bytes.
1475 marked_bytes += (size_t)obj_sz * HeapWordSize;
1476
1477 // Find the next marked object after this one.
1478 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1479 }
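    // Worked example for the card-index rounding above (assuming 512-byte
    // cards): an object occupying bytes [card_base + 100, card_base + 700) of
    // a card-aligned card_base gets start_idx for the first card, obj_end is
    // not card aligned so end_idx is bumped by one, and the half-open range
    // [start_idx, end_idx) covers both cards the object touches.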
1480
1481 // Mark the allocated-since-marking portion...
1482 HeapWord* top = hr->top();
1483 if (ntams < top) {
1484 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1485 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1486
1487 // Note: if we're looking at the last region in heap - top
1488 // could be actually just beyond the end of the heap; end_idx
1489 // will then correspond to a (non-existent) card that is also
1490 // just beyond the heap.
1491 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1492 // end of object is not card aligned - increment to cover
1493 // all the cards spanned by the object
1494 end_idx += 1;
1495 }
1496 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1497
1498 // This definitely means the region has live objects.
1499 set_bit_for_region(hr);
1500 }
1501
1502 // Update the live region bitmap.
1503 if (marked_bytes > 0) {
1504 set_bit_for_region(hr);
1505 }
1506
1507 // Set the marked bytes for the current region so that
1508 // it can be queried by a calling verification routine
1509 _region_marked_bytes = marked_bytes;
1510
1511 return false;
1512 }
1513
1514 size_t region_marked_bytes() const { return _region_marked_bytes; }
1515 };
1516
1517 // Heap region closure used for verifying the counting data
1518 // that was accumulated concurrently and aggregated during
1519 // the remark pause. This closure is applied to the heap
1520 // regions during the STW cleanup pause.
1521
1522 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1523 G1CollectedHeap* _g1h;
1524 ConcurrentMark* _cm;
1525 CalcLiveObjectsClosure _calc_cl;
1526 BitMap* _region_bm; // Region BM to be verified
1527 BitMap* _card_bm; // Card BM to be verified
1528 bool _verbose; // verbose output?
1529
1530 BitMap* _exp_region_bm; // Expected Region BM values
1531 BitMap* _exp_card_bm; // Expected card BM values
1532
1533 int _failures;
1534
1535 public:
1536 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1537 BitMap* region_bm,
1538 BitMap* card_bm,
1539 BitMap* exp_region_bm,
1540 BitMap* exp_card_bm,
1541 bool verbose) :
1542 _g1h(g1h), _cm(g1h->concurrent_mark()),
1543 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1544 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1545 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1546 _failures(0) { }
1547
1548 int failures() const { return _failures; }
1549
1550 bool doHeapRegion(HeapRegion* hr) {
1551 if (hr->continuesHumongous()) {
1552 // We will ignore these here and process them when their
1553 // associated "starts humongous" region is processed (see
1554 // set_bit_for_heap_region()). Note that we cannot rely on their
1555 // associated "starts humongous" region to have their bit set to
1556 // 1 since, due to the region chunking in the parallel region
1557 // iteration, a "continues humongous" region might be visited
1558 // before its associated "starts humongous".
1559 return false;
1560 }
1561
1562 int failures = 0;
1563
1564 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1565 // this region and set the corresponding bits in the expected region
1566 // and card bitmaps.
1567 bool res = _calc_cl.doHeapRegion(hr);
1568 assert(res == false, "should be continuing");
1569
1570 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1571 Mutex::_no_safepoint_check_flag);
1572
1573 // Verify the marked bytes for this region.
1574 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1575 size_t act_marked_bytes = hr->next_marked_bytes();
1576
    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
1579 if (exp_marked_bytes > act_marked_bytes) {
1580 if (_verbose) {
1581 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1582 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1583 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1584 }
1585 failures += 1;
1586 }
1587
1588 // Verify the bit, for this region, in the actual and expected
1589 // (which was just calculated) region bit maps.
1590 // We're not OK if the bit in the calculated expected region
1591 // bitmap is set and the bit in the actual region bitmap is not.
1592 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1593
1594 bool expected = _exp_region_bm->at(index);
1595 bool actual = _region_bm->at(index);
1596 if (expected && !actual) {
1597 if (_verbose) {
1598 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1599 "expected: %s, actual: %s",
1600 hr->hrs_index(),
1601 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1602 }
1603 failures += 1;
1604 }
1605
1606 // Verify that the card bit maps for the cards spanned by the current
1607 // region match. We have an error if we have a set bit in the expected
1608 // bit map and the corresponding bit in the actual bitmap is not set.
1609
1610 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1611 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1612
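// Walk the card indices spanned by this region; any card that is set in the
// expected (freshly recalculated) card bitmap but clear in the actual card
// bitmap indicates a card we failed to mark during the concurrent phase.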
1613 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1614 expected = _exp_card_bm->at(i);
1615 actual = _card_bm->at(i);
1616
1617 if (expected && !actual) {
1618 if (_verbose) {
1619 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1620 "expected: %s, actual: %s",
1621 hr->hrs_index(), i,
1622 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1623 }
1624 failures += 1;
1625 }
1626 }
1627
1628 if (failures > 0 && _verbose) {
1629 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1630 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1631 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1632 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1633 }
1634
1635 _failures += failures;
1636
1637 // We could stop iteration over the heap when we
1638 // find the first violating region by returning true.
1639 return false;
1640 }
1641 };
1642
1643 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1644 protected:
1645 G1CollectedHeap* _g1h;
1646 ConcurrentMark* _cm;
1647 BitMap* _actual_region_bm;
1648 BitMap* _actual_card_bm;
1649
1650 uint _n_workers;
1651
1652 BitMap* _expected_region_bm;
1653 BitMap* _expected_card_bm;
1654
1655 int _failures;
1656 bool _verbose;
1657
1658 public:
1659 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1660 BitMap* region_bm, BitMap* card_bm,
1661 BitMap* expected_region_bm, BitMap* expected_card_bm)
1662 : AbstractGangTask("G1 verify final counting"),
1663 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1664 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1665 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1666 _failures(0), _verbose(false),
1667 _n_workers(0) {
1668 assert(VerifyDuringGC, "don't call this otherwise");
1669
1670 // Use the value already set as the number of active threads
1671 // in the call to run_task().
1672 if (G1CollectedHeap::use_parallel_gc_threads()) {
1673 assert( _g1h->workers()->active_workers() > 0,
1674 "Should have been previously set");
1675 _n_workers = _g1h->workers()->active_workers();
1676 } else {
1677 _n_workers = 1;
1678 }
1679
1680 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1681 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1682
1683 _verbose = _cm->verbose_medium();
1684 }
1685
1686 void work(uint worker_id) {
1687 assert(worker_id < _n_workers, "invariant");
1688
1689 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1690 _actual_region_bm, _actual_card_bm,
1691 _expected_region_bm,
1692 _expected_card_bm,
1693 _verbose);
1694
1695 if (G1CollectedHeap::use_parallel_gc_threads()) {
1696 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1697 worker_id,
1698 _n_workers,
1699 HeapRegion::VerifyCountClaimValue);
1700 } else {
1701 _g1h->heap_region_iterate(&verify_cl);
1702 }
1703
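// Accumulate this worker's failure count into the shared total atomically,
// since multiple workers may finish and update it concurrently.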
1704 Atomic::add(verify_cl.failures(), &_failures);
1705 }
1706
1707 int failures() const { return _failures; }
1708 };
1709
1710 // Closure that finalizes the liveness counting data.
1711 // Used during the cleanup pause.
1712 // Sets the bits corresponding to the interval [NTAMS, top)
1713 // (which contains the implicitly live objects) in the
1714 // card liveness bitmap. Also sets the bit, in the region
1715 // liveness bitmap, for each region containing live data.
1716
1717 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1718 public:
1719 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1720 BitMap* region_bm,
1721 BitMap* card_bm) :
1722 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1723
1724 bool doHeapRegion(HeapRegion* hr) {
1725
1726 if (hr->continuesHumongous()) {
1727 // We will ignore these here and process them when their
1728 // associated "starts humongous" region is processed (see
1729 // set_bit_for_heap_region()). Note that we cannot rely on their
1730 // associated "starts humongous" region to have its bit set to
1731 // 1 since, due to the region chunking in the parallel region
1732 // iteration, a "continues humongous" region might be visited
1733 // before its associated "starts humongous".
1734 return false;
1735 }
1736
1737 HeapWord* ntams = hr->next_top_at_mark_start();
1738 HeapWord* top = hr->top();
1739
1740 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1741
1742 // Mark the allocated-since-marking portion...
1743 if (ntams < top) {
1744 // This definitely means the region has live objects.
1745 set_bit_for_region(hr);
1746
1747 // Now set the bits in the card bitmap for [ntams, top)
1748 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1749 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1750
1751 // Note: if we're looking at the last region in the heap - top
1752 // could actually be just beyond the end of the heap; end_idx
1753 // will then correspond to a (non-existent) card that is also
1754 // just beyond the heap.
1755 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1756 // end of object is not card aligned - increment to cover
1757 // all the cards spanned by the object
1758 end_idx += 1;
1759 }
1760
1761 assert(end_idx <= _card_bm->size(),
1762 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1763 end_idx, _card_bm->size()));
1764 assert(start_idx < _card_bm->size(),
1765 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1766 start_idx, _card_bm->size()));
1767
1768 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1769 }
1770
1771 // Set the bit for the region if it contains live data
1772 if (hr->next_marked_bytes() > 0) {
1773 set_bit_for_region(hr);
1774 }
1775
1776 return false;
1777 }
1778 };
1779
1780 class G1ParFinalCountTask: public AbstractGangTask {
1781 protected:
1782 G1CollectedHeap* _g1h;
1783 ConcurrentMark* _cm;
1784 BitMap* _actual_region_bm;
1785 BitMap* _actual_card_bm;
1786
1787 uint _n_workers;
1788
1789 public:
1790 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1791 : AbstractGangTask("G1 final counting"),
1792 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1793 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1794 _n_workers(0) {
1795 // Use the value already set as the number of active threads
1796 // in the call to run_task().
1797 if (G1CollectedHeap::use_parallel_gc_threads()) {
1798 assert( _g1h->workers()->active_workers() > 0,
1799 "Should have been previously set");
1800 _n_workers = _g1h->workers()->active_workers();
1801 } else {
1802 _n_workers = 1;
1803 }
1804 }
1805
1806 void work(uint worker_id) {
1807 assert(worker_id < _n_workers, "invariant");
1808
1809 FinalCountDataUpdateClosure final_update_cl(_g1h,
1810 _actual_region_bm,
1811 _actual_card_bm);
1812
1813 if (G1CollectedHeap::use_parallel_gc_threads()) {
1814 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1815 worker_id,
1816 _n_workers,
1817 HeapRegion::FinalCountClaimValue);
1818 } else {
1819 _g1h->heap_region_iterate(&final_update_cl);
1820 }
1821 }
1822 };
1823
1824 class G1ParNoteEndTask;
1825
1826 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1827 G1CollectedHeap* _g1;
1828 size_t _max_live_bytes;
1829 uint _regions_claimed;
1830 size_t _freed_bytes;
1831 FreeRegionList* _local_cleanup_list;
1832 HeapRegionSetCount _old_regions_removed;
1833 HeapRegionSetCount _humongous_regions_removed;
1834 HRRSCleanupTask* _hrrs_cleanup_task;
1835 double _claimed_region_time;
1836 double _max_region_time;
1837
1838 public:
1839 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1840 FreeRegionList* local_cleanup_list,
1841 HRRSCleanupTask* hrrs_cleanup_task) :
1842 _g1(g1),
1843 _max_live_bytes(0), _regions_claimed(0),
1844 _freed_bytes(0),
1845 _claimed_region_time(0.0), _max_region_time(0.0),
1846 _local_cleanup_list(local_cleanup_list),
1847 _old_regions_removed(),
1848 _humongous_regions_removed(),
1849 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1850
1851 size_t freed_bytes() { return _freed_bytes; }
1852 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1853 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1854
1855 bool doHeapRegion(HeapRegion *hr) {
1856 if (hr->continuesHumongous()) {
1857 return false;
1858 }
1859 // We use a claim value of zero here because all regions
1860 // were claimed with value 1 in the FinalCount task.
1861 _g1->reset_gc_time_stamps(hr);
1862 double start = os::elapsedTime();
1863 _regions_claimed++;
1864 hr->note_end_of_marking();
1865 _max_live_bytes += hr->max_live_bytes();
1866
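// A non-young region that is in use but has no live data after marking is
// entirely garbage: reclaim it now and record it in the local removal counts.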
1867 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1868 _freed_bytes += hr->used();
1869 hr->set_containing_set(NULL);
1870 if (hr->isHumongous()) {
1871 assert(hr->startsHumongous(), "we should only see starts humongous");
1872 _humongous_regions_removed.increment(1u, hr->capacity());
1873 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1874 } else {
1875 _old_regions_removed.increment(1u, hr->capacity());
1876 _g1->free_region(hr, _local_cleanup_list, true);
1877 }
1878 } else {
1879 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1880 }
1881
1882 double region_time = (os::elapsedTime() - start);
1883 _claimed_region_time += region_time;
1884 if (region_time > _max_region_time) {
1885 _max_region_time = region_time;
1886 }
1887 return false;
1888 }
1889
1890 size_t max_live_bytes() { return _max_live_bytes; }
1891 uint regions_claimed() { return _regions_claimed; }
1892 double claimed_region_time_sec() { return _claimed_region_time; }
1893 double max_region_time_sec() { return _max_region_time; }
1894 };
1895
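// Parallel task that applies G1NoteEndOfConcMarkClosure to the heap regions
// at the end of marking and then, under a lock, merges each worker's results
// (freed bytes, max live bytes) and appends the locally collected free
// regions to the global cleanup list.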
1896 class G1ParNoteEndTask: public AbstractGangTask {
1897 friend class G1NoteEndOfConcMarkClosure;
1898
1899 protected:
1900 G1CollectedHeap* _g1h;
1901 size_t _max_live_bytes;
1902 size_t _freed_bytes;
1903 FreeRegionList* _cleanup_list;
1904
1905 public:
1906 G1ParNoteEndTask(G1CollectedHeap* g1h,
1907 FreeRegionList* cleanup_list) :
1908 AbstractGangTask("G1 note end"), _g1h(g1h),
1909 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1910
1911 void work(uint worker_id) {
1912 double start = os::elapsedTime();
1913 FreeRegionList local_cleanup_list("Local Cleanup List");
1914 HRRSCleanupTask hrrs_cleanup_task;
1915 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1916 &hrrs_cleanup_task);
1917 if (G1CollectedHeap::use_parallel_gc_threads()) {
1918 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1919 _g1h->workers()->active_workers(),
1920 HeapRegion::NoteEndClaimValue);
1921 } else {
1922 _g1h->heap_region_iterate(&g1_note_end);
1923 }
1924 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1925
1926 // Now update the lists
1927 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1928 {
1929 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1930 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1931 _max_live_bytes += g1_note_end.max_live_bytes();
1932 _freed_bytes += g1_note_end.freed_bytes();
1933
1934 // If we iterate over the global cleanup list at the end of
1935 // cleanup to do this printing we will not guarantee to only
1936 // generate output for the newly-reclaimed regions (the list
1937 // might not be empty at the beginning of cleanup; we might
1938 // still be working on its previous contents). So we do the
1939 // printing here, before we append the new regions to the global
1940 // cleanup list.
1941
1942 G1HRPrinter* hr_printer = _g1h->hr_printer();
1943 if (hr_printer->is_active()) {
1944 FreeRegionListIterator iter(&local_cleanup_list);
1945 while (iter.more_available()) {
1946 HeapRegion* hr = iter.get_next();
1947 hr_printer->cleanup(hr);
1948 }
1949 }
1950
1951 _cleanup_list->add_ordered(&local_cleanup_list);
1952 assert(local_cleanup_list.is_empty(), "post-condition");
1953
1954 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1955 }
1956 }
1957 size_t max_live_bytes() { return _max_live_bytes; }
1958 size_t freed_bytes() { return _freed_bytes; }
1959 };
1960
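// Parallel task that asks the remembered set (G1RemSet) to scrub itself,
// passing in the region and card liveness bitmaps computed during cleanup.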
1961 class G1ParScrubRemSetTask: public AbstractGangTask {
1962 protected:
1963 G1RemSet* _g1rs;
1964 BitMap* _region_bm;
1965 BitMap* _card_bm;
1966 public:
1967 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1968 BitMap* region_bm, BitMap* card_bm) :
1969 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1970 _region_bm(region_bm), _card_bm(card_bm) { }
1971
1972 void work(uint worker_id) {
1973 if (G1CollectedHeap::use_parallel_gc_threads()) {
1974 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1975 HeapRegion::ScrubRemSetClaimValue);
1976 } else {
1977 _g1rs->scrub(_region_bm, _card_bm);
1978 }
1979 }
1980
1981 };
1982
1983 void ConcurrentMark::cleanup() {
1984 // world is stopped at this checkpoint
1985 assert(SafepointSynchronize::is_at_safepoint(),
1986 "world should be stopped");
1987 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1988
1989 // If a full collection has happened, we shouldn't do this.
1990 if (has_aborted()) {
1991 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1992 return;
1993 }
1994
1995 g1h->verify_region_sets_optional();
1996
1997 if (VerifyDuringGC) {
1998 HandleMark hm; // handle scope
1999 Universe::heap()->prepare_for_verify();
2000 Universe::verify(VerifyOption_G1UsePrevMarking,
2001 " VerifyDuringGC:(before)");
2002 }
2003 g1h->check_bitmaps("Cleanup Start");
2004
2005 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2006 g1p->record_concurrent_mark_cleanup_start();
2007
2008 double start = os::elapsedTime();
2009
2010 HeapRegionRemSet::reset_for_cleanup_tasks();
2011
2012 uint n_workers;
2013
2014 // Do counting once more with the world stopped for good measure.
2015 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2016
2017 if (G1CollectedHeap::use_parallel_gc_threads()) {
2018 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2019 "sanity check");
2020
2021 g1h->set_par_threads();
2022 n_workers = g1h->n_par_threads();
2023 assert(g1h->n_par_threads() == n_workers,
2024 "Should not have been reset");
2025 g1h->workers()->run_task(&g1_par_count_task);
2026 // Done with the parallel phase so reset to 0.
2027 g1h->set_par_threads(0);
2028
2029 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2030 "sanity check");
2031 } else {
2032 n_workers = 1;
2033 g1_par_count_task.work(0);
2034 }
2035
2036 if (VerifyDuringGC) {
2037 // Verify that the counting data accumulated during marking matches
2038 // that calculated by walking the marking bitmap.
2039
2040 // Bitmaps to hold expected values
2041 BitMap expected_region_bm(_region_bm.size(), true);
2042 BitMap expected_card_bm(_card_bm.size(), true);
2043
2044 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2045 &_region_bm,
2046 &_card_bm,
2047 &expected_region_bm,
2048 &expected_card_bm);
2049
2050 if (G1CollectedHeap::use_parallel_gc_threads()) {
2051 g1h->set_par_threads((int)n_workers);
2052 g1h->workers()->run_task(&g1_par_verify_task);
2053 // Done with the parallel phase so reset to 0.
2054 g1h->set_par_threads(0);
2055
2056 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2057 "sanity check");
2058 } else {
2059 g1_par_verify_task.work(0);
2060 }
2061
2062 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2063 }
2064
2065 size_t start_used_bytes = g1h->used();
2066 g1h->set_marking_complete();
2067
2068 double count_end = os::elapsedTime();
2069 double this_final_counting_time = (count_end - start);
2070 _total_counting_time += this_final_counting_time;
2071
2072 if (G1PrintRegionLivenessInfo) {
2073 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2074 _g1h->heap_region_iterate(&cl);
2075 }
2076
2077 // Install newly created mark bitMap as "prev".
2078 swapMarkBitMaps();
2079
2080 g1h->reset_gc_time_stamp();
2081
2082 // Note end of marking in all heap regions.
2083 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2084 if (G1CollectedHeap::use_parallel_gc_threads()) {
2085 g1h->set_par_threads((int)n_workers);
2086 g1h->workers()->run_task(&g1_par_note_end_task);
2087 g1h->set_par_threads(0);
2088
2089 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2090 "sanity check");
2091 } else {
2092 g1_par_note_end_task.work(0);
2093 }
2094 g1h->check_gc_time_stamps();
2095
2096 if (!cleanup_list_is_empty()) {
2097 // The cleanup list is not empty, so we'll have to process it
2098 // concurrently. Notify anyone else that might want free
2099 // regions that there will be more free regions coming soon.
2100 g1h->set_free_regions_coming();
2101 }
2102
2103 // Note: the RSet scrubbing must be done before the record_concurrent_mark_cleanup_end()
2104 // call below, since it affects the metric by which we sort the heap regions.
2105 if (G1ScrubRemSets) {
2106 double rs_scrub_start = os::elapsedTime();
2107 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2108 if (G1CollectedHeap::use_parallel_gc_threads()) {
2109 g1h->set_par_threads((int)n_workers);
2110 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2111 g1h->set_par_threads(0);
2112
2113 assert(g1h->check_heap_region_claim_values(
2114 HeapRegion::ScrubRemSetClaimValue),
2115 "sanity check");
2116 } else {
2117 g1_par_scrub_rs_task.work(0);
2118 }
2119
2120 double rs_scrub_end = os::elapsedTime();
2121 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2122 _total_rs_scrub_time += this_rs_scrub_time;
2123 }
2124
2125 // this will also free any regions totally full of garbage objects,
2126 // and sort the regions.
2127 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2128
2129 // Statistics.
2130 double end = os::elapsedTime();
2131 _cleanup_times.add((end - start) * 1000.0);
2132
2133 if (G1Log::fine()) {
2134 g1h->print_size_transition(gclog_or_tty,
2135 start_used_bytes,
2136 g1h->used(),
2137 g1h->capacity());
2138 }
2139
2140 // Clean up will have freed any regions completely full of garbage.
2141 // Update the soft reference policy with the new heap occupancy.
2142 Universe::update_heap_info_at_gc();
2143
2144 // We need to make this be a "collection" so any collection pause that
2145 // races with it goes around and waits for completeCleanup to finish.
2146 g1h->increment_total_collections();
2147
2148 // We reclaimed old regions so we should calculate the sizes to make
2149 // sure we update the old gen/space data.
2150 g1h->g1mm()->update_sizes();
2151
2152 if (VerifyDuringGC) {
2153 HandleMark hm; // handle scope
2154 Universe::heap()->prepare_for_verify();
2155 Universe::verify(VerifyOption_G1UsePrevMarking,
2156 " VerifyDuringGC:(after)");
2157 }
2158 g1h->check_bitmaps("Cleanup End");
2159
2160 g1h->verify_region_sets_optional();
2161 g1h->trace_heap_after_concurrent_cycle();
2162 }
2163
2164 void ConcurrentMark::completeCleanup() {
2165 if (has_aborted()) return;
2166
2167 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2168
2169 _cleanup_list.verify_optional();
2170 FreeRegionList tmp_free_list("Tmp Free List");
2171
2172 if (G1ConcRegionFreeingVerbose) {
2173 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2174 "cleanup list has %u entries",
2175 _cleanup_list.length());
2176 }
2177
2178 // No one else should be accessing the _cleanup_list at this point,
2179 // so it's not necessary to take any locks.
2180 while (!_cleanup_list.is_empty()) {
2181 HeapRegion* hr = _cleanup_list.remove_head();
2182 assert(hr != NULL, "Got NULL from a non-empty list");
2183 hr->par_clear();
2184 tmp_free_list.add_ordered(hr);
2185
2186 // Instead of adding one region at a time to the secondary_free_list,
2187 // we accumulate them in the local list and move them a few at a
2188 // time. This also cuts down on the number of notify_all() calls
2189 // we do during this process. We'll also append the local list when
2190 // _cleanup_list is empty (which means we just removed the last
2191 // region from the _cleanup_list).
2192 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2193 _cleanup_list.is_empty()) {
2194 if (G1ConcRegionFreeingVerbose) {
2195 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2196 "appending %u entries to the secondary_free_list, "
2197 "cleanup list still has %u entries",
2198 tmp_free_list.length(),
2199 _cleanup_list.length());
2200 }
2201
2202 {
2203 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2204 g1h->secondary_free_list_add(&tmp_free_list);
2205 SecondaryFreeList_lock->notify_all();
2206 }
2207
2208 if (G1StressConcRegionFreeing) {
2209 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2210 os::sleep(Thread::current(), (jlong) 1, false);
2211 }
2212 }
2213 }
2214 }
2215 assert(tmp_free_list.is_empty(), "post-condition");
2216 }
2217
2218 // Supporting Object and Oop closures for reference discovery
2219 // and processing during marking.
2220
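// A non-NULL reference is considered alive if it points outside the G1
// reserved heap, or if the current marking information does not show the
// object to be dead (see is_obj_ill()).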
2221 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2222 HeapWord* addr = (HeapWord*)obj;
2223 return addr != NULL &&
2224 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2225 }
2226
2227 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2228 // Uses the CMTask associated with a worker thread (for serial reference
2229 // processing the CMTask for worker 0 is used) to preserve (mark) and
2230 // trace referent objects.
2231 //
2232 // Using the CMTask and embedded local queues avoids having the worker
2233 // threads operating on the global mark stack. This reduces the risk
2234 // of overflowing the stack - which we would rather avoid at this late
2235 // stage. Also using the tasks' local queues removes the potential
2236 // for the workers to interfere with each other, which could occur if
2237 // they were operating on the global stack.
2238
2239 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2240 ConcurrentMark* _cm;
2241 CMTask* _task;
2242 int _ref_counter_limit;
2243 int _ref_counter;
2244 bool _is_serial;
2245 public:
2246 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2247 _cm(cm), _task(task), _is_serial(is_serial),
2248 _ref_counter_limit(G1RefProcDrainInterval) {
2249 assert(_ref_counter_limit > 0, "sanity");
2250 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2251 _ref_counter = _ref_counter_limit;
2252 }
2253
2254 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2255 virtual void do_oop( oop* p) { do_oop_work(p); }
2256
2257 template <class T> void do_oop_work(T* p) {
2258 if (!_cm->has_overflown()) {
2259 oop obj = oopDesc::load_decode_heap_oop(p);
2260 if (_cm->verbose_high()) {
2261 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2262 "*"PTR_FORMAT" = "PTR_FORMAT,
2263 _task->worker_id(), p2i(p), p2i((void*) obj));
2264 }
2265
2266 _task->deal_with_reference(obj);
2267 _ref_counter--;
2268
2269 if (_ref_counter == 0) {
2270 // We have dealt with _ref_counter_limit references, pushing them
2271 // and objects reachable from them on to the local stack (and
2272 // possibly the global stack). Call CMTask::do_marking_step() to
2273 // process these entries.
2274 //
2275 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2276 // there's nothing more to do (i.e. we're done with the entries that
2277 // were pushed as a result of the CMTask::deal_with_reference() calls
2278 // above) or we overflow.
2279 //
2280 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2281 // flag while there may still be some work to do. (See the comment at
2282 // the beginning of CMTask::do_marking_step() for those conditions -
2283 // one of which is reaching the specified time target.) It is only
2284 // when CMTask::do_marking_step() returns without setting the
2285 // has_aborted() flag that the marking step has completed.
2286 do {
2287 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2288 _task->do_marking_step(mark_step_duration_ms,
2289 false /* do_termination */,
2290 _is_serial);
2291 } while (_task->has_aborted() && !_cm->has_overflown());
2292 _ref_counter = _ref_counter_limit;
2293 }
2294 } else {
2295 if (_cm->verbose_high()) {
2296 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2297 }
2298 }
2299 }
2300 };
2301
2302 // 'Drain' oop closure used by both serial and parallel reference processing.
2303 // Uses the CMTask associated with a given worker thread (for serial
2304 // reference processing the CMTask for worker 0 is used). Calls the
2305 // do_marking_step routine, with an unbelievably large timeout value,
2306 // to drain the marking data structures of the remaining entries
2307 // added by the 'keep alive' oop closure above.
2308
2309 class G1CMDrainMarkingStackClosure: public VoidClosure {
2310 ConcurrentMark* _cm;
2311 CMTask* _task;
2312 bool _is_serial;
2313 public:
2314 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2315 _cm(cm), _task(task), _is_serial(is_serial) {
2316 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2317 }
2318
2319 void do_void() {
2320 do {
2321 if (_cm->verbose_high()) {
2322 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2323 _task->worker_id(), BOOL_TO_STR(_is_serial));
2324 }
2325
2326 // We call CMTask::do_marking_step() to completely drain the local
2327 // and global marking stacks of entries pushed by the 'keep alive'
2328 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2329 //
2330 // CMTask::do_marking_step() is called in a loop, which we'll exit
2331 // if there's nothing more to do (i.e. we've completely drained the
2332 // entries that were pushed as a result of applying the 'keep alive'
2333 // closure to the entries on the discovered ref lists) or we overflow
2334 // the global marking stack.
2335 //
2336 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2337 // flag while there may still be some work to do. (See the comment at
2338 // the beginning of CMTask::do_marking_step() for those conditions -
2339 // one of which is reaching the specified time target.) It is only
2340 // when CMTask::do_marking_step() returns without setting the
2341 // has_aborted() flag that the marking step has completed.
2342
2343 _task->do_marking_step(1000000000.0 /* something very large */,
2344 true /* do_termination */,
2345 _is_serial);
2346 } while (_task->has_aborted() && !_cm->has_overflown());
2347 }
2348 };
2349
2350 // Implementation of AbstractRefProcTaskExecutor for parallel
2351 // reference processing at the end of G1 concurrent marking
2352
2353 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2354 private:
2355 G1CollectedHeap* _g1h;
2356 ConcurrentMark* _cm;
2357 WorkGang* _workers;
2358 int _active_workers;
2359
2360 public:
2361 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2362 ConcurrentMark* cm,
2363 WorkGang* workers,
2364 int n_workers) :
2365 _g1h(g1h), _cm(cm),
2366 _workers(workers), _active_workers(n_workers) { }
2367
2368 // Executes the given task using concurrent marking worker threads.
2369 virtual void execute(ProcessTask& task);
2370 virtual void execute(EnqueueTask& task);
2371 };
2372
2373 class G1CMRefProcTaskProxy: public AbstractGangTask {
2374 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2375 ProcessTask& _proc_task;
2376 G1CollectedHeap* _g1h;
2377 ConcurrentMark* _cm;
2378
2379 public:
2380 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2381 G1CollectedHeap* g1h,
2382 ConcurrentMark* cm) :
2383 AbstractGangTask("Process reference objects in parallel"),
2384 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2385 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2386 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2387 }
2388
2389 virtual void work(uint worker_id) {
2390 CMTask* task = _cm->task(worker_id);
2391 G1CMIsAliveClosure g1_is_alive(_g1h);
2392 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2393 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2394
2395 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2396 }
2397 };
2398
2399 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2400 assert(_workers != NULL, "Need parallel worker threads.");
2401 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2402
2403 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2404
2405 // We need to reset the concurrency level before each
2406 // proxy task execution, so that the termination protocol
2407 // and overflow handling in CMTask::do_marking_step() know
2408 // how many workers to wait for.
2409 _cm->set_concurrency(_active_workers);
2410 _g1h->set_par_threads(_active_workers);
2411 _workers->run_task(&proc_task_proxy);
2412 _g1h->set_par_threads(0);
2413 }
2414
2415 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2416 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2417 EnqueueTask& _enq_task;
2418
2419 public:
2420 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2421 AbstractGangTask("Enqueue reference objects in parallel"),
2422 _enq_task(enq_task) { }
2423
2424 virtual void work(uint worker_id) {
2425 _enq_task.work(worker_id);
2426 }
2427 };
2428
2429 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2430 assert(_workers != NULL, "Need parallel worker threads.");
2431 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2432
2433 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2434
2435 // Not strictly necessary but...
2436 //
2437 // We need to reset the concurrency level before each
2438 // proxy task execution, so that the termination protocol
2439 // and overflow handling in CMTask::do_marking_step() know
2440 // how many workers to wait for.
2441 _cm->set_concurrency(_active_workers);
2442 _g1h->set_par_threads(_active_workers);
2443 _workers->run_task(&enq_task_proxy);
2444 _g1h->set_par_threads(0);
2445 }
2446
2447 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2448 if (has_overflown()) {
2449 // Skip processing the discovered references if we have
2450 // overflown the global marking stack. Reference objects
2451 // only get discovered once so it is OK to not
2452 // de-populate the discovered reference lists. We could have,
2453 // but the only benefit would be that, when marking restarts,
2454 // fewer reference objects are discovered.
2455 return;
2456 }
2457
2458 ResourceMark rm;
2459 HandleMark hm;
2460
2461 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2462
2463 // Is alive closure.
2464 G1CMIsAliveClosure g1_is_alive(g1h);
2465
2466 // Inner scope to exclude the cleaning of the string and symbol
2467 // tables from the displayed time.
2468 {
2469 if (G1Log::finer()) {
2470 gclog_or_tty->put(' ');
2471 }
2472 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2473
2474 ReferenceProcessor* rp = g1h->ref_processor_cm();
2475
2476 // See the comment in G1CollectedHeap::ref_processing_init()
2477 // about how reference processing currently works in G1.
2478
2479 // Set the soft reference policy
2480 rp->setup_policy(clear_all_soft_refs);
2481 assert(_markStack.isEmpty(), "mark stack should be empty");
2482
2483 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2484 // in serial reference processing. Note these closures are also
2485 // used for serially processing (by the current thread) the
2486 // JNI references during parallel reference processing.
2487 //
2488 // These closures do not need to synchronize with the worker
2489 // threads involved in parallel reference processing as these
2490 // instances are executed serially by the current thread (e.g.
2491 // reference processing is not multi-threaded and is thus
2492 // performed by the current thread instead of a gang worker).
2493 //
2494 // The gang tasks involved in parallel reference processing create
2495 // their own instances of these closures, which do their own
2496 // synchronization among themselves.
2497 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2498 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2499
2500 // We need at least one active thread. If reference processing
2501 // is not multi-threaded we use the current (VMThread) thread,
2502 // otherwise we use the work gang from the G1CollectedHeap and
2503 // we utilize all the worker threads we can.
2504 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2505 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
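// Clamp the number of workers to the number of CM tasks available, but
// always use at least one worker.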
2506 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2507
2508 // Parallel processing task executor.
2509 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2510 g1h->workers(), active_workers);
2511 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2512
2513 // Set the concurrency level. The phase was already set prior to
2514 // executing the remark task.
2515 set_concurrency(active_workers);
2516
2517 // Set the degree of MT processing here. If the discovery was done MT,
2518 // the number of threads involved during discovery could differ from
2519 // the number of active workers. This is OK as long as the discovered
2520 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2521 rp->set_active_mt_degree(active_workers);
2522
2523 // Process the weak references.
2524 const ReferenceProcessorStats& stats =
2525 rp->process_discovered_references(&g1_is_alive,
2526 &g1_keep_alive,
2527 &g1_drain_mark_stack,
2528 executor,
2529 g1h->gc_timer_cm());
2530 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2531
2532 // The do_oop work routines of the keep_alive and drain_marking_stack
2533 // oop closures will set the has_overflown flag if we overflow the
2534 // global marking stack.
2535
2536 assert(_markStack.overflow() || _markStack.isEmpty(),
2537 "mark stack should be empty (unless it overflowed)");
2538
2539 if (_markStack.overflow()) {
2540 // This should have been done already when we tried to push an
2541 // entry on to the global mark stack. But let's do it again.
2542 set_has_overflown();
2543 }
2544
2545 assert(rp->num_q() == active_workers, "sanity: number of reference queues should match the active workers");
2546
2547 rp->enqueue_discovered_references(executor);
2548
2549 rp->verify_no_references_recorded();
2550 assert(!rp->discovery_enabled(), "Post condition");
2551 }
2552
2553 if (has_overflown()) {
2554 // We can not trust g1_is_alive if the marking stack overflowed
2555 return;
2556 }
2557
2558 g1h->unlink_string_and_symbol_table(&g1_is_alive,
2559 /* process_strings */ false, // currently strings are always roots
2560 /* process_symbols */ true);
2561 }
2562
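// Swap the roles of the two marking bitmaps: the just-completed 'next'
// bitmap becomes the new 'prev' bitmap, and the old 'prev' bitmap becomes
// the 'next' bitmap for the following marking cycle.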
2563 void ConcurrentMark::swapMarkBitMaps() {
2564 CMBitMapRO* temp = _prevMarkBitMap;
2565 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2566 _nextMarkBitMap = (CMBitMap*) temp;
2567 }
2568
2569 class CMRemarkTask: public AbstractGangTask {
2570 private:
2571 ConcurrentMark* _cm;
2572 bool _is_serial;
2573 public:
2574 void work(uint worker_id) {
2575 // Since all available tasks are actually started, we should
2576 // only proceed if we're supposed to be active.
2577 if (worker_id < _cm->active_tasks()) {
2578 CMTask* task = _cm->task(worker_id);
2579 task->record_start_time();
2580 do {
2581 task->do_marking_step(1000000000.0 /* something very large */,
2582 true /* do_termination */,
2583 _is_serial);
2584 } while (task->has_aborted() && !_cm->has_overflown());
2585 // If we overflow, then we do not want to restart. We instead
2586 // want to abort remark and do concurrent marking again.
2587 task->record_end_time();
2588 }
2589 }
2590
2591 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2592 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2593 _cm->terminator()->reset_for_reuse(active_workers);
2594 }
2595 };
2596
2597 void ConcurrentMark::checkpointRootsFinalWork() {
2598 ResourceMark rm;
2599 HandleMark hm;
2600 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2601
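// Make sure the heap is parsable before the remark task walks it.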
2602 g1h->ensure_parsability(false);
2603
2604 if (G1CollectedHeap::use_parallel_gc_threads()) {
2605 G1CollectedHeap::StrongRootsScope srs(g1h);
2606 // this is remark, so we'll use up all active threads
2607 uint active_workers = g1h->workers()->active_workers();
2608 if (active_workers == 0) {
2609 assert(active_workers > 0, "Should have been set earlier");
2610 active_workers = (uint) ParallelGCThreads;
2611 g1h->workers()->set_active_workers(active_workers);
2612 }
2613 set_concurrency_and_phase(active_workers, false /* concurrent */);
2614 // Leave _parallel_marking_threads at its
2615 // value originally calculated in the ConcurrentMark
2616 // constructor and pass values of the active workers
2617 // through the gang in the task.
2618
2619 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2620 // We will start all available threads, even if we decide that the
2621 // active_workers will be fewer. The extra ones will just bail out
2622 // immediately.
2623 g1h->set_par_threads(active_workers);
2624 g1h->workers()->run_task(&remarkTask);
2625 g1h->set_par_threads(0);
2626 } else {
2627 G1CollectedHeap::StrongRootsScope srs(g1h);
2628 uint active_workers = 1;
2629 set_concurrency_and_phase(active_workers, false /* concurrent */);
2630
2631 // Note - if there's no work gang then the VMThread will be
2632 // the thread to execute the remark - serially. We have
2633 // to pass true for the is_serial parameter so that
2634 // CMTask::do_marking_step() doesn't enter the sync
2635 // barriers in the event of an overflow. Doing so will
2636 // cause an assert that the current thread is not a
2637 // concurrent GC thread.
2638 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2639 remarkTask.work(0);
2640 }
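// After remark either all completed SATB buffers should have been processed
// or we must have overflowed (in which case marking will be restarted).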
2641 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2642 guarantee(has_overflown() ||
2643 satb_mq_set.completed_buffers_num() == 0,
2644 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2645 BOOL_TO_STR(has_overflown()),
2646 satb_mq_set.completed_buffers_num()));
2647
2648 print_stats();
2649 }
2650
2651 #ifndef PRODUCT
2652
2653 class PrintReachableOopClosure: public OopClosure {
2654 private:
2655 G1CollectedHeap* _g1h;
2656 outputStream* _out;
2657 VerifyOption _vo;
2658 bool _all;
2659
2660 public:
2661 PrintReachableOopClosure(outputStream* out,
2662 VerifyOption vo,
2663 bool all) :
2664 _g1h(G1CollectedHeap::heap()),
2665 _out(out), _vo(vo), _all(all) { }
2666
2667 void do_oop(narrowOop* p) { do_oop_work(p); }
2668 void do_oop( oop* p) { do_oop_work(p); }
2669
2670 template <class T> void do_oop_work(T* p) {
2671 oop obj = oopDesc::load_decode_heap_oop(p);
2672 const char* str = NULL;
2673 const char* str2 = "";
2674
2675 if (obj == NULL) {
2676 str = "";
2677 } else if (!_g1h->is_in_g1_reserved(obj)) {
2678 str = " O";
2679 } else {
2680 HeapRegion* hr = _g1h->heap_region_containing(obj);
2681 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2682 bool marked = _g1h->is_marked(obj, _vo);
2683
2684 if (over_tams) {
2685 str = " >";
2686 if (marked) {
2687 str2 = " AND MARKED";
2688 }
2689 } else if (marked) {
2690 str = " M";
2691 } else {
2692 str = " NOT";
2693 }
2694 }
2695
2696 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2697 p2i(p), p2i((void*) obj), str, str2);
2698 }
2699 };
2700
2701 class PrintReachableObjectClosure : public ObjectClosure {
2702 private:
2703 G1CollectedHeap* _g1h;
2704 outputStream* _out;
2705 VerifyOption _vo;
2706 bool _all;
2707 HeapRegion* _hr;
2708
2709 public:
2710 PrintReachableObjectClosure(outputStream* out,
2711 VerifyOption vo,
2712 bool all,
2713 HeapRegion* hr) :
2714 _g1h(G1CollectedHeap::heap()),
2715 _out(out), _vo(vo), _all(all), _hr(hr) { }
2716
2717 void do_object(oop o) {
2718 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2719 bool marked = _g1h->is_marked(o, _vo);
2720 bool print_it = _all || over_tams || marked;
2721
2722 if (print_it) {
2723 _out->print_cr(" "PTR_FORMAT"%s",
2724 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2725 PrintReachableOopClosure oopCl(_out, _vo, _all);
2726 o->oop_iterate_no_header(&oopCl);
2727 }
2728 }
2729 };
2730
2731 class PrintReachableRegionClosure : public HeapRegionClosure {
2732 private:
2733 G1CollectedHeap* _g1h;
2734 outputStream* _out;
2735 VerifyOption _vo;
2736 bool _all;
2737
2738 public:
2739 bool doHeapRegion(HeapRegion* hr) {
2740 HeapWord* b = hr->bottom();
2741 HeapWord* e = hr->end();
2742 HeapWord* t = hr->top();
2743 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2744 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2745 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2746 _out->cr();
2747
2748 HeapWord* from = b;
2749 HeapWord* to = t;
2750
2751 if (to > from) {
2752 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2753 _out->cr();
2754 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2755 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2756 _out->cr();
2757 }
2758
2759 return false;
2760 }
2761
2762 PrintReachableRegionClosure(outputStream* out,
2763 VerifyOption vo,
2764 bool all) :
2765 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2766 };
2767
2768 void ConcurrentMark::print_reachable(const char* str,
2769 VerifyOption vo,
2770 bool all) {
2771 gclog_or_tty->cr();
2772 gclog_or_tty->print_cr("== Doing heap dump... ");
2773
2774 if (G1PrintReachableBaseFile == NULL) {
2775 gclog_or_tty->print_cr(" #### error: no base file defined");
2776 return;
2777 }
2778
2779 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2780 (JVM_MAXPATHLEN - 1)) {
2781 gclog_or_tty->print_cr(" #### error: file name too long");
2782 return;
2783 }
2784
2785 char file_name[JVM_MAXPATHLEN];
2786 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2787 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2788
2789 fileStream fout(file_name);
2790 if (!fout.is_open()) {
2791 gclog_or_tty->print_cr(" #### error: could not open file");
2792 return;
2793 }
2794
2795 outputStream* out = &fout;
2796 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2797 out->cr();
2798
2799 out->print_cr("--- ITERATING OVER REGIONS");
2800 out->cr();
2801 PrintReachableRegionClosure rcl(out, vo, all);
2802 _g1h->heap_region_iterate(&rcl);
2803 out->cr();
2804
2805 gclog_or_tty->print_cr(" done");
2806 gclog_or_tty->flush();
2807 }
2808
2809 #endif // PRODUCT
2810
2811 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2812 // Note we are overriding the read-only view of the prev map here, via
2813 // the cast.
2814 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2815 }
2816
2817 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2818 _nextMarkBitMap->clearRange(mr);
2819 }
2820
2821 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2822 clearRangePrevBitmap(mr);
2823 clearRangeNextBitmap(mr);
2824 }
2825
2826 HeapRegion*
2827 ConcurrentMark::claim_region(uint worker_id) {
2828 // "checkpoint" the finger
2829 HeapWord* finger = _finger;
2830
2831 // _heap_end will not change underneath our feet; it only changes at
2832 // yield points.
2833 while (finger < _heap_end) {
2834 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2835
2836 // Note on how this code handles humongous regions. In the
2837 // normal case the finger will reach the start of a "starts
2838 // humongous" (SH) region. Its end will either be the end of the
2839 // last "continues humongous" (CH) region in the sequence, or the
2840 // standard end of the SH region (if the SH is the only region in
2841 // the sequence). That way claim_region() will skip over the CH
2842 // regions. However, there is a subtle race between a CM thread
2843 // executing this method and a mutator thread doing a humongous
2844 // object allocation. The two are not mutually exclusive as the CM
2845 // thread does not need to hold the Heap_lock when it gets
2846 // here. So there is a chance that claim_region() will come across
2847 // a free region that's in the process of becoming a SH or a CH
2848 // region. In the former case, it will either
2849 // a) Miss the update to the region's end, in which case it will
2850 // visit every subsequent CH region, will find their bitmaps
2851 // empty, and do nothing, or
2852 // b) Observe the update of the region's end (in which case
2853 // it will skip the subsequent CH regions).
2854 // If it comes across a region that suddenly becomes CH, the
2855 // scenario will be similar to b). So, the race between
2856 // claim_region() and a humongous object allocation might force us
2857 // to do a bit of unnecessary work (due to some unnecessary bitmap
2858 // iterations) but it should not introduce any correctness issues.
2859 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2860 HeapWord* bottom = curr_region->bottom();
2861 HeapWord* end = curr_region->end();
2862 HeapWord* limit = curr_region->next_top_at_mark_start();
2863
2864 if (verbose_low()) {
2865 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2866 "["PTR_FORMAT", "PTR_FORMAT"), "
2867 "limit = "PTR_FORMAT,
2868 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2869 }
2870
2871 // Is the gap between reading the finger and doing the CAS too long?
2872 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2873 if (res == finger) {
2874 // we succeeded
2875
2876 // notice that _finger == end cannot be guaranteed here since
2877 // someone else might have moved the finger even further
2878 assert(_finger >= end, "the finger should have moved forward");
2879
2880 if (verbose_low()) {
2881 gclog_or_tty->print_cr("[%u] we were successful with region = "
2882 PTR_FORMAT, worker_id, p2i(curr_region));
2883 }
2884
2885 if (limit > bottom) {
2886 if (verbose_low()) {
2887 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2888 "returning it ", worker_id, p2i(curr_region));
2889 }
2890 return curr_region;
2891 } else {
2892 assert(limit == bottom,
2893 "the region limit should be at bottom");
2894 if (verbose_low()) {
2895 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2896 "returning NULL", worker_id, p2i(curr_region));
2897 }
2898 // we return NULL and the caller should try calling
2899 // claim_region() again.
2900 return NULL;
2901 }
2902 } else {
2903 assert(_finger > finger, "the finger should have moved forward");
2904 if (verbose_low()) {
2905 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2906 "global finger = "PTR_FORMAT", "
2907 "our finger = "PTR_FORMAT,
2908 worker_id, p2i(_finger), p2i(finger));
2909 }
2910
2911 // read it again
2912 finger = _finger;
2913 }
2914 }
2915
2916 return NULL;
2917 }
2918
2919 #ifndef PRODUCT
2920 enum VerifyNoCSetOopsPhase {
2921 VerifyNoCSetOopsStack,
2922 VerifyNoCSetOopsQueues,
2923 VerifyNoCSetOopsSATBCompleted,
2924 VerifyNoCSetOopsSATBThread
2925 };
2926
2927 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2928 private:
2929 G1CollectedHeap* _g1h;
2930 VerifyNoCSetOopsPhase _phase;
2931 int _info;
2932
2933 const char* phase_str() {
2934 switch (_phase) {
2935 case VerifyNoCSetOopsStack: return "Stack";
2936 case VerifyNoCSetOopsQueues: return "Queue";
2937 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2938 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2939 default: ShouldNotReachHere();
2940 }
2941 return NULL;
2942 }
2943
2944 void do_object_work(oop obj) {
2945 guarantee(!_g1h->obj_in_cs(obj),
2946 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2947 p2i((void*) obj), phase_str(), _info));
2948 }
2949
2950 public:
2951 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2952
2953 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2954 _phase = phase;
2955 _info = info;
2956 }
2957
2958 virtual void do_oop(oop* p) {
2959 oop obj = oopDesc::load_decode_heap_oop(p);
2960 do_object_work(obj);
2961 }
2962
2963 virtual void do_oop(narrowOop* p) {
2964 // We should not come across narrow oops while scanning marking
2965 // stacks and SATB buffers.
2966 ShouldNotReachHere();
2967 }
2968
2969 virtual void do_object(oop obj) {
2970 do_object_work(obj);
2971 }
2972 };
2973
2974 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2975 bool verify_enqueued_buffers,
2976 bool verify_thread_buffers,
2977 bool verify_fingers) {
2978 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2979 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2980 return;
2981 }
2982
2983 VerifyNoCSetOopsClosure cl;
2984
2985 if (verify_stacks) {
2986 // Verify entries on the global mark stack
2987 cl.set_phase(VerifyNoCSetOopsStack);
2988 _markStack.oops_do(&cl);
2989
2990 // Verify entries on the task queues
2991 for (uint i = 0; i < _max_worker_id; i += 1) {
2992 cl.set_phase(VerifyNoCSetOopsQueues, i);
2993 CMTaskQueue* queue = _task_queues->queue(i);
2994 queue->oops_do(&cl);
2995 }
2996 }
2997
2998 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2999
3000 // Verify entries on the enqueued SATB buffers
3001 if (verify_enqueued_buffers) {
3002 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3003 satb_qs.iterate_completed_buffers_read_only(&cl);
3004 }
3005
3006 // Verify entries on the per-thread SATB buffers
3007 if (verify_thread_buffers) {
3008 cl.set_phase(VerifyNoCSetOopsSATBThread);
3009 satb_qs.iterate_thread_buffers_read_only(&cl);
3010 }
3011
3012 if (verify_fingers) {
3013 // Verify the global finger
3014 HeapWord* global_finger = finger();
3015 if (global_finger != NULL && global_finger < _heap_end) {
3016 // The global finger always points to a heap region boundary. We
3017 // use heap_region_containing_raw() to get the containing region
3018 // given that the global finger could be pointing to a free region
3019 // which subsequently becomes continues humongous. If that
3020 // happens, heap_region_containing() will return the bottom of the
3021 // corresponding starts humongous region and the check below will
3022 // not hold any more.
3023 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3024 guarantee(global_finger == global_hr->bottom(),
3025 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3026 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3027 }
3028
3029 // Verify the task fingers
3030 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3031 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3032 CMTask* task = _tasks[i];
3033 HeapWord* task_finger = task->finger();
3034 if (task_finger != NULL && task_finger < _heap_end) {
3035 // See above note on the global finger verification.
3036 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3037 guarantee(task_finger == task_hr->bottom() ||
3038 !task_hr->in_collection_set(),
3039 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3040 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3041 }
3042 }
3043 }
3044 }
3045 #endif // PRODUCT
3046
3047 // Aggregate the counting data that was constructed concurrently
3048 // with marking.
3049 class AggregateCountDataHRClosure: public HeapRegionClosure {
3050 G1CollectedHeap* _g1h;
3051 ConcurrentMark* _cm;
3052 CardTableModRefBS* _ct_bs;
3053 BitMap* _cm_card_bm;
3054 uint _max_worker_id;
3055
3056 public:
3057 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3058 BitMap* cm_card_bm,
3059 uint max_worker_id) :
3060 _g1h(g1h), _cm(g1h->concurrent_mark()),
3061 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3062 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3063
3064 bool doHeapRegion(HeapRegion* hr) {
3065 if (hr->continuesHumongous()) {
3066 // We will ignore these here and process them when their
3067 // associated "starts humongous" region is processed.
3068 // Note that we cannot rely on their associated
3069 // "starts humongous" region to have their bit set to 1
3070 // since, due to the region chunking in the parallel region
3071 // iteration, a "continues humongous" region might be visited
3072 // before its associated "starts humongous".
3073 return false;
3074 }
3075
3076 HeapWord* start = hr->bottom();
3077 HeapWord* limit = hr->next_top_at_mark_start();
3078 HeapWord* end = hr->end();
3079
3080 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3081 err_msg("Preconditions not met - "
3082 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3083 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3084 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3085
3086 assert(hr->next_marked_bytes() == 0, "Precondition");
3087
3088 if (start == limit) {
3089 // NTAMS of this region has not been set so nothing to do.
3090 return false;
3091 }
3092
3093 // 'start' should be in the heap.
3094 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3095 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3096 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3097
3098 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3099 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3100 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3101
3102 // If ntams is not card aligned then we bump card bitmap index
3103 // for limit so that we get all the cards spanned by
3104 // the object ending at ntams.
3105 // Note: if this is the last region in the heap then ntams
3106 // could actually be just beyond the end of the heap;
3107 // limit_idx will then correspond to a (non-existent) card
3108 // that is also outside the heap.
3109 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3110 limit_idx += 1;
3111 }
3112
3113 assert(limit_idx <= end_idx, "or else use atomics");
3114
3115 // Aggregate the "stripe" in the count data associated with hr.
3116 uint hrs_index = hr->hrs_index();
3117 size_t marked_bytes = 0;
3118
3119 for (uint i = 0; i < _max_worker_id; i += 1) {
3120 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3121 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3122
3123 // Fetch the marked_bytes in this region for task i and
3124 // add it to the running total for this region.
3125 marked_bytes += marked_bytes_array[hrs_index];
3126
3127 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3128 // into the global card bitmap.
3129 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3130
3131 while (scan_idx < limit_idx) {
3132 assert(task_card_bm->at(scan_idx) == true, "should be");
3133 _cm_card_bm->set_bit(scan_idx);
3134 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3135
3136 // BitMap::get_next_one_offset() can handle the case when
3137 // its left_offset parameter is greater than its right_offset
3138 // parameter. It does, however, have an early exit if
3139 // left_offset == right_offset. So let's limit the value
3140 // passed in for left offset here.
3141 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3142 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3143 }
3144 }
3145
3146 // Update the marked bytes for this region.
3147 hr->add_to_marked_bytes(marked_bytes);
3148
3149 // Next heap region
3150 return false;
3151 }
3152 };
3153
3154 class G1AggregateCountDataTask: public AbstractGangTask {
3155 protected:
3156 G1CollectedHeap* _g1h;
3157 ConcurrentMark* _cm;
3158 BitMap* _cm_card_bm;
3159 uint _max_worker_id;
3160 int _active_workers;
3161
3162 public:
3163 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3164 ConcurrentMark* cm,
3165 BitMap* cm_card_bm,
3166 uint max_worker_id,
3167 int n_workers) :
3168 AbstractGangTask("Count Aggregation"),
3169 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3170 _max_worker_id(max_worker_id),
3171 _active_workers(n_workers) { }
3172
3173 void work(uint worker_id) {
3174 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3175
3176 if (G1CollectedHeap::use_parallel_gc_threads()) {
3177 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3178 _active_workers,
3179 HeapRegion::AggregateCountClaimValue);
3180 } else {
3181 _g1h->heap_region_iterate(&cl);
3182 }
3183 }
3184 };
3185
3186
3187 void ConcurrentMark::aggregate_count_data() {
3188 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3189 _g1h->workers()->active_workers() :
3190 1);
3191
3192 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3193 _max_worker_id, n_workers);
3194
3195 if (G1CollectedHeap::use_parallel_gc_threads()) {
3196 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3197 "sanity check");
3198 _g1h->set_par_threads(n_workers);
3199 _g1h->workers()->run_task(&g1_par_agg_task);
3200 _g1h->set_par_threads(0);
3201
3202 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3203 "sanity check");
3204 _g1h->reset_heap_region_claim_values();
3205 } else {
3206 g1_par_agg_task.work(0);
3207 }
3208 }
3209
3210 // Clear the per-worker arrays used to store the per-region counting data
3211 void ConcurrentMark::clear_all_count_data() {
3212 // Clear the global card bitmap - it will be filled during
3213 // liveness count aggregation (during remark) and the
3214 // final counting task.
3215 _card_bm.clear();
3216
3217 // Clear the global region bitmap - it will be filled as part
3218 // of the final counting task.
3219 _region_bm.clear();
3220
3221 uint max_regions = _g1h->max_regions();
3222 assert(_max_worker_id > 0, "uninitialized");
3223
3224 for (uint i = 0; i < _max_worker_id; i += 1) {
3225 BitMap* task_card_bm = count_card_bitmap_for(i);
3226 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3227
3228 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3229 assert(marked_bytes_array != NULL, "uninitialized");
3230
3231 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3232 task_card_bm->clear();
3233 }
3234 }
3235
3236 void ConcurrentMark::print_stats() {
3237 if (verbose_stats()) {
3238 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3239 for (size_t i = 0; i < _active_tasks; ++i) {
3240 _tasks[i]->print_stats();
3241 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3242 }
3243 }
3244 }
3245
3246 // abandon current marking iteration due to a Full GC
3247 void ConcurrentMark::abort() {
3248 // Clear all marks to force marking thread to do nothing
3249 _nextMarkBitMap->clearAll();
3250
3251 // Note we cannot clear the previous marking bitmap here
3252 // since VerifyDuringGC verifies the objects marked during
3253 // a full GC against the previous bitmap.
3254
3255 // Clear the liveness counting data
3256 clear_all_count_data();
3257 // Empty mark stack
3258 reset_marking_state();
3259 for (uint i = 0; i < _max_worker_id; ++i) {
3260 _tasks[i]->clear_region_fields();
3261 }
3262 _first_overflow_barrier_sync.abort();
3263 _second_overflow_barrier_sync.abort();
3264 _has_aborted = true;
3265
3266 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3267 satb_mq_set.abandon_partial_marking();
3268 // This can be called either during or outside marking; we'll read
3269 // the expected_active value from the SATB queue set.
3270 satb_mq_set.set_active_all_threads(
3271 false, /* new active value */
3272 satb_mq_set.is_active() /* expected_active */);
3273
3274 _g1h->trace_heap_after_concurrent_cycle();
3275 _g1h->register_concurrent_cycle_end();
3276 }
3277
3278 static void print_ms_time_info(const char* prefix, const char* name,
3279 NumberSeq& ns) {
3280 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3281 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3282 if (ns.num() > 0) {
3283 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3284 prefix, ns.sd(), ns.maximum());
3285 }
3286 }
3287
3288 void ConcurrentMark::print_summary_info() {
3289 gclog_or_tty->print_cr(" Concurrent marking:");
3290 print_ms_time_info(" ", "init marks", _init_times);
3291 print_ms_time_info(" ", "remarks", _remark_times);
3292 {
3293 print_ms_time_info(" ", "final marks", _remark_mark_times);
3294 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3295
3296 }
3297 print_ms_time_info(" ", "cleanups", _cleanup_times);
3298 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3299 _total_counting_time,
3300 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3301 (double)_cleanup_times.num()
3302 : 0.0));
3303 if (G1ScrubRemSets) {
3304 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3305 _total_rs_scrub_time,
3306 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3307 (double)_cleanup_times.num()
3308 : 0.0));
3309 }
3310 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3311 (_init_times.sum() + _remark_times.sum() +
3312 _cleanup_times.sum())/1000.0);
3313 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3314 "(%8.2f s marking).",
3315 cmThread()->vtime_accum(),
3316 cmThread()->vtime_mark_accum());
3317 }
3318
3319 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3320 if (use_parallel_marking_threads()) {
3321 _parallel_workers->print_worker_threads_on(st);
3322 }
3323 }
3324
3325 void ConcurrentMark::print_on_error(outputStream* st) const {
3326 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3327 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3328 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3329 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3330 }
3331
3332 // We take a break if someone is trying to stop the world.
3333 bool ConcurrentMark::do_yield_check(uint worker_id) {
3334 if (SuspendibleThreadSet::should_yield()) {
3335 if (worker_id == 0) {
3336 _g1h->g1_policy()->record_concurrent_pause();
3337 }
3338 SuspendibleThreadSet::yield();
3339 return true;
3340 } else {
3341 return false;
3342 }
3343 }
3344
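// Returns whether the card spanning address p is set in the global
// liveness card bitmap. The card index is the byte offset of p from
// the start of the reserved heap, shifted right by card_shift.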
3345 bool ConcurrentMark::containing_card_is_marked(void* p) {
3346 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3347 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3348 }
3349
3350 bool ConcurrentMark::containing_cards_are_marked(void* start,
3351 void* last) {
3352 return containing_card_is_marked(start) &&
3353 containing_card_is_marked(last);
3354 }
3355
3356 #ifndef PRODUCT
3357 // for debugging purposes
3358 void ConcurrentMark::print_finger() {
3359 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3360 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3361 for (uint i = 0; i < _max_worker_id; ++i) {
3362 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3363 }
3364 gclog_or_tty->cr();
3365 }
3366 #endif
3367
3368 void CMTask::scan_object(oop obj) {
3369 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3370
3371 if (_cm->verbose_high()) {
3372 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3373 _worker_id, p2i((void*) obj));
3374 }
3375
3376 size_t obj_size = obj->size();
3377 _words_scanned += obj_size;
3378
3379 obj->oop_iterate(_cm_oop_closure);
3380 statsOnly( ++_objs_scanned );
3381 check_limits();
3382 }
3383
3384 // Closure for iteration over bitmaps
3385 class CMBitMapClosure : public BitMapClosure {
3386 private:
3387 // the bitmap that is being iterated over
3388 CMBitMap* _nextMarkBitMap;
3389 ConcurrentMark* _cm;
3390 CMTask* _task;
3391
3392 public:
3393 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3394 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3395
3396 bool do_bit(size_t offset) {
3397 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3398 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3399 assert( addr < _cm->finger(), "invariant");
3400
3401 statsOnly( _task->increase_objs_found_on_bitmap() );
3402 assert(addr >= _task->finger(), "invariant");
3403
3404 // We move that task's local finger along.
3405 _task->move_finger_to(addr);
3406
3407 _task->scan_object(oop(addr));
3408 // we only partially drain the local queue and global stack
3409 _task->drain_local_queue(true);
3410 _task->drain_global_stack(true);
3411
3412 // if the has_aborted flag has been raised, we need to bail out of
3413 // the iteration
3414 return !_task->has_aborted();
3415 }
3416 };
3417
3418 // Closure for iterating over objects, currently only used for
3419 // processing SATB buffers.
3420 class CMObjectClosure : public ObjectClosure {
3421 private:
3422 CMTask* _task;
3423
3424 public:
3425 void do_object(oop obj) {
3426 _task->deal_with_reference(obj);
3427 }
3428
3429 CMObjectClosure(CMTask* task) : _task(task) { }
3430 };
3431
3432 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3433 ConcurrentMark* cm,
3434 CMTask* task)
3435 : _g1h(g1h), _cm(cm), _task(task) {
3436 assert(_ref_processor == NULL, "should be initialized to NULL");
3437
3438 if (G1UseConcMarkReferenceProcessing) {
3439 _ref_processor = g1h->ref_processor_cm();
3440 assert(_ref_processor != NULL, "should not be NULL");
3441 }
3442 }
3443
3444 void CMTask::setup_for_region(HeapRegion* hr) {
3445 assert(hr != NULL,
3446 "claim_region() should have filtered out NULL regions");
3447 assert(!hr->continuesHumongous(),
3448 "claim_region() should have filtered out continues humongous regions");
3449
3450 if (_cm->verbose_low()) {
3451 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3452 _worker_id, p2i(hr));
3453 }
3454
3455 _curr_region = hr;
3456 _finger = hr->bottom();
3457 update_region_limit();
3458 }
3459
3460 void CMTask::update_region_limit() {
3461 HeapRegion* hr = _curr_region;
3462 HeapWord* bottom = hr->bottom();
3463 HeapWord* limit = hr->next_top_at_mark_start();
3464
3465 if (limit == bottom) {
3466 if (_cm->verbose_low()) {
3467 gclog_or_tty->print_cr("[%u] found an empty region "
3468 "["PTR_FORMAT", "PTR_FORMAT")",
3469 _worker_id, p2i(bottom), p2i(limit));
3470 }
3471 // The region was collected underneath our feet.
3472 // We set the finger to bottom to ensure that the bitmap
3473 // iteration that will follow this will not do anything.
3474 // (this is not a condition that holds when we set the region up,
3475 // as the region is not supposed to be empty in the first place)
3476 _finger = bottom;
3477 } else if (limit >= _region_limit) {
3478 assert(limit >= _finger, "peace of mind");
3479 } else {
3480 assert(limit < _region_limit, "only way to get here");
3481 // This can happen under some pretty unusual circumstances. An
3482 // evacuation pause empties the region underneath our feet (NTAMS
3483 // at bottom). We then do some allocation in the region (NTAMS
3484 // stays at bottom), followed by the region being used as a GC
3485 // alloc region (NTAMS will move to top() and the objects
3486 // originally below it will be grayed). All objects now marked in
3487 // the region are explicitly grayed, if below the global finger,
3488 // and in fact we do not need to scan anything else. So, we simply
3489 // set _finger to be limit to ensure that the bitmap iteration
3490 // doesn't do anything.
3491 _finger = limit;
3492 }
3493
3494 _region_limit = limit;
3495 }
3496
3497 void CMTask::giveup_current_region() {
3498 assert(_curr_region != NULL, "invariant");
3499 if (_cm->verbose_low()) {
3500 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3501 _worker_id, p2i(_curr_region));
3502 }
3503 clear_region_fields();
3504 }
3505
3506 void CMTask::clear_region_fields() {
3507 // Values for these three fields that indicate that we're not
3508 // holding on to a region.
3509 _curr_region = NULL;
3510 _finger = NULL;
3511 _region_limit = NULL;
3512 }
3513
3514 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3515 if (cm_oop_closure == NULL) {
3516 assert(_cm_oop_closure != NULL, "invariant");
3517 } else {
3518 assert(_cm_oop_closure == NULL, "invariant");
3519 }
3520 _cm_oop_closure = cm_oop_closure;
3521 }
3522
3523 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3524 guarantee(nextMarkBitMap != NULL, "invariant");
3525
3526 if (_cm->verbose_low()) {
3527 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3528 }
3529
3530 _nextMarkBitMap = nextMarkBitMap;
3531 clear_region_fields();
3532
3533 _calls = 0;
3534 _elapsed_time_ms = 0.0;
3535 _termination_time_ms = 0.0;
3536 _termination_start_time_ms = 0.0;
3537
3538 #if _MARKING_STATS_
3539 _local_pushes = 0;
3540 _local_pops = 0;
3541 _local_max_size = 0;
3542 _objs_scanned = 0;
3543 _global_pushes = 0;
3544 _global_pops = 0;
3545 _global_max_size = 0;
3546 _global_transfers_to = 0;
3547 _global_transfers_from = 0;
3548 _regions_claimed = 0;
3549 _objs_found_on_bitmap = 0;
3550 _satb_buffers_processed = 0;
3551 _steal_attempts = 0;
3552 _steals = 0;
3553 _aborted = 0;
3554 _aborted_overflow = 0;
3555 _aborted_cm_aborted = 0;
3556 _aborted_yield = 0;
3557 _aborted_timed_out = 0;
3558 _aborted_satb = 0;
3559 _aborted_termination = 0;
3560 #endif // _MARKING_STATS_
3561 }
3562
3563 bool CMTask::should_exit_termination() {
3564 regular_clock_call();
3565 // This is called when we are in the termination protocol. We should
3566 // quit if, for some reason, this task wants to abort or the global
3567 // stack is not empty (this means that we can get work from it).
3568 return !_cm->mark_stack_empty() || has_aborted();
3569 }
3570
3571 void CMTask::reached_limit() {
3572 assert(_words_scanned >= _words_scanned_limit ||
3573 _refs_reached >= _refs_reached_limit ,
3574 "shouldn't have been called otherwise");
3575 regular_clock_call();
3576 }
3577
3578 void CMTask::regular_clock_call() {
3579 if (has_aborted()) return;
3580
3581 // First, we need to recalculate the words scanned and refs reached
3582 // limits for the next clock call.
3583 recalculate_limits();
3584
3585 // During the regular clock call we do the following
3586
3587 // (1) If an overflow has been flagged, then we abort.
3588 if (_cm->has_overflown()) {
3589 set_has_aborted();
3590 return;
3591 }
3592
3593 // If we are not concurrent (i.e. we're doing remark) we don't need
3594 // to check anything else. The other steps are only needed during
3595 // the concurrent marking phase.
3596 if (!concurrent()) return;
3597
3598 // (2) If marking has been aborted for Full GC, then we also abort.
3599 if (_cm->has_aborted()) {
3600 set_has_aborted();
3601 statsOnly( ++_aborted_cm_aborted );
3602 return;
3603 }
3604
3605 double curr_time_ms = os::elapsedVTime() * 1000.0;
3606
3607 // (3) If marking stats are enabled, then we update the clock call statistics.
3608 #if _MARKING_STATS_
3609 if (_words_scanned >= _words_scanned_limit) {
3610 ++_clock_due_to_scanning;
3611 }
3612 if (_refs_reached >= _refs_reached_limit) {
3613 ++_clock_due_to_marking;
3614 }
3615
3616 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3617 _interval_start_time_ms = curr_time_ms;
3618 _all_clock_intervals_ms.add(last_interval_ms);
3619
3620 if (_cm->verbose_medium()) {
3621 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3622 "scanned = %d%s, refs reached = %d%s",
3623 _worker_id, last_interval_ms,
3624 _words_scanned,
3625 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3626 _refs_reached,
3627 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3628 }
3629 #endif // _MARKING_STATS_
3630
3631 // (4) We check whether we should yield. If we have to, then we abort.
3632 if (SuspendibleThreadSet::should_yield()) {
3633 // We should yield. To do this we abort the task. The caller is
3634 // responsible for yielding.
3635 set_has_aborted();
3636 statsOnly( ++_aborted_yield );
3637 return;
3638 }
3639
3640 // (5) We check whether we've reached our time quota. If we have,
3641 // then we abort.
3642 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3643 if (elapsed_time_ms > _time_target_ms) {
3644 set_has_aborted();
3645 _has_timed_out = true;
3646 statsOnly( ++_aborted_timed_out );
3647 return;
3648 }
3649
3650 // (6) Finally, we check whether there are enough completed SATB
3651 // buffers available for processing. If there are, we abort.
3652 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3653 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3654 if (_cm->verbose_low()) {
3655 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3656 _worker_id);
3657 }
3658 // we do need to process SATB buffers, so we'll abort and restart
3659 // the marking task to do so
3660 set_has_aborted();
3661 statsOnly( ++_aborted_satb );
3662 return;
3663 }
3664 }
3665
3666 void CMTask::recalculate_limits() {
3667 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3668 _words_scanned_limit = _real_words_scanned_limit;
3669
3670 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3671 _refs_reached_limit = _real_refs_reached_limit;
3672 }
3673
3674 void CMTask::decrease_limits() {
3675 // This is called when we believe that we're going to do an infrequent
3676 // operation which will increase the per byte scanned cost (i.e. move
3677 // entries to/from the global stack). It basically tries to decrease the
3678 // scanning limit so that the clock is called earlier.
3679
3680 if (_cm->verbose_medium()) {
3681 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3682 }
3683
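// Pull both limits back by three quarters of a period, so that the
// next clock call happens after roughly a quarter of the normal
// amount of work.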
3684 _words_scanned_limit = _real_words_scanned_limit -
3685 3 * words_scanned_period / 4;
3686 _refs_reached_limit = _real_refs_reached_limit -
3687 3 * refs_reached_period / 4;
3688 }
3689
3690 void CMTask::move_entries_to_global_stack() {
3691 // local array where we'll store the entries that will be popped
3692 // from the local queue
3693 oop buffer[global_stack_transfer_size];
3694
3695 int n = 0;
3696 oop obj;
3697 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3698 buffer[n] = obj;
3699 ++n;
3700 }
3701
3702 if (n > 0) {
3703 // we popped at least one entry from the local queue
3704
3705 statsOnly( ++_global_transfers_to; _local_pops += n );
3706
3707 if (!_cm->mark_stack_push(buffer, n)) {
3708 if (_cm->verbose_low()) {
3709 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3710 _worker_id);
3711 }
3712 set_has_aborted();
3713 } else {
3714 // the transfer was successful
3715
3716 if (_cm->verbose_medium()) {
3717 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3718 _worker_id, n);
3719 }
3720 statsOnly( int tmp_size = _cm->mark_stack_size();
3721 if (tmp_size > _global_max_size) {
3722 _global_max_size = tmp_size;
3723 }
3724 _global_pushes += n );
3725 }
3726 }
3727
3728 // this operation was quite expensive, so decrease the limits
3729 decrease_limits();
3730 }
3731
3732 void CMTask::get_entries_from_global_stack() {
3733 // local array where we'll store the entries that will be popped
3734 // from the global stack.
3735 oop buffer[global_stack_transfer_size];
3736 int n;
3737 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3738 assert(n <= global_stack_transfer_size,
3739 "we should not pop more than the given limit");
3740 if (n > 0) {
3741 // yes, we did actually pop at least one entry
3742
3743 statsOnly( ++_global_transfers_from; _global_pops += n );
3744 if (_cm->verbose_medium()) {
3745 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3746 _worker_id, n);
3747 }
3748 for (int i = 0; i < n; ++i) {
3749 bool success = _task_queue->push(buffer[i]);
3750 // We only call this when the local queue is empty or under a
3751 // given target limit. So, we do not expect this push to fail.
3752 assert(success, "invariant");
3753 }
3754
3755 statsOnly( int tmp_size = _task_queue->size();
3756 if (tmp_size > _local_max_size) {
3757 _local_max_size = tmp_size;
3758 }
3759 _local_pushes += n );
3760 }
3761
3762 // this operation was quite expensive, so decrease the limits
3763 decrease_limits();
3764 }
3765
3766 void CMTask::drain_local_queue(bool partially) {
3767 if (has_aborted()) return;
3768
3769 // Decide what the target size is, depending on whether we're going to
3770 // drain it partially (so that other tasks can steal if they run out
3771 // of things to do) or totally (at the very end).
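// (When draining partially we leave up to a third of the queue's
// capacity, capped at GCDrainStackTargetSize entries, so that other
// tasks still have something to steal.)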
3772 size_t target_size;
3773 if (partially) {
3774 target_size = MIN2((_task_queue->max_elems()/3), GCDrainStackTargetSize);
3775 } else {
3776 target_size = 0;
3777 }
3778
3779 if (_task_queue->size() > target_size) {
3780 if (_cm->verbose_high()) {
3781 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3782 _worker_id, target_size);
3783 }
3784
3785 oop obj;
3786 bool ret = _task_queue->pop_local(obj);
3787 while (ret) {
3788 statsOnly( ++_local_pops );
3789
3790 if (_cm->verbose_high()) {
3791 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3792 p2i((void*) obj));
3793 }
3794
3795 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3796 assert(!_g1h->is_on_master_free_list(
3797 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3798
3799 scan_object(obj);
3800
3801 if (_task_queue->size() <= target_size || has_aborted()) {
3802 ret = false;
3803 } else {
3804 ret = _task_queue->pop_local(obj);
3805 }
3806 }
3807
3808 if (_cm->verbose_high()) {
3809 gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3810 _worker_id, _task_queue->size());
3811 }
3812 }
3813 }
3814
3815 void CMTask::drain_global_stack(bool partially) {
3816 if (has_aborted()) return;
3817
3818 // We have a policy to drain the local queue before we attempt to
3819 // drain the global stack.
3820 assert(partially || _task_queue->size() == 0, "invariant");
3821
3822 // Decide what the target size is, depending on whether we're going to
3823 // drain it partially (so that other tasks can steal if they run out
3824 // of things to do) or totally (at the very end). Notice that,
3825 // because we move entries from the global stack in chunks or
3826 // because another task might be doing the same, we might in fact
3827 // drop below the target. But, this is not a problem.
3828 size_t target_size;
3829 if (partially) {
3830 target_size = _cm->partial_mark_stack_size_target();
3831 } else {
3832 target_size = 0;
3833 }
3834
3835 if (_cm->mark_stack_size() > target_size) {
3836 if (_cm->verbose_low()) {
3837 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3838 _worker_id, target_size);
3839 }
3840
3841 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3842 get_entries_from_global_stack();
3843 drain_local_queue(partially);
3844 }
3845
3846 if (_cm->verbose_low()) {
3847 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3848 _worker_id, _cm->mark_stack_size());
3849 }
3850 }
3851 }
3852
3853 // The SATB queue has several assumptions on whether to call the par or
3854 // non-par versions of the methods. This is why some of the code is
3855 // replicated. We should really get rid of the single-threaded version
3856 // of the code to simplify things.
3857 void CMTask::drain_satb_buffers() {
3858 if (has_aborted()) return;
3859
3860 // We set this so that the regular clock knows that we're in the
3861 // middle of draining buffers and doesn't set the abort flag when it
3862 // notices that SATB buffers are available for draining. It'd be
3863 // very counter-productive if it did that. :-)
3864 _draining_satb_buffers = true;
3865
3866 CMObjectClosure oc(this);
3867 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3868 if (G1CollectedHeap::use_parallel_gc_threads()) {
3869 satb_mq_set.set_par_closure(_worker_id, &oc);
3870 } else {
3871 satb_mq_set.set_closure(&oc);
3872 }
3873
3874 // This keeps claiming and applying the closure to completed buffers
3875 // until we run out of buffers or we need to abort.
3876 if (G1CollectedHeap::use_parallel_gc_threads()) {
3877 while (!has_aborted() &&
3878 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3879 if (_cm->verbose_medium()) {
3880 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3881 }
3882 statsOnly( ++_satb_buffers_processed );
3883 regular_clock_call();
3884 }
3885 } else {
3886 while (!has_aborted() &&
3887 satb_mq_set.apply_closure_to_completed_buffer()) {
3888 if (_cm->verbose_medium()) {
3889 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3890 }
3891 statsOnly( ++_satb_buffers_processed );
3892 regular_clock_call();
3893 }
3894 }
3895
3896 if (!concurrent() && !has_aborted()) {
3897 // We should only do this during remark.
3898 if (G1CollectedHeap::use_parallel_gc_threads()) {
3899 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3900 } else {
3901 satb_mq_set.iterate_closure_all_threads();
3902 }
3903 }
3904
3905 _draining_satb_buffers = false;
3906
3907 assert(has_aborted() ||
3908 concurrent() ||
3909 satb_mq_set.completed_buffers_num() == 0, "invariant");
3910
3911 if (G1CollectedHeap::use_parallel_gc_threads()) {
3912 satb_mq_set.set_par_closure(_worker_id, NULL);
3913 } else {
3914 satb_mq_set.set_closure(NULL);
3915 }
3916
3917 // Again, this was a potentially expensive operation, so decrease the
3918 // limits to get the regular clock call early.
3919 decrease_limits();
3920 }
3921
3922 void CMTask::print_stats() {
3923 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3924 _worker_id, _calls);
3925 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3926 _elapsed_time_ms, _termination_time_ms);
3927 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3928 _step_times_ms.num(), _step_times_ms.avg(),
3929 _step_times_ms.sd());
3930 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3931 _step_times_ms.maximum(), _step_times_ms.sum());
3932
3933 #if _MARKING_STATS_
3934 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3935 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3936 _all_clock_intervals_ms.sd());
3937 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3938 _all_clock_intervals_ms.maximum(),
3939 _all_clock_intervals_ms.sum());
3940 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3941 _clock_due_to_scanning, _clock_due_to_marking);
3942 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3943 _objs_scanned, _objs_found_on_bitmap);
3944 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3945 _local_pushes, _local_pops, _local_max_size);
3946 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3947 _global_pushes, _global_pops, _global_max_size);
3948 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3949 _global_transfers_to,_global_transfers_from);
3950 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3951 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3952 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3953 _steal_attempts, _steals);
3954 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3955 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3956 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3957 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3958 _aborted_timed_out, _aborted_satb, _aborted_termination);
3959 #endif // _MARKING_STATS_
3960 }
3961
3962 /*****************************************************************************
3963
3964 The do_marking_step(time_target_ms, ...) method is the building
3965 block of the parallel marking framework. It can be called in parallel
3966 with other invocations of do_marking_step() on different tasks
3967 (but only one per task, obviously) and concurrently with the
3968 mutator threads, or during remark, hence it eliminates the need
3969 for two versions of the code. When called during remark, it will
3970 pick up from where the task left off during the concurrent marking
3971 phase. Interestingly, tasks are also claimable during evacuation
3972 pauses, since do_marking_step() ensures that it aborts before
3973 it needs to yield.
3974
3975 The data structures that it uses to do marking work are the
3976 following:
3977
3978 (1) Marking Bitmap. If there are gray objects that appear only
3979 on the bitmap (this happens either when dealing with an overflow
3980 or when the initial marking phase has simply marked the roots
3981 and didn't push them on the stack), then tasks claim heap
3982 regions whose bitmap they then scan to find gray objects. A
3983 global finger indicates where the end of the last claimed region
3984 is. A local finger indicates how far into the region a task has
3985 scanned. The two fingers are used to determine how to gray an
3986 object (i.e. whether simply marking it is OK, as it will be
3987 visited by a task in the future, or whether it needs to be also
3988 pushed on a stack).
3989
3990 (2) Local Queue. The task's local queue, which the task can access
3991 reasonably efficiently. Other tasks can steal from
3992 it when they run out of work. Throughout the marking phase, a
3993 task attempts to keep its local queue short but not totally
3994 empty, so that entries are available for stealing by other
3995 tasks. Only when there is no more work will a task totally
3996 drain its local queue.
3997
3998 (3) Global Mark Stack. This handles local queue overflow. During
3999 marking only sets of entries are moved between it and the local
4000 queues, as access to it requires a mutex and more fine-grained
4001 interaction with it, which might cause contention. If it
4002 overflows, then the marking phase should restart and iterate
4003 over the bitmap to identify gray objects. Throughout the marking
4004 phase, tasks attempt to keep the global mark stack at a small
4005 length but not totally empty, so that entries are available for
4006 popping by other tasks. Only when there is no more work will tasks
4007 totally drain the global mark stack.
4008
4009 (4) SATB Buffer Queue. This is where completed SATB buffers are
4010 made available. Buffers are regularly removed from this queue
4011 and scanned for roots, so that the queue doesn't get too
4012 long. During remark, all completed buffers are processed, as
4013 well as the filled in parts of any uncompleted buffers.
4014
4015 The do_marking_step() method tries to abort when the time target
4016 has been reached. There are a few other cases when the
4017 do_marking_step() method also aborts:
4018
4019 (1) When the marking phase has been aborted (after a Full GC).
4020
4021 (2) When a global overflow (on the global stack) has been
4022 triggered. Before the task aborts, it will actually sync up with
4023 the other tasks to ensure that all the marking data structures
4024 (local queues, stacks, fingers etc.) are re-initialized so that
4025 when do_marking_step() completes, the marking phase can
4026 immediately restart.
4027
4028 (3) When enough completed SATB buffers are available. The
4029 do_marking_step() method only tries to drain SATB buffers right
4030 at the beginning. So, if enough buffers are available, the
4031 marking step aborts and the SATB buffers are processed at
4032 the beginning of the next invocation.
4033
4034 (4) To yield. When we have to yield, we abort and yield
4035 right at the end of do_marking_step(). This saves us from a lot
4036 of hassle as, by yielding, we might allow a Full GC. If this
4037 happens then objects will be compacted underneath our feet, the
4038 heap might shrink, etc. We save checking for this by just
4039 aborting and doing the yield right at the end.
4040
4041 From the above it follows that the do_marking_step() method should
4042 be called in a loop (or, otherwise, regularly) until it completes.
4043
4044 If a marking step completes without its has_aborted() flag being
4045 true, it means it has completed the current marking phase (and
4046 also all other marking tasks have done so and have all synced up).
4047
4048 A method called regular_clock_call() is invoked "regularly" (in
4049 sub-ms intervals) throughout marking. It is this clock method that
4050 checks all the abort conditions which were mentioned above and
4051 decides when the task should abort. A work-based scheme is used to
4052 trigger this clock method: when the number of object words the
4053 marking phase has scanned or the number of references the marking
4054 phase has visited reaches a given limit. Additional invocations of
4055 the clock method have been planted in a few other strategic places
4056 too. The initial reason for the clock method was to avoid calling
4057 vtime too regularly, as it is quite expensive. So, once it was in
4058 place, it was natural to piggy-back all the other conditions on it
4059 too and not constantly check them throughout the code.
4060
4061 If do_termination is true then do_marking_step will enter its
4062 termination protocol.
4063
4064 The value of is_serial must be true when do_marking_step is being
4065 called serially (i.e. by the VMThread) and do_marking_step should
4066 skip any synchronization in the termination and overflow code.
4067 Examples include the serial remark code and the serial reference
4068 processing closures.
4069
4070 The value of is_serial must be false when do_marking_step is
4071 being called by any of the worker threads in a work gang.
4072 Examples include the concurrent marking code (CMMarkingTask),
4073 the MT remark code, and the MT reference processing closures.
4074
4075 *****************************************************************************/
4076
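// In outline, do_marking_step() below: drains any available SATB
// buffers, then repeatedly scans the bitmap of the current region
// and claims new regions (partially draining the local queue and
// global stack as it goes), then totally drains both, attempts to
// steal work from other tasks, and finally enters the termination /
// overflow protocol if do_termination is set.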
4077 void CMTask::do_marking_step(double time_target_ms,
4078 bool do_termination,
4079 bool is_serial) {
4080 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4081 assert(concurrent() == _cm->concurrent(), "they should be the same");
4082
4083 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4084 assert(_task_queues != NULL, "invariant");
4085 assert(_task_queue != NULL, "invariant");
4086 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4087
4088 assert(!_claimed,
4089 "only one thread should claim this task at any one time");
4090
4091 // OK, this doesn't safeguard against all possible scenarios, as it is
4092 // possible for two threads to set the _claimed flag at the same
4093 // time. But it is only for debugging purposes anyway and it will
4094 // catch most problems.
4095 _claimed = true;
4096
4097 _start_time_ms = os::elapsedVTime() * 1000.0;
4098 statsOnly( _interval_start_time_ms = _start_time_ms );
4099
4100 // If do_stealing is true then do_marking_step will attempt to
4101 // steal work from the other CMTasks. It only makes sense to
4102 // enable stealing when the termination protocol is enabled
4103 // and do_marking_step() is not being called serially.
4104 bool do_stealing = do_termination && !is_serial;
4105
4106 double diff_prediction_ms =
4107 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4108 _time_target_ms = time_target_ms - diff_prediction_ms;
4109
4110 // set up the variables that are used in the work-based scheme to
4111 // call the regular clock method
4112 _words_scanned = 0;
4113 _refs_reached = 0;
4114 recalculate_limits();
4115
4116 // clear all flags
4117 clear_has_aborted();
4118 _has_timed_out = false;
4119 _draining_satb_buffers = false;
4120
4121 ++_calls;
4122
4123 if (_cm->verbose_low()) {
4124 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4125 "target = %1.2lfms >>>>>>>>>>",
4126 _worker_id, _calls, _time_target_ms);
4127 }
4128
4129 // Set up the bitmap and oop closures. Anything that uses them is
4130 // eventually called from this method, so it is OK to allocate these
4131 // on the stack in this frame.
4132 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4133 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4134 set_cm_oop_closure(&cm_oop_closure);
4135
4136 if (_cm->has_overflown()) {
4137 // This can happen if the mark stack overflows during a GC pause
4138 // and this task, after a yield point, restarts. We have to abort
4139 // as we need to get into the overflow protocol which happens
4140 // right at the end of this task.
4141 set_has_aborted();
4142 }
4143
4144 // First drain any available SATB buffers. After this, we will not
4145 // look at SATB buffers before the next invocation of this method.
4146 // If enough completed SATB buffers are queued up, the regular clock
4147 // will abort this task so that it restarts.
4148 drain_satb_buffers();
4149 // ...then partially drain the local queue and the global stack
4150 drain_local_queue(true);
4151 drain_global_stack(true);
4152
4153 do {
4154 if (!has_aborted() && _curr_region != NULL) {
4155 // This means that we're already holding on to a region.
4156 assert(_finger != NULL, "if region is not NULL, then the finger "
4157 "should not be NULL either");
4158
4159 // We might have restarted this task after an evacuation pause
4160 // which might have evacuated the region we're holding on to
4161 // underneath our feet. Let's read its limit again to make sure
4162 // that we do not iterate over a region of the heap that
4163 // contains garbage (update_region_limit() will also move
4164 // _finger to the start of the region if it is found empty).
4165 update_region_limit();
4166 // We will start from _finger not from the start of the region,
4167 // as we might be restarting this task after aborting half-way
4168 // through scanning this region. In this case, _finger points to
4169 // the address where we last found a marked object. If this is a
4170 // fresh region, _finger points to start().
4171 MemRegion mr = MemRegion(_finger, _region_limit);
4172
4173 if (_cm->verbose_low()) {
4174 gclog_or_tty->print_cr("[%u] we're scanning part "
4175 "["PTR_FORMAT", "PTR_FORMAT") "
4176 "of region "HR_FORMAT,
4177 _worker_id, p2i(_finger), p2i(_region_limit),
4178 HR_FORMAT_PARAMS(_curr_region));
4179 }
4180
4181 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4182 "humongous regions should go around loop once only");
4183
4184 // Some special cases:
4185 // If the memory region is empty, we can just give up the region.
4186 // If the current region is humongous then we only need to check
4187 // the bitmap for the bit associated with the start of the object,
4188 // scan the object if it's live, and give up the region.
4189 // Otherwise, let's iterate over the bitmap of the part of the region
4190 // that is left.
4191 // If the iteration is successful, give up the region.
4192 if (mr.is_empty()) {
4193 giveup_current_region();
4194 regular_clock_call();
4195 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4196 if (_nextMarkBitMap->isMarked(mr.start())) {
4197 // The object is marked - apply the closure
4198 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4199 bitmap_closure.do_bit(offset);
4200 }
4201 // Even if this task aborted while scanning the humongous object
4202 // we can (and should) give up the current region.
4203 giveup_current_region();
4204 regular_clock_call();
4205 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4206 giveup_current_region();
4207 regular_clock_call();
4208 } else {
4209 assert(has_aborted(), "currently the only way to do so");
4210 // The only way to abort the bitmap iteration is to return
4211 // false from the do_bit() method. However, inside the
4212 // do_bit() method we move the _finger to point to the
4213 // object currently being looked at. So, if we bail out, we
4214 // have definitely set _finger to something non-null.
4215 assert(_finger != NULL, "invariant");
4216
4217 // Region iteration was actually aborted. So now _finger
4218 // points to the address of the object we last scanned. If we
4219 // leave it there, when we restart this task, we will rescan
4220 // the object. It is easy to avoid this. We move the finger by
4221 // enough to point to the next possible object header (the
4222 // bitmap knows by how much we need to move it as it knows its
4223 // granularity).
4224 assert(_finger < _region_limit, "invariant");
4225 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4226 // Check if bitmap iteration was aborted while scanning the last object
4227 if (new_finger >= _region_limit) {
4228 giveup_current_region();
4229 } else {
4230 move_finger_to(new_finger);
4231 }
4232 }
4233 }
4234 // At this point we have either completed iterating over the
4235 // region we were holding on to, or we have aborted.
4236
4237 // We then partially drain the local queue and the global stack.
4238 // (Do we really need this?)
4239 drain_local_queue(true);
4240 drain_global_stack(true);
4241
4242 // Read the note on the claim_region() method on why it might
4243 // return NULL with potentially more regions available for
4244 // claiming and why we have to check out_of_regions() to determine
4245 // whether we're done or not.
4246 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4247 // We are going to try to claim a new region. We should have
4248 // given up on the previous one.
4249 // Separated the asserts so that we know which one fires.
4250 assert(_curr_region == NULL, "invariant");
4251 assert(_finger == NULL, "invariant");
4252 assert(_region_limit == NULL, "invariant");
4253 if (_cm->verbose_low()) {
4254 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4255 }
4256 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4257 if (claimed_region != NULL) {
4258 // Yes, we managed to claim one
4259 statsOnly( ++_regions_claimed );
4260
4261 if (_cm->verbose_low()) {
4262 gclog_or_tty->print_cr("[%u] we successfully claimed "
4263 "region "PTR_FORMAT,
4264 _worker_id, p2i(claimed_region));
4265 }
4266
4267 setup_for_region(claimed_region);
4268 assert(_curr_region == claimed_region, "invariant");
4269 }
4270 // It is important to call the regular clock here. It might take
4271 // a while to claim a region if, for example, we hit a large
4272 // block of empty regions. So we need to call the regular clock
4273 // method once round the loop to make sure it's called
4274 // frequently enough.
4275 regular_clock_call();
4276 }
4277
4278 if (!has_aborted() && _curr_region == NULL) {
4279 assert(_cm->out_of_regions(),
4280 "at this point we should be out of regions");
4281 }
4282 } while ( _curr_region != NULL && !has_aborted());
4283
4284 if (!has_aborted()) {
4285 // We cannot check whether the global stack is empty, since other
4286 // tasks might be pushing objects to it concurrently.
4287 assert(_cm->out_of_regions(),
4288 "at this point we should be out of regions");
4289
4290 if (_cm->verbose_low()) {
4291 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4292 }
4293
4294 // Try to reduce the number of available SATB buffers so that
4295 // remark has less work to do.
4296 drain_satb_buffers();
4297 }
4298
4299 // Since we've done everything else, we can now totally drain the
4300 // local queue and global stack.
4301 drain_local_queue(false);
4302 drain_global_stack(false);
4303
4304 // Attempt to steal work from other tasks' queues.
4305 if (do_stealing && !has_aborted()) {
4306 // We have not aborted. This means that we have finished all that
4307 // we could. Let's try to do some stealing...
4308
4309 // We cannot check whether the global stack is empty, since other
4310 // tasks might be pushing objects to it concurrently.
4311 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4312 "only way to reach here");
4313
4314 if (_cm->verbose_low()) {
4315 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4316 }
4317
4318 while (!has_aborted()) {
4319 oop obj;
4320 statsOnly( ++_steal_attempts );
4321
4322 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4323 if (_cm->verbose_medium()) {
4324 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4325 _worker_id, p2i((void*) obj));
4326 }
4327
4328 statsOnly( ++_steals );
4329
4330 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4331 "any stolen object should be marked");
4332 scan_object(obj);
4333
4334 // And since we're towards the end, let's totally drain the
4335 // local queue and global stack.
4336 drain_local_queue(false);
4337 drain_global_stack(false);
4338 } else {
4339 break;
4340 }
4341 }
4342 }
4343
4344 // If we are about to wrap up and go into termination, check if we
4345 // should raise the overflow flag.
4346 if (do_termination && !has_aborted()) {
4347 if (_cm->force_overflow()->should_force()) {
4348 _cm->set_has_overflown();
4349 regular_clock_call();
4350 }
4351 }
4352
4353 // We still haven't aborted. Now, let's try to get into the
4354 // termination protocol.
4355 if (do_termination && !has_aborted()) {
4356 // We cannot check whether the global stack is empty, since other
4357 // tasks might be concurrently pushing objects on it.
4358 // Separated the asserts so that we know which one fires.
4359 assert(_cm->out_of_regions(), "only way to reach here");
4360 assert(_task_queue->size() == 0, "only way to reach here");
4361
4362 if (_cm->verbose_low()) {
4363 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4364 }
4365
4366 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4367
4368 // The CMTask class also extends the TerminatorTerminator class,
4369 // hence its should_exit_termination() method will also decide
4370 // whether to exit the termination protocol or not.
4371 bool finished = (is_serial ||
4372 _cm->terminator()->offer_termination(this));
4373 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4374 _termination_time_ms +=
4375 termination_end_time_ms - _termination_start_time_ms;
4376
4377 if (finished) {
4378 // We're all done.
4379
4380 if (_worker_id == 0) {
4381 // let's allow task 0 to do this
4382 if (concurrent()) {
4383 assert(_cm->concurrent_marking_in_progress(), "invariant");
4384 // we need to set this to false before the next
4385 // safepoint. This way we ensure that the marking phase
4386 // doesn't observe any more heap expansions.
4387 _cm->clear_concurrent_marking_in_progress();
4388 }
4389 }
4390
4391 // We can now guarantee that the global stack is empty, since
4392 // all other tasks have finished. We separated the guarantees so
4393 // that, if a condition is false, we can immediately find out
4394 // which one.
4395 guarantee(_cm->out_of_regions(), "only way to reach here");
4396 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4397 guarantee(_task_queue->size() == 0, "only way to reach here");
4398 guarantee(!_cm->has_overflown(), "only way to reach here");
4399 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4400
4401 if (_cm->verbose_low()) {
4402 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4403 }
4404 } else {
4405 // Apparently there's more work to do. Let's abort this task. The
4406 // caller will restart it and we can hopefully find more things to do.
4407
4408 if (_cm->verbose_low()) {
4409 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4410 _worker_id);
4411 }
4412
4413 set_has_aborted();
4414 statsOnly( ++_aborted_termination );
4415 }
4416 }
4417
4418 // Mainly for debugging purposes to make sure that a pointer to the
4419 // closure which was allocated on the stack in this frame doesn't
4420 // escape it by accident.
4421 set_cm_oop_closure(NULL);
4422 double end_time_ms = os::elapsedVTime() * 1000.0;
4423 double elapsed_time_ms = end_time_ms - _start_time_ms;
4424 // Update the step history.
4425 _step_times_ms.add(elapsed_time_ms);
4426
4427 if (has_aborted()) {
4428 // The task was aborted for some reason.
4429
4430 statsOnly( ++_aborted );
4431
4432 if (_has_timed_out) {
4433 double diff_ms = elapsed_time_ms - _time_target_ms;
4434 // Keep statistics of how well we did with respect to hitting
4435 // our target only if we actually timed out (if we aborted for
4436 // other reasons, then the results might get skewed).
4437 _marking_step_diffs_ms.add(diff_ms);
4438 }
4439
4440 if (_cm->has_overflown()) {
4441 // This is the interesting one. We aborted because a global
4442 // overflow was raised. This means we have to restart the
4443 // marking phase and start iterating over regions. However, in
4444 // order to do this we have to make sure that all tasks stop
4445 // what they are doing and re-initialize in a safe manner. We
4446 // will achieve this with the use of two barrier sync points.
4447
4448 if (_cm->verbose_low()) {
4449 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4450 }
4451
4452 if (!is_serial) {
4453 // We only need to enter the sync barrier if being called
4454 // from a parallel context
4455 _cm->enter_first_sync_barrier(_worker_id);
4456
4457 // When we exit this sync barrier we know that all tasks have
4458 // stopped doing marking work. So, it's now safe to
4459 // re-initialize our data structures. At the end of this method,
4460 // task 0 will clear the global data structures.
4461 }
4462
4463 statsOnly( ++_aborted_overflow );
4464
4465 // We clear the local state of this task...
4466 clear_region_fields();
4467
4468 if (!is_serial) {
4469 // ...and enter the second barrier.
4470 _cm->enter_second_sync_barrier(_worker_id);
4471 }
4472 // At this point, if we're during the concurrent phase of
4473 // marking, everything has been re-initialized and we're
4474 // ready to restart.
4475 }
4476
4477 if (_cm->verbose_low()) {
4478 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4479 "elapsed = %1.2lfms <<<<<<<<<<",
4480 _worker_id, _time_target_ms, elapsed_time_ms);
4481 if (_cm->has_aborted()) {
4482 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4483 _worker_id);
4484 }
4485 }
4486 } else {
4487 if (_cm->verbose_low()) {
4488 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4489 "elapsed = %1.2lfms <<<<<<<<<<",
4490 _worker_id, _time_target_ms, elapsed_time_ms);
4491 }
4492 }
4493
4494 _claimed = false;
4495 }
4496
4497 CMTask::CMTask(uint worker_id,
4498 ConcurrentMark* cm,
4499 size_t* marked_bytes,
4500 BitMap* card_bm,
4501 CMTaskQueue* task_queue,
4502 CMTaskQueueSet* task_queues)
4503 : _g1h(G1CollectedHeap::heap()),
4504 _worker_id(worker_id), _cm(cm),
4505 _claimed(false),
4506 _nextMarkBitMap(NULL), _hash_seed(17),
4507 _task_queue(task_queue),
4508 _task_queues(task_queues),
4509 _cm_oop_closure(NULL),
4510 _marked_bytes_array(marked_bytes),
4511 _card_bm(card_bm) {
4512 guarantee(task_queue != NULL, "invariant");
4513 guarantee(task_queues != NULL, "invariant");
4514
4515 statsOnly( _clock_due_to_scanning = 0;
4516 _clock_due_to_marking = 0 );
4517
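// Seed the step-time diff sequence, presumably so that the first
// prediction in do_marking_step() has a sample to work from.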
4518 _marking_step_diffs_ms.add(0.5);
4519 }
4520
4521 // These are formatting macros that are used below to ensure
4522 // consistent formatting. The *_H_* versions are used to format the
4523 // header for a particular value and they should be kept consistent
4524 // with the corresponding macro. Also note that most of the macros add
4525 // the necessary white space (as a prefix) which makes them a bit
4526 // easier to compose.
4527
4528 // All the output lines are prefixed with this string to be able to
4529 // identify them easily in a large log file.
4530 #define G1PPRL_LINE_PREFIX "###"
4531
4532 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4533 #ifdef _LP64
4534 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4535 #else // _LP64
4536 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4537 #endif // _LP64
4538
4539 // For per-region info
4540 #define G1PPRL_TYPE_FORMAT " %-4s"
4541 #define G1PPRL_TYPE_H_FORMAT " %4s"
4542 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4543 #define G1PPRL_BYTE_H_FORMAT " %9s"
4544 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4545 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
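// Each per-region line printed below starts with the "###" prefix
// and shows the region type, its address range, used bytes,
// prev/next live bytes, GC efficiency, remembered set size and
// strong code root size.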
4546
4547 // For summary info
4548 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4549 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4550 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4551 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4552
4553 G1PrintRegionLivenessInfoClosure::
4554 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4555 : _out(out),
4556 _total_used_bytes(0), _total_capacity_bytes(0),
4557 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4558 _hum_used_bytes(0), _hum_capacity_bytes(0),
4559 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4560 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4561 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4562 MemRegion g1_committed = g1h->g1_committed();
4563 MemRegion g1_reserved = g1h->g1_reserved();
4564 double now = os::elapsedTime();
4565
4566 // Print the header of the output.
4567 _out->cr();
4568 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4569 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4570 G1PPRL_SUM_ADDR_FORMAT("committed")
4571 G1PPRL_SUM_ADDR_FORMAT("reserved")
4572 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4573 p2i(g1_committed.start()), p2i(g1_committed.end()),
4574 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4575 HeapRegion::GrainBytes);
4576 _out->print_cr(G1PPRL_LINE_PREFIX);
4577 _out->print_cr(G1PPRL_LINE_PREFIX
4578 G1PPRL_TYPE_H_FORMAT
4579 G1PPRL_ADDR_BASE_H_FORMAT
4580 G1PPRL_BYTE_H_FORMAT
4581 G1PPRL_BYTE_H_FORMAT
4582 G1PPRL_BYTE_H_FORMAT
4583 G1PPRL_DOUBLE_H_FORMAT
4584 G1PPRL_BYTE_H_FORMAT
4585 G1PPRL_BYTE_H_FORMAT,
4586 "type", "address-range",
4587 "used", "prev-live", "next-live", "gc-eff",
4588 "remset", "code-roots");
4589 _out->print_cr(G1PPRL_LINE_PREFIX
4590 G1PPRL_TYPE_H_FORMAT
4591 G1PPRL_ADDR_BASE_H_FORMAT
4592 G1PPRL_BYTE_H_FORMAT
4593 G1PPRL_BYTE_H_FORMAT
4594 G1PPRL_BYTE_H_FORMAT
4595 G1PPRL_DOUBLE_H_FORMAT
4596 G1PPRL_BYTE_H_FORMAT
4597 G1PPRL_BYTE_H_FORMAT,
4598 "", "",
4599 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4600 "(bytes)", "(bytes)");
4601 }
4602
4603 // It takes as a parameter a reference to one of the _hum_* fields; it
4604 // deduces the corresponding value for a region in a humongous region
4605 // series (either the region size, or what's left if the _hum_* field
4606 // is < the region size), and updates the _hum_* field accordingly.
4607 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4608 size_t bytes = 0;
4609 // The > 0 check is to deal with the prev and next live bytes which
4610 // could be 0.
4611 if (*hum_bytes > 0) {
4612 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4613 *hum_bytes -= bytes;
4614 }
4615 return bytes;
4616 }
4617
4618 // It deduces the values for a region in a humongous region series
4619 // from the _hum_* fields and updates those accordingly. It assumes
4620 // that the _hum_* fields have already been set up from the "starts
4621 // humongous" region and we visit the regions in address order.
4622 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4623 size_t* capacity_bytes,
4624 size_t* prev_live_bytes,
4625 size_t* next_live_bytes) {
4626 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4627 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4628 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4629 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4630 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4631 }
4632
4633 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4634 const char* type = "";
4635 HeapWord* bottom = r->bottom();
4636 HeapWord* end = r->end();
4637 size_t capacity_bytes = r->capacity();
4638 size_t used_bytes = r->used();
4639 size_t prev_live_bytes = r->live_bytes();
4640 size_t next_live_bytes = r->next_live_bytes();
4641 double gc_eff = r->gc_efficiency();
4642 size_t remset_bytes = r->rem_set()->mem_size();
4643 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4644
4645 if (r->used() == 0) {
4646 type = "FREE";
4647 } else if (r->is_survivor()) {
4648 type = "SURV";
4649 } else if (r->is_young()) {
4650 type = "EDEN";
4651 } else if (r->startsHumongous()) {
4652 type = "HUMS";
4653
4654 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4655 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4656 "they should have been zeroed after the last time we used them");
4657 // Set up the _hum_* fields.
4658 _hum_capacity_bytes = capacity_bytes;
4659 _hum_used_bytes = used_bytes;
4660 _hum_prev_live_bytes = prev_live_bytes;
4661 _hum_next_live_bytes = next_live_bytes;
4662 get_hum_bytes(&used_bytes, &capacity_bytes,
4663 &prev_live_bytes, &next_live_bytes);
4664 end = bottom + HeapRegion::GrainWords;
4665 } else if (r->continuesHumongous()) {
4666 type = "HUMC";
4667 get_hum_bytes(&used_bytes, &capacity_bytes,
4668 &prev_live_bytes, &next_live_bytes);
4669 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4670 } else {
4671 type = "OLD";
4672 }
4673
4674 _total_used_bytes += used_bytes;
4675 _total_capacity_bytes += capacity_bytes;
4676 _total_prev_live_bytes += prev_live_bytes;
4677 _total_next_live_bytes += next_live_bytes;
4678 _total_remset_bytes += remset_bytes;
4679 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4680
4681 // Print a line for this particular region.
4682 _out->print_cr(G1PPRL_LINE_PREFIX
4683 G1PPRL_TYPE_FORMAT
4684 G1PPRL_ADDR_BASE_FORMAT
4685 G1PPRL_BYTE_FORMAT
4686 G1PPRL_BYTE_FORMAT
4687 G1PPRL_BYTE_FORMAT
4688 G1PPRL_DOUBLE_FORMAT
4689 G1PPRL_BYTE_FORMAT
4690 G1PPRL_BYTE_FORMAT,
4691 type, p2i(bottom), p2i(end),
4692 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4693 remset_bytes, strong_code_roots_bytes);
4694
4695 return false;
4696 }
4697
4698 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4699 // add static memory usages to remembered set sizes
4700 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4701 // Print the footer of the output.
4702 _out->print_cr(G1PPRL_LINE_PREFIX);
4703 _out->print_cr(G1PPRL_LINE_PREFIX
4704 " SUMMARY"
4705 G1PPRL_SUM_MB_FORMAT("capacity")
4706 G1PPRL_SUM_MB_PERC_FORMAT("used")
4707 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4708 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4709 G1PPRL_SUM_MB_FORMAT("remset")
4710 G1PPRL_SUM_MB_FORMAT("code-roots"),
4711 bytes_to_mb(_total_capacity_bytes),
4712 bytes_to_mb(_total_used_bytes),
4713 perc(_total_used_bytes, _total_capacity_bytes),
4714 bytes_to_mb(_total_prev_live_bytes),
4715 perc(_total_prev_live_bytes, _total_capacity_bytes),
4716 bytes_to_mb(_total_next_live_bytes),
4717 perc(_total_next_live_bytes, _total_capacity_bytes),
4718 bytes_to_mb(_total_remset_bytes),
4719 bytes_to_mb(_total_strong_code_roots_bytes));
4720 _out->cr();
4721 }
--- EOF ---