Details
-
Bug
-
Status: Resolved
-
Blocker
-
Resolution: Fixed
-
Impala 2.3.0, Impala 2.5.0, Impala 2.6.0, Impala 2.7.0, Impala 2.8.0, Impala 2.9.0
Description
We saw a crash in HashJoinNode::GetNext() where EvalConjuncts was touching invalid memory here:
if (match_all_probe_ && !matched_probe_ && probe_row_exists) { int row_idx = out_batch->AddRow(); TupleRow* out_row = out_batch->GetRow(row_idx); CreateOutputRow(out_row, current_probe_row_, NULL); if (EvalConjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { <=== out_batch->CommitLastRow(); VLOG_ROW << "match row: " << PrintRow(out_row, row_desc()); ++num_rows_returned_; COUNTER_SET(rows_returned_counter_, num_rows_returned_); matched_probe_ = true; if (out_batch->AtCapacity() || ReachedLimit()) { *eos = ReachedLimit(); return Status::OK(); } } }
We believe the bug is that 'probe_row_exists' can be true even if there is no probe row. E.g. in the code below, it may break out of the loop and leave the node in a state where there is no valid probe row.
if (probe_batch_pos_ == probe_batch_->num_rows()) { // pass on resources, out_batch might still need them probe_batch_->TransferResourceOwnership(out_batch); probe_batch_pos_ = 0; if (out_batch->AtCapacity()) return Status::OK(); // get new probe batch if (!probe_side_eos_) { while (true) { probe_timer.Stop(); RETURN_IF_ERROR(child(0)->GetNext(state, probe_batch_.get(), &probe_side_eos_)); probe_timer.Start(); if (probe_batch_->num_rows() == 0) { // Empty batches can still contain IO buffers, which need to be passed up to // the caller; transferring resources can fill up out_batch. probe_batch_->TransferResourceOwnership(out_batch); if (probe_side_eos_) { eos_ = true; break; } if (out_batch->AtCapacity()) return Status::OK(); <===== HERE continue; } else { COUNTER_ADD(probe_row_counter_, probe_batch_->num_rows()); break; } } } else { eos_ = true; } // finish up right outer join if (eos_ && match_all_build_) { hash_tbl_iterator_ = hash_tbl_->Begin(); } }