diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index f18b1f2..cb16523 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -211,7 +211,7 @@ HashTable::HashTable(RuntimeState* state, BufferedBlockMgr::Client* client, num_build_tuples_(num_build_tuples), has_matches_(false), num_probes_(0), num_failed_probes_(0), travel_length_(0), num_hash_collisions_(0), - num_resizes_(0) { + num_resizes_(0), max_probe_(0) { DCHECK_EQ((num_buckets & (num_buckets-1)), 0) << "num_buckets must be a power of 2"; DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0"; DCHECK(stores_tuples_ || stream != NULL); @@ -236,7 +236,7 @@ HashTable::HashTable(MemPool* pool, bool quadratic_probing, int num_buckets) num_build_tuples_(1), has_matches_(false), num_probes_(0), num_failed_probes_(0), travel_length_(0), num_hash_collisions_(0), - num_resizes_(0) { + num_resizes_(0), max_probe_(0) { DCHECK_EQ((num_buckets & (num_buckets-1)), 0) << "num_buckets must be a power of 2"; DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0"; bool ret = Init(); diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h index 3cd3e04..6039ade 100644 --- a/be/src/exec/hash-table.h +++ b/be/src/exec/hash-table.h @@ -665,6 +665,9 @@ class HashTable { /// How many times this table has resized so far. 
int64_t num_resizes_; + + /// Longest probe sequence, in steps, that Probe() has observed so far. + int64_t max_probe_; }; } diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h index f310f52..bc997f7 100644 --- a/be/src/exec/hash-table.inline.h +++ b/be/src/exec/hash-table.inline.h @@ -16,6 +16,10 @@ #ifndef IMPALA_EXEC_HASH_TABLE_INLINE_H #define IMPALA_EXEC_HASH_TABLE_INLINE_H +#include +#include +#include + #include "exec/hash-table.h" namespace impala { @@ -38,6 +42,7 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets, HashTableCtx* ht_ctx, uint32_t hash, bool* found) { DCHECK(buckets != NULL); DCHECK_GT(num_buckets, 0); + //::std::ofstream debug_file("/tmp/probe-debug.txt", ::std::ios::app | ::std::ios::out); *found = false; int64_t bucket_idx = hash & (num_buckets - 1); @@ -47,10 +52,22 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets, int64_t step = 0; do { Bucket* bucket = &buckets[bucket_idx]; - if (!bucket->filled) return bucket_idx; + if (!bucket->filled) { + if (step > max_probe_) { + max_probe_ = step; + ::std::cerr << "Maximum probe length: " << max_probe_ << ::std::endl; + //debug_file << "Maximum probe length: " << max_probe_ << ::std::endl; + } + return bucket_idx; + } if (hash == bucket->hash) { if (ht_ctx != NULL && ht_ctx->Equals(GetRow(bucket, ht_ctx->row_))) { *found = true; + if (step > max_probe_) { + max_probe_ = step; + ::std::cerr << "Maximum probe length: " << max_probe_ << ::std::endl; + //debug_file << "Maximum probe length: " << max_probe_ << ::std::endl; + } return bucket_idx; } // Row equality failed, or not performed. This is a hash collision. 
Continue @@ -69,6 +86,11 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets, bucket_idx = (bucket_idx + 1) & (num_buckets - 1); } } while (LIKELY(step < num_buckets)); + if (step > max_probe_) { + max_probe_ = step; + ::std::cerr << "Maximum probe length: " << max_probe_ << ::std::endl; + //debug_file << "Maximum probe length: " << max_probe_ << ::std::endl; + } DCHECK_EQ(num_filled_buckets_, num_buckets) << "Probing of a non-full table " << "failed: " << quadratic_probing_ << " " << hash; return Iterator::BUCKET_NOT_FOUND; diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h index 860498e..1789c8c 100644 --- a/be/src/util/cpu-info.h +++ b/be/src/util/cpu-info.h @@ -57,6 +57,7 @@ class CpuInfo { /// Returns whether of not the cpu supports this flag inline static bool IsSupported(long flag) { DCHECK(initialized_); + if (SSE4_2 == flag) return false; return (hardware_flags_ & flag) != 0; }