Notes:

- I start hive with "hive -hiveconf hive.tez.optimize=true", not exactly necessary, but it will start the AM/containers right away instead of on first query.
- hive-exec jar should be copied to hdfs:///user/hive/ (location can be changed with: hive.jar.directory). This avoids re-localization of the hive jar.

Hive settings:

// needed because SMB isn't supported on tez yet
set hive.optimize.bucketmapjoin=false;
set hive.optimize.bucketmapjoin.sortedmerge=false;
set hive.auto.convert.sortmerge.join=false;
set hive.auto.convert.sortmerge.join.noconditionaltask=false;
set hive.auto.convert.join.noconditionaltask=true;

// depends on your available mem/cluster, but map/reduce mb should be set to the same for container reuse
set hive.auto.convert.join.noconditionaltask.size=64000000;
set mapred.map.child.java.opts=-server -Xmx3584m -Djava.net.preferIPv4Stack=true;
set mapred.reduce.child.java.opts=-server -Xmx3584m -Djava.net.preferIPv4Stack=true;
set mapreduce.map.memory.mb=4096;
set mapreduce.reduce.memory.mb=4096;

// generic opts
set hive.optimize.reducededuplication.min.reducer=1;
set hive.optimize.mapjoin.mapreduce=true;

// autogather might require you to up the max number of counters, if you run into issues
set hive.stats.autogather=true;
set hive.stats.dbclass=counter;

// tez settings can also go into tez-site if desired
set mapreduce.map.output.compress=true;
set mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;
set tez.runtime.intermediate-output.should-compress=true;
set tez.runtime.intermediate-output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;
set tez.runtime.intermediate-input.is-compressed=true;
set tez.runtime.intermediate-input.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;

// tez groups in the AM
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.orc.splits.include.file.footer=true;
set hive.root.logger=ERROR,console;
set hive.optimize.tez=true;
set hive.vectorized.execution.enabled=true;
set hive.exec.local.cache=true;
set hive.compute.query.using.stats=true;

for tez:

tez.am.resource.memory.mb 8192
tez.am.java.opts -server -Xmx7168m -Djava.net.preferIPv4Stack=true
tez.am.grouping.min-size 16777216
tez.session.client.timeout.secs -1
tez.session.pre-warm.enabled true
tez.session.pre-warm.num.containers 10
tez.am.grouping.split-waves 0.9
tez.am.container.reuse.enabled true
tez.am.container.reuse.rack-fallback.enabled true
tez.am.container.reuse.non-local-fallback.enabled true
tez.am.container.session.delay-allocation-millis -1
tez.am.container.reuse.locality.delay-allocation-millis 250