diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java index 2e041f9..e2a8ab3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java @@ -279,6 +279,10 @@ public Object process(Node nd, Stack stack, MapJoinDesc mjDesc = mapJoinOp.getConf(); HiveConf conf = context.conf; + // Unlike in MR, we may call this method multiple times, for each + // small table HTS. But, since it's idempotent, it should be OK. + mjDesc.resetOrder(); + float hashtableMemoryUsage; if (hasGroupBy(mapJoinOp, context)) { hashtableMemoryUsage = conf.getFloatVar(