diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 8f6cf29..38380fb 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -480,7 +480,6 @@ spark.query.files=add_part_multiple.q, \ auto_join22.q, \ auto_join23.q, \ auto_join24.q, \ - auto_join25.q, \ auto_join26.q, \ auto_join27.q, \ auto_join28.q, \ @@ -525,8 +524,6 @@ spark.query.files=add_part_multiple.q, \ bucketmapjoin7.q, \ bucketmapjoin8.q, \ bucketmapjoin9.q, \ - bucketmapjoin10.q, \ - bucketmapjoin11.q, \ bucketmapjoin12.q, \ bucketmapjoin13.q, \ bucketmapjoin_negative.q, \ @@ -658,7 +655,6 @@ spark.query.files=add_part_multiple.q, \ join_cond_pushdown_unqual2.q, \ join_cond_pushdown_unqual3.q, \ join_cond_pushdown_unqual4.q, \ - join_empty.q, \ join_filters.q, \ join_filters_overlap.q, \ join_hive_626.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java index 819b020..670cdd3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java @@ -21,7 +21,6 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -43,7 +42,6 @@ import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SparkWork; @@ -88,8 +86,8 @@ private boolean containsOp(BaseWork work, Class clazz) { public SparkMapJoinTaskDispatcher(PhysicalContext pc) { super(); physicalContext = pc; - sparkWorkMap = new HashMap(); - dependencyGraph = new HashMap>(); + sparkWorkMap = new LinkedHashMap(); + dependencyGraph = new LinkedHashMap>(); } // Move the specified work from the sparkWork to the targetWork @@ -228,7 +226,7 @@ public Object dispatch(Node nd, Stack stack, Object... nos) } } - Map createdTaskMap = new HashMap(); + Map createdTaskMap = new LinkedHashMap(); // Now create SparkTasks from the SparkWorks, also set up dependency for (SparkWork work : dependencyGraph.keySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java index 9f8f7f8..78cbc6d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java @@ -113,6 +113,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException { GenSparkProcContext context = (GenSparkProcContext) procContext; + + if (!nd.getClass().equals(MapJoinOperator.class)) { + return null; + } + MapJoinOperator mapJoinOp = (MapJoinOperator)nd; if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index a76cac5..5965cc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; @@ -58,9 +60,12 @@ import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; +import org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism; +import org.apache.hadoop.hive.ql.optimizer.spark.SparkMapJoinOptimizer; +import org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory; import org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver; import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx; @@ -114,8 +119,8 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, new SetSparkReducerParallelism()); // TODO: need to research and verify support convert join to map join optimization. - //opRules.put(new RuleRegExp(new String("Convert Join to Map-join"), - // JoinOperator.getOperatorName() + "%"), new SparkMapJoinOptimizer()); + opRules.put(new RuleRegExp(new String("Convert Join to Map-join"), + JoinOperator.getOperatorName() + "%"), new SparkMapJoinOptimizer()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along @@ -147,8 +152,8 @@ protected void generateTaskTree(List> rootTasks, Pa opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork); - //opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", - // MapJoinOperator.getOperatorName() + "%"), new SparkReduceSinkMapJoinProc()); + opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", + MapJoinOperator.getOperatorName() + "%"), new SparkReduceSinkMapJoinProc()); opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), @@ -265,6 +270,8 @@ protected void optimizeTaskPlan(List> rootTasks, Pa physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx); + physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx); + if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) { physicalCtx = new NullScanOptimizer().resolve(physicalCtx); } else { diff --git ql/src/test/queries/clientpositive/parallel_join1.q ql/src/test/queries/clientpositive/parallel_join1.q index 3e29fbb..8a58c7e 100644 --- ql/src/test/queries/clientpositive/parallel_join1.q +++ ql/src/test/queries/clientpositive/parallel_join1.q @@ -1,7 +1,7 @@ set mapreduce.job.reduces=4; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; --- SORT_BEFORE_DIFF +-- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/ppd_multi_insert.q ql/src/test/queries/clientpositive/ppd_multi_insert.q index 06fe7ce..c9080ca 100644 --- ql/src/test/queries/clientpositive/ppd_multi_insert.q +++ ql/src/test/queries/clientpositive/ppd_multi_insert.q @@ -1,6 +1,8 @@ set hive.optimize.ppd=true; set hive.ppd.remove.duplicatefilters=false; +-- SORT_QUERY_RESULTS + CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE mi2(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE mi3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/ptf_streaming.q ql/src/test/queries/clientpositive/ptf_streaming.q index 6fa6573..04b674c 100644 --- ql/src/test/queries/clientpositive/ptf_streaming.q +++ ql/src/test/queries/clientpositive/ptf_streaming.q @@ -1,5 +1,7 @@ create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver'; +-- SORT_QUERY_RESULTS + --1. test1 select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, diff --git ql/src/test/queries/clientpositive/subquery_exists.q ql/src/test/queries/clientpositive/subquery_exists.q index f812e36..b16584e 100644 --- ql/src/test/queries/clientpositive/subquery_exists.q +++ ql/src/test/queries/clientpositive/subquery_exists.q @@ -1,4 +1,4 @@ - +-- SORT_QUERY_RESULTS -- no agg, corr explain @@ -42,4 +42,4 @@ from (select * from src a where b.value = a.value and a.key = b.key and a.value > 'val_9') ) a -; \ No newline at end of file +; diff --git ql/src/test/queries/clientpositive/subquery_multiinsert.q ql/src/test/queries/clientpositive/subquery_multiinsert.q index ed36d9e..c250737 100644 --- ql/src/test/queries/clientpositive/subquery_multiinsert.q +++ ql/src/test/queries/clientpositive/subquery_multiinsert.q @@ -1,5 +1,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook; +-- SORT_QUERY_RESULTS + CREATE TABLE src_4( key STRING, value STRING @@ -79,4 +81,4 @@ INSERT OVERWRITE TABLE src_5 select * from src_4 ; select * from src_5 -; \ No newline at end of file +; diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index f2279eb..64082e9 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,5 +1,7 @@ SET hive.vectorized.execution.enabled=true; +-- SORT_QUERY_RESULTS + DROP TABLE part_staging; DROP TABLE part_orc; diff --git ql/src/test/results/clientpositive/parallel_join1.q.out ql/src/test/results/clientpositive/parallel_join1.q.out index 81604b5..16b263c 100644 --- ql/src/test/results/clientpositive/parallel_join1.q.out +++ ql/src/test/results/clientpositive/parallel_join1.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- SORT_BEFORE_DIFF +PREHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_j1 -POSTHOOK: query: -- SORT_BEFORE_DIFF +POSTHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -114,36 +114,140 @@ POSTHOOK: Input: default@dest_j1 0 val_0 0 val_0 0 val_0 +10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 103 val_103 103 val_103 103 val_103 103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 11 val_11 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 114 val_114 +116 val_116 118 val_118 118 val_118 118 val_118 118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +120 val_120 +120 val_120 125 val_125 125 val_125 125 val_125 125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 129 val_129 129 val_129 129 val_129 129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 15 val_15 15 val_15 15 val_15 15 val_15 150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 165 val_165 165 val_165 165 val_165 165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 169 val_169 169 val_169 169 val_169 @@ -160,15 +264,38 @@ POSTHOOK: Input: default@dest_j1 169 val_169 169 val_169 169 val_169 +17 val_17 +170 val_170 172 val_172 172 val_172 172 val_172 172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 176 val_176 176 val_176 176 val_176 176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 183 val_183 +186 val_186 187 val_187 187 val_187 187 val_187 @@ -178,731 +305,112 @@ POSTHOOK: Input: default@dest_j1 187 val_187 187 val_187 187 val_187 +189 val_189 19 val_19 190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 213 val_213 213 val_213 213 val_213 213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 217 val_217 217 val_217 217 val_217 217 val_217 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -228 val_228 -235 val_235 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -257 val_257 -26 val_26 -26 val_26 -26 val_26 -26 val_26 -260 val_260 -275 val_275 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -286 val_286 -305 val_305 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -323 val_323 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -33 val_33 -338 val_338 -341 val_341 -345 val_345 -356 val_356 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -37 val_37 -37 val_37 -37 val_37 -37 val_37 -374 val_374 -378 val_378 -389 val_389 -392 val_392 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -4 val_4 -400 val_400 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -411 val_411 -419 val_419 -437 val_437 -44 val_44 -444 val_444 -448 val_448 -455 val_455 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -477 val_477 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -484 val_484 -491 val_491 -495 val_495 -51 val_51 -51 val_51 -51 val_51 -51 val_51 -66 val_66 -77 val_77 -8 val_8 -80 val_80 -84 val_84 -84 val_84 -84 val_84 -84 val_84 -95 val_95 -95 val_95 -95 val_95 -95 val_95 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -111 val_111 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -12 val_12 -12 val_12 -126 val_126 -133 val_133 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -155 val_155 -162 val_162 -166 val_166 -177 val_177 -180 val_180 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -214 val_214 -218 val_218 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -229 val_229 -229 val_229 -229 val_229 -229 val_229 -247 val_247 -258 val_258 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -27 val_27 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -283 val_283 -287 val_287 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -30 val_30 -302 val_302 -306 val_306 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -335 val_335 -339 val_339 -34 val_34 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -360 val_360 -364 val_364 -368 val_368 -375 val_375 -379 val_379 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -386 val_386 -393 val_393 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -41 val_41 -427 val_427 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -449 val_449 -452 val_452 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -467 val_467 -470 val_470 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -481 val_481 -485 val_485 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -496 val_496 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -67 val_67 -67 val_67 -67 val_67 -67 val_67 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -74 val_74 -78 val_78 -85 val_85 -9 val_9 -92 val_92 -96 val_96 -105 val_105 -116 val_116 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -145 val_145 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -156 val_156 -163 val_163 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -17 val_17 -170 val_170 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -178 val_178 -181 val_181 -189 val_189 -192 val_192 -196 val_196 -2 val_2 -20 val_20 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -222 val_222 -226 val_226 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -24 val_24 -24 val_24 -24 val_24 -24 val_24 -244 val_244 -248 val_248 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -262 val_262 -266 val_266 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -28 val_28 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -284 val_284 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -291 val_291 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -310 val_310 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -332 val_332 -336 val_336 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -365 val_365 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -394 val_394 -402 val_402 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -42 val_42 -42 val_42 -42 val_42 -42 val_42 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -435 val_435 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -446 val_446 -453 val_453 -457 val_457 -460 val_460 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -475 val_475 -479 val_479 -482 val_482 -493 val_493 -497 val_497 -53 val_53 -57 val_57 -64 val_64 -82 val_82 -86 val_86 -97 val_97 -97 val_97 -97 val_97 -97 val_97 -10 val_10 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -120 val_120 -120 val_120 -120 val_120 -120 val_120 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -131 val_131 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -153 val_153 -157 val_157 -160 val_160 -164 val_164 -164 val_164 -164 val_164 -164 val_164 -168 val_168 -175 val_175 -175 val_175 -175 val_175 -175 val_175 -179 val_179 -179 val_179 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -18 val_18 -18 val_18 -186 val_186 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -193 val_193 -197 val_197 -197 val_197 -197 val_197 -197 val_197 -201 val_201 -205 val_205 -205 val_205 -205 val_205 -205 val_205 -209 val_209 -209 val_209 -209 val_209 -209 val_209 -216 val_216 -216 val_216 -216 val_216 -216 val_216 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 223 val_223 223 val_223 223 val_223 223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 230 val_230 230 val_230 230 val_230 @@ -928,32 +436,144 @@ POSTHOOK: Input: default@dest_j1 230 val_230 230 val_230 230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 238 val_238 238 val_238 238 val_238 238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 249 val_249 252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 256 val_256 256 val_256 256 val_256 256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 278 val_278 278 val_278 278 val_278 278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 281 val_281 281 val_281 281 val_281 281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 289 val_289 +291 val_291 292 val_292 296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 311 val_311 311 val_311 311 val_311 @@ -964,18 +584,75 @@ POSTHOOK: Input: default@dest_j1 311 val_311 311 val_311 315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 322 val_322 322 val_322 322 val_322 322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 333 val_333 333 val_333 333 val_333 333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 344 val_344 344 val_344 344 val_344 344 val_344 +345 val_345 348 val_348 348 val_348 348 val_348 @@ -1001,11 +678,54 @@ POSTHOOK: Input: default@dest_j1 348 val_348 348 val_348 348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 362 val_362 +364 val_364 +365 val_365 366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 373 val_373 +374 val_374 +375 val_375 377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 384 val_384 384 val_384 384 val_384 @@ -1015,14 +735,60 @@ POSTHOOK: Input: default@dest_j1 384 val_384 384 val_384 384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 395 val_395 395 val_395 395 val_395 395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 399 val_399 399 val_399 399 val_399 399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 403 val_403 403 val_403 403 val_403 @@ -1032,21 +798,115 @@ POSTHOOK: Input: default@dest_j1 403 val_403 403 val_403 403 val_403 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 407 val_407 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +413 val_413 +413 val_413 414 val_414 414 val_414 414 val_414 414 val_414 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 418 val_418 +419 val_419 +42 val_42 +42 val_42 +42 val_42 +42 val_42 421 val_421 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +427 val_427 429 val_429 429 val_429 429 val_429 429 val_429 43 val_43 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 432 val_432 +435 val_435 436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +44 val_44 443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 454 val_454 454 val_454 454 val_454 @@ -1056,10 +916,51 @@ POSTHOOK: Input: default@dest_j1 454 val_454 454 val_454 454 val_454 +455 val_455 +457 val_457 458 val_458 458 val_458 458 val_458 458 val_458 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 469 val_469 469 val_469 469 val_469 @@ -1086,11 +987,57 @@ POSTHOOK: Input: default@dest_j1 469 val_469 469 val_469 47 val_47 +470 val_470 472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 483 val_483 +484 val_484 +485 val_485 487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 490 val_490 +491 val_491 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +493 val_493 494 val_494 +495 val_495 +496 val_496 +497 val_497 498 val_498 498 val_498 498 val_498 @@ -1100,26 +1047,69 @@ POSTHOOK: Input: default@dest_j1 498 val_498 498 val_498 498 val_498 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 54 val_54 +57 val_57 58 val_58 58 val_58 58 val_58 58 val_58 +64 val_64 65 val_65 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 72 val_72 72 val_72 72 val_72 72 val_72 +74 val_74 76 val_76 76 val_76 76 val_76 76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 83 val_83 83 val_83 83 val_83 83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +86 val_86 87 val_87 +9 val_9 90 val_90 90 val_90 90 val_90 @@ -1129,6 +1119,16 @@ POSTHOOK: Input: default@dest_j1 90 val_90 90 val_90 90 val_90 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 98 val_98 98 val_98 98 val_98 diff --git ql/src/test/results/clientpositive/ppd_multi_insert.q.out ql/src/test/results/clientpositive/ppd_multi_insert.q.out index b9394eb..b04fd0c 100644 --- ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -1,8 +1,12 @@ -PREHOOK: query: CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@mi1 -POSTHOOK: query: CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@mi1 diff --git ql/src/test/results/clientpositive/ptf_streaming.q.out ql/src/test/results/clientpositive/ptf_streaming.q.out index cb94983..ef7ae88 100644 --- ql/src/test/results/clientpositive/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/ptf_streaming.q.out @@ -4,7 +4,9 @@ PREHOOK: Output: noopstreaming POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Output: noopstreaming -PREHOOK: query: --1. test1 +PREHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,7 +18,9 @@ from noopstreaming(on part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: --1. test1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -118,20 +122,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -142,10 +132,24 @@ POSTHOOK: Input: default@part 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 2165969..8a09449 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -155,14 +155,13 @@ POSTHOOK: query: -- number of rows explain select * from emp e join dept d on (e.deptid = d.deptid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -173,13 +172,21 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptname} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: e @@ -187,33 +194,32 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -230,14 +236,13 @@ POSTHOOK: query: -- 2 relations, 2 attributes explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -248,12 +253,21 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: emp @@ -261,36 +275,35 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -303,14 +316,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -321,12 +333,21 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: e @@ -334,33 +355,32 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -377,14 +397,13 @@ POSTHOOK: query: -- 2 relations, 3 attributes explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -395,12 +414,21 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string), deptname (type: string) - sort order: +++ - Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 + keys: + 0 deptid (type: int), lastname (type: string), lastname (type: string) + 1 deptid (type: int), deptname (type: string), deptname (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: emp @@ -408,36 +436,35 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string), lastname (type: string) - sort order: +++ - Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + keys: + 0 deptid (type: int), lastname (type: string), lastname (type: string) + 1 deptid (type: int), deptname (type: string), deptname (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -454,14 +481,13 @@ POSTHOOK: query: -- 3 relations, 1 attribute explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -472,13 +498,18 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptname} + 2 {lastname} {deptid} {locid} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 deptid (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: e1 @@ -486,13 +517,23 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {lastname} {locid} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 deptid (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: e @@ -500,35 +541,36 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {lastname} {deptid} {locid} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 deptid (type: int) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -543,14 +585,13 @@ POSTHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -561,63 +602,79 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptname} + 2 {state} {locid} {zip} {year} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 locid (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) - Map 4 - Map Operator Tree: - TableScan alias: l Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: locid is not null (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: locid (type: int) - sort order: + - Map-reduce partition columns: locid (type: int) - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: state (type: string), zip (type: bigint), year (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {state} {zip} {year} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 locid (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {state} {locid} {zip} {year} + keys: + 0 deptid (type: int) + 1 deptid (type: int) + 2 locid (type: int) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + input vertices: + 1 Map 1 + 2 Map 3 + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -634,14 +691,13 @@ POSTHOOK: query: -- 3 relations and 2 attribute explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,62 +708,79 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 + 2 {state} {locid} {zip} {year} + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + 2 locid (type: int), state (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: locid (type: int) - Map 4 - Map Operator Tree: - TableScan alias: l Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid is not null and state is not null) (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: locid (type: int), state (type: string) - sort order: ++ - Map-reduce partition columns: locid (type: int), state (type: string) - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: zip (type: bigint), year (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 2 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {zip} {year} + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + 2 locid (type: int), state (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {lastname} {deptid} {locid} + 1 {deptid} {deptname} + 2 {state} {locid} {zip} {year} + keys: + 0 deptid (type: int), lastname (type: string) + 1 deptid (type: int), deptname (type: string) + 2 locid (type: int), state (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + input vertices: + 1 Map 1 + 2 Map 3 + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out index 1ca917e..9ba25c8 100644 --- ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -21,19 +21,16 @@ SELECT src1.key as k1, src1.value as v1, ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -45,11 +42,24 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -61,30 +71,31 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -98,7 +109,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/auto_join1.q.out ql/src/test/results/clientpositive/spark/auto_join1.q.out index ea4364a..8704532 100644 --- ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -15,16 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -35,13 +34,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -49,33 +56,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join10.q.out ql/src/test/results/clientpositive/spark/auto_join10.q.out index 1ffb8e0..1a2e009 100644 --- ql/src/test/results/clientpositive/spark/auto_join10.q.out +++ ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -15,18 +15,16 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -35,15 +33,26 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -52,40 +61,39 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_join11.q.out ql/src/test/results/clientpositive/spark/auto_join11.q.out index 8299359..a6445bd 100644 --- ql/src/test/results/clientpositive/spark/auto_join11.q.out +++ ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -15,15 +15,13 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,13 +36,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -56,34 +64,34 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_join12.q.out ql/src/test/results/clientpositive/spark/auto_join12.q.out index 7a9610f..cb3cf6e 100644 --- ql/src/test/results/clientpositive/spark/auto_join12.q.out +++ ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -21,15 +21,13 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -44,71 +42,91 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) + predicate: ((key < 80) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 1 + 2 Map 4 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_join13.q.out ql/src/test/results/clientpositive/spark/auto_join13.q.out index a7f03e1..fecdde1 100644 --- ql/src/test/results/clientpositive/spark/auto_join13.q.out +++ ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -21,16 +21,13 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,90 +42,101 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 6 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 (_col0 + _col2) (type: double) + 1 UDFToDouble(_col0) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col2) is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col2) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col2) (type: double) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col2) is not null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 (_col0 + _col2) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col3} - 1 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_join14.q.out ql/src/test/results/clientpositive/spark/auto_join14.q.out index f8c2f71..7147151 100644 --- ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -19,16 +19,37 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,47 +60,33 @@ STAGE PLANS: Filter Operator predicate: ((key > 100) and key is not null) (type: boolean) Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out index 5202b2d..be44271 100644 --- ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -15,16 +15,13 @@ SORT BY k1, v1, k2, v2 ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -35,13 +32,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -49,30 +57,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/auto_join16.q.out ql/src/test/results/clientpositive/spark/auto_join16.q.out index e5cecc4..563cf8f 100644 --- ql/src/test/results/clientpositive/spark/auto_join16.q.out +++ ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -15,15 +15,13 @@ ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) where tab.value < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,12 +36,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 key (type: string), value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: tab @@ -51,34 +60,34 @@ STAGE PLANS: Filter Operator predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} - outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_join17.q.out ql/src/test/results/clientpositive/spark/auto_join17.q.out index 9d9915a..562ba8e 100644 --- ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -15,16 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -35,13 +34,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -49,34 +56,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out index f716db1..ab767b0 100644 --- ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -17,16 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -37,13 +36,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -51,33 +58,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col8 + input vertices: + 1 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join2.q.out ql/src/test/results/clientpositive/spark/auto_join2.q.out index 72bd25e..4e5f6d8 100644 --- ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -15,17 +15,16 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -36,79 +35,93 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col5) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col5) (type: double) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col11 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col11 + input vertices: + 0 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out index 4dbf97a..8924053 100644 --- ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -15,30 +15,16 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -46,13 +32,22 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: src1 @@ -60,37 +55,66 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 5 + 1 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -105,7 +129,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -166,30 +190,16 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -197,13 +207,22 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: src1 @@ -211,37 +230,66 @@ STAGE PLANS: Filter Operator predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 5 + 1 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -256,7 +304,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/auto_join21.q.out ql/src/test/results/clientpositive/spark/auto_join21.q.out index 1f2443b..620a949 100644 --- ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,58 +22,84 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/auto_join22.q.out ql/src/test/results/clientpositive/spark/auto_join22.q.out index c6729c6..0b2c5b7 100644 --- ql/src/test/results/clientpositive/spark/auto_join22.q.out +++ ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -5,16 +5,13 @@ POSTHOOK: query: explain SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -25,83 +22,94 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col3} + keys: + 0 key (type: string) + 1 _col2 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src4 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col3} + keys: + 0 key (type: string) + 1 _col2 (type: string) + outputColumnNames: _col8 + input vertices: + 0 Map 4 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col2} - outputColumnNames: _col8 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: string) - outputColumnNames: _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out index 4c167a4..e7571b4 100644 --- ql/src/test/results/clientpositive/spark/auto_join23.q.out +++ ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,11 +22,23 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -36,28 +46,29 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/auto_join24.q.out ql/src/test/results/clientpositive/spark/auto_join24.q.out index f80a863..6263c9a 100644 --- ql/src/test/results/clientpositive/spark/auto_join24.q.out +++ ql/src/test/results/clientpositive/spark/auto_join24.q.out @@ -25,15 +25,13 @@ POSTHOOK: query: explain SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -44,12 +42,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {cnt} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -57,35 +66,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - value expressions: cnt (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {cnt} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_join26.q.out ql/src/test/results/clientpositive/spark/auto_join26.q.out index 05ac213..1a40b10 100644 --- ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -19,17 +19,39 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -40,51 +62,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/auto_join28.q.out ql/src/test/results/clientpositive/spark/auto_join28.q.out index f11e089..4505e3e 100644 --- ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,58 +22,84 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -103,15 +127,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -122,13 +144,22 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -136,48 +167,65 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -204,29 +252,16 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -234,13 +269,22 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src1 @@ -248,37 +292,65 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 2 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -305,15 +377,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -321,61 +391,87 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/auto_join29.q.out ql/src/test/results/clientpositive/spark/auto_join29.q.out index 0e5c4ac..ea59646 100644 --- ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,58 +22,84 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -611,15 +635,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -630,13 +652,22 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -644,48 +675,65 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1220,29 +1268,16 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -1250,13 +1285,22 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src1 @@ -1264,37 +1308,65 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 2 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1841,15 +1913,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1857,61 +1927,87 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -2459,15 +2555,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2478,13 +2572,18 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -2492,13 +2591,25 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 @@ -2506,33 +2617,34 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -2567,15 +2679,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2586,61 +2696,87 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -3176,15 +3312,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3195,13 +3329,22 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -3209,48 +3352,65 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3285,85 +3445,109 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 2 Map 3 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -3419,15 +3603,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3438,13 +3620,18 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -3452,13 +3639,25 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 @@ -3466,33 +3665,34 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join3.q.out ql/src/test/results/clientpositive/spark/auto_join3.q.out index 1d7756a..515861a 100644 --- ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -15,16 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -35,12 +34,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -48,13 +53,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 @@ -62,35 +77,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 {VALUE._col0} - outputColumnNames: _col0, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 661f73e..23d88b5 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -15,20 +15,18 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -37,14 +35,38 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + value expressions: _col0 (type: string) + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -53,49 +75,48 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -113,18 +134,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -168,81 +177,102 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + value expressions: _col0 (type: string) + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -260,18 +290,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -315,17 +333,15 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -341,7 +357,30 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src @@ -355,41 +394,41 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -407,18 +446,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -468,18 +495,16 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -499,7 +524,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -508,14 +533,57 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 7 + value expressions: _col0 (type: string) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src @@ -524,51 +592,52 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) + expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 7 + 2 Reducer 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -586,29 +655,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -664,18 +710,16 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -692,70 +736,114 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + value expressions: _col0 (type: string) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) + expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 7 + 2 Reducer 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -773,29 +861,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -851,18 +916,16 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -879,70 +942,114 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + value expressions: _col0 (type: string) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) + expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 7 + 2 Reducer 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -960,29 +1067,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -1038,48 +1122,88 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 4 (SORT, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + value expressions: _col0 (type: string) + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -1094,42 +1218,44 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 5 + 1 Reducer 7 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1147,29 +1273,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -1225,48 +1328,88 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 4 (SORT, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + value expressions: _col0 (type: string) + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -1281,42 +1424,44 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 5 + 1 Reducer 7 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1334,29 +1479,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out index 5c3518f..8568b22 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -21,18 +21,16 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 6 <- Map 5 (SORT, 1) - Reducer 8 <- Map 7 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -49,7 +47,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 5 + Map 3 Map Operator Tree: TableScan alias: src @@ -62,7 +60,49 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 6 <- Map 5 (SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: src @@ -76,43 +116,45 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 4 + 2 Reducer 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -130,29 +172,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join32.q.out ql/src/test/results/clientpositive/spark/auto_join32.q.out index a5f0d1f..4319d18 100644 --- ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -27,15 +27,13 @@ on (s.name = v.name) group by s.name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -46,13 +44,23 @@ STAGE PLANS: Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: registration (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {name} + 1 {registration} + keys: + 0 name (type: string) + 1 name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: s @@ -60,36 +68,36 @@ STAGE PLANS: Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + keys: + 0 name (type: string) + 1 name (type: string) + outputColumnNames: _col0, _col8 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_join4.q.out ql/src/test/results/clientpositive/spark/auto_join4.q.out index 20dff67..35c340e 100644 --- ql/src/test/results/clientpositive/spark/auto_join4.q.out +++ ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -37,76 +37,82 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join5.q.out ql/src/test/results/clientpositive/spark/auto_join5.q.out index 5158229..0cd5f96 100644 --- ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -37,16 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,13 +60,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src2 @@ -79,34 +86,33 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join8.q.out ql/src/test/results/clientpositive/spark/auto_join8.q.out index 183d24f..967bbd6 100644 --- ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -37,79 +37,85 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join9.q.out ql/src/test/results/clientpositive/spark/auto_join9.q.out index 7694ca3..657e8ed 100644 --- ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -15,16 +15,15 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -35,13 +34,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -49,33 +56,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col8 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 6f9e51a..9ff868b 100644 --- ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -168,17 +168,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -191,13 +188,16 @@ STAGE PLANS: isSamplingPred: false predicate: date is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: date (type: string) - sort order: + - Map-reduce partition columns: date (type: string) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {dealid} {cityid} {userid} + 1 + keys: + 0 date (type: string) + 1 date (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -247,7 +247,7 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [dim_pay_date] - Map 6 + Map 2 Map Operator Tree: TableScan alias: deal @@ -257,13 +257,16 @@ STAGE PLANS: isSamplingPred: false predicate: dealid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: dealid (type: int) - sort order: + - Map-reduce partition columns: dealid (type: int) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col3} {_col4} {_col9} + 1 + keys: + 0 _col0 (type: int) + 1 dealid (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -313,7 +316,7 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [deal] - Map 7 + Map 3 Map Operator Tree: TableScan alias: order_city @@ -323,13 +326,16 @@ STAGE PLANS: isSamplingPred: false predicate: cityid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cityid (type: int) - sort order: + - Map-reduce partition columns: cityid (type: int) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col4} {_col9} {_col16} + 1 + keys: + 0 _col3 (type: int) + 1 cityid (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -379,7 +385,12 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [order_city] - Map 8 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: orderpayment @@ -389,14 +400,58 @@ STAGE PLANS: isSamplingPred: false predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: date (type: string) - sort order: + - Map-reduce partition columns: date (type: string) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: dealid (type: int), cityid (type: int), userid (type: int) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {dealid} {cityid} {userid} + 1 {date} + keys: + 0 date (type: string) + 1 date (type: string) + outputColumnNames: _col0, _col3, _col4, _col9 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col3} {_col4} {_col9} + 1 {dealid} + keys: + 0 _col0 (type: int) + 1 dealid (type: int) + outputColumnNames: _col3, _col4, _col9, _col16 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col4} {_col9} {_col16} + 1 + keys: + 0 _col3 (type: int) + 1 cityid (type: int) + outputColumnNames: _col4, _col9, _col16 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col9} {_col16} + 1 + keys: + 0 _col4 (type: int) + 1 userid (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -446,7 +501,12 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [orderpayment] - Map 9 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: user @@ -456,13 +516,50 @@ STAGE PLANS: isSamplingPred: false predicate: userid is not null (type: boolean) Statistics: Num rows: 50 Data size: 144 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: userid (type: int) - sort order: + - Map-reduce partition columns: userid (type: int) - Statistics: Num rows: 50 Data size: 144 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col9} {_col16} + 1 + keys: + 0 _col4 (type: int) + 1 userid (type: int) + outputColumnNames: _col9, _col16 + input vertices: + 0 Map 4 + Position of Big Table: 1 + Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col16 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -512,102 +609,6 @@ STAGE PLANS: name: default.user_small Truncated Path -> Alias: /user_small [user] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col3, _col4, _col9 - Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col3 (type: int), _col4 (type: int), _col9 (type: string) - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col2} {VALUE._col3} {VALUE._col8} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col3, _col4, _col9, _col16 - Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col4 (type: int), _col9 (type: string), _col16 (type: int) - auto parallelism: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col3} {VALUE._col8} {VALUE._col15} - 1 - outputColumnNames: _col4, _col9, _col16 - Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col9 (type: string), _col16 (type: int) - auto parallelism: false - Reducer 5 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col8} {VALUE._col15} - 1 - outputColumnNames: _col9, _col16 - Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col9 (type: string), _col16 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index aefd84e..0c7e0e0 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -949,18 +949,16 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -972,12 +970,23 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -989,33 +998,33 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index 9540ccb..32b2f32 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -219,16 +219,15 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -256,24 +255,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -285,31 +269,57 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 0 Reducer 2 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out index 46c1e36..72bb54d 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out @@ -138,34 +138,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -178,51 +179,101 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -263,102 +314,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -460,34 +466,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -500,51 +507,101 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -585,102 +642,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -781,34 +793,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -821,51 +834,101 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -906,102 +969,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1117,34 +1135,37 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1243,25 +1264,30 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 2 - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1274,51 +1300,105 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 4 + 2 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -1359,104 +1439,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [c] - /bucket_big/ds=2008-04-09 [c] - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 - 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index f739979..933a562 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -203,16 +203,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -221,11 +218,16 @@ STAGE PLANS: alias: d Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE GatherStats: false - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 + 1 + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -279,7 +281,7 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [d] - Map 4 + Map 2 Map Operator Tree: TableScan alias: b @@ -289,13 +291,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 2 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -349,23 +356,28 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [b] - Map 6 + Map 5 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 2 - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 2 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -378,22 +390,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -401,29 +413,96 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 5 + 1 Map 2 + Position of Big Table: 2 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 + 1 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -465,54 +544,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [c] - /bucket_big/ds=2008-04-09 [c] - Map 7 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -520,48 +576,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -594,23 +628,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 - 1 - 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index 08d116f..bf44473 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -1054,18 +1054,16 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -1077,12 +1075,23 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -1094,33 +1103,33 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out index ce121ac..4eebc90 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out @@ -104,15 +104,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -125,13 +123,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -185,7 +186,14 @@ STAGE PLANS: name: default.table2 Truncated Path -> Alias: /table2 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -195,13 +203,34 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + input vertices: + 1 Map 1 + Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 0 - auto parallelism: false + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -255,29 +284,6 @@ STAGE PLANS: name: default.table1 Truncated Path -> Alias: /table1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out index 8c12ce7..b7a5752 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out @@ -104,15 +104,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -125,13 +123,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -185,7 +186,14 @@ STAGE PLANS: name: default.table2 Truncated Path -> Alias: /table2 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -195,13 +203,34 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + input vertices: + 1 Map 1 + Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 0 - auto parallelism: false + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -255,29 +284,6 @@ STAGE PLANS: name: default.table1 Truncated Path -> Alias: /table1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 91a9bbf..1e7d2a2 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -117,14 +117,35 @@ select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -135,47 +156,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -198,32 +204,18 @@ from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbuc join tab b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -247,28 +239,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -280,12 +253,52 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -304,16 +317,14 @@ from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on join tab b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -324,13 +335,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: tab_part @@ -338,73 +359,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: tab - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col6, _col7 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string) - outputColumnNames: _col6, _col7 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col7, 5)) - keys: _col6 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col6, _col7 + input vertices: + 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 5 + Select Operator + expressions: _col6 (type: int), _col7 (type: string) + outputColumnNames: _col6, _col7 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(_col7, 5)) + keys: _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Local Work: + Map Reduce Local Work + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -416,12 +404,52 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -440,107 +468,82 @@ from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on join tab_part b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan - alias: x + alias: y Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col1, 5)) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 4 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(_col1, 5)) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Local Work: + Map Reduce Local Work + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -552,12 +555,52 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -576,14 +619,13 @@ select a.key, a.value, b.value from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -594,13 +636,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -608,12 +655,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -621,35 +679,36 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - 2 - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {value} + 1 {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -666,15 +725,13 @@ select a.key, a.value, c.value from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -685,80 +742,88 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -779,32 +844,18 @@ from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -828,28 +879,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -861,12 +893,52 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -885,32 +957,18 @@ from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -928,28 +986,9 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -961,12 +1000,52 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -985,14 +1064,35 @@ select a.key, a.value, b.value from tab a join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1003,46 +1103,32 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1081,14 +1167,35 @@ select a.key, a.value, b.value from tab1 a join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1099,47 +1206,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1152,15 +1244,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1171,76 +1261,84 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col1 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col12 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col12 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col12 + input vertices: + 1 Map 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index b38e660..de5aca4 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -113,15 +113,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -132,76 +130,84 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col1 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col12 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col12 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col12 + input vertices: + 1 Map 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -240,14 +246,35 @@ select a.key, a.value, b.value from tab1 a join src b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -258,47 +285,32 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col1} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -313,71 +325,79 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 1) (type: boolean) + predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 2) (type: boolean) + predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -392,71 +412,79 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 1) (type: boolean) + predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 2) (type: boolean) + predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -471,14 +499,13 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -493,12 +520,21 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col0} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: tab_part @@ -510,32 +546,32 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -548,31 +584,18 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: tab @@ -594,28 +617,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -626,11 +630,52 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 _col0 (type: int) + 1 key (type: int) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -643,32 +688,18 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: tab @@ -690,28 +721,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -722,12 +734,52 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col2 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index f632a0e..737a73f 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -98,47 +98,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Map 2 <- Map 1 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 - Map 3 - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Map 2 Stage: Stage-0 Fetch Operator @@ -234,47 +198,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Map 2 <- Map 1 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 - Map 3 - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Map 2 Stage: Stage-0 Fetch Operator @@ -456,194 +384,197 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + COLUMN_STATS_ACCURATE true + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Map 3 + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] Stage: Stage-2 Dependency Collection @@ -845,199 +776,202 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + COLUMN_STATS_ACCURATE true + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Map 3 + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out index dbdf2b2..a392c68 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out @@ -161,15 +161,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -182,13 +180,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -240,7 +241,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -250,13 +258,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -309,29 +338,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -457,15 +463,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -478,13 +482,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -536,7 +543,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_3 Truncated Path -> Alias: /srcbucket_mapjoin_part_3/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -546,13 +560,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -605,29 +640,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index ebf1766..eeb78b8 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -120,15 +120,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -141,13 +139,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -200,7 +201,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -210,13 +218,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -317,29 +346,6 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -460,15 +466,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -481,13 +485,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -540,7 +547,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -550,13 +564,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -609,29 +644,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -751,15 +763,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -772,13 +782,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -831,7 +844,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -841,13 +861,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -900,29 +941,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1044,15 +1062,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1065,13 +1081,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1124,7 +1143,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1134,13 +1160,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1193,29 +1240,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index a771a56..f78959b 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -158,16 +158,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -180,14 +179,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -240,7 +241,12 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -250,14 +256,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -310,47 +354,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -556,16 +559,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -578,14 +580,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -638,7 +642,12 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -648,14 +657,57 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -708,52 +760,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -976,36 +982,37 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1018,51 +1025,122 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -1103,125 +1181,56 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_2 name: default.srcbucket_mapjoin_part_2 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 520eb47..9d40b9d 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -189,36 +189,37 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,64 +232,107 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Map 3 + /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -301,87 +345,46 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] Stage: Stage-2 Dependency Collection @@ -594,36 +597,37 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -636,64 +640,112 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Map 3 + /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -706,92 +758,46 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index c7ed4d5..ddaa316 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -175,16 +175,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,14 +196,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -252,7 +253,12 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -262,14 +268,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -317,47 +361,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -544,16 +547,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -566,14 +568,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -621,7 +625,12 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -631,14 +640,57 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -686,52 +738,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -752,7 +758,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 2 numRows 464 rawDataSize 8519 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index 57b1fc7..d744755 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -225,93 +225,155 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 110 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + COLUMN_STATS_ACCURATE true + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 110 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -352,115 +414,56 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part name: default.srcbucket_mapjoin_part - Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - /srcbucket_mapjoin_part/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + /srcbucket_mapjoin_part/ds=2008-04-09 [b] Stage: Stage-2 Dependency Collection @@ -659,46 +662,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -707,45 +709,113 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows 928 + rawDataSize 17038 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17966 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -786,35 +856,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_2 name: default.srcbucket_mapjoin_part_2 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -823,83 +871,41 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin + name default.srcbucket_mapjoin_part_2 numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 928 - rawDataSize 17038 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 17966 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out index 2db3388..b8ebfc8 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -127,14 +127,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,14 +146,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -208,7 +209,12 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -218,13 +224,50 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col8 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -278,45 +321,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -342,4 +346,4 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 #### A masked pattern was here #### -0 val_0 +165 val_165 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out index 93c27cb..3f6f93f 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out @@ -126,15 +126,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,13 +145,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -206,7 +207,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -216,13 +224,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -275,29 +304,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -431,15 +437,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -452,13 +456,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -511,7 +518,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -521,13 +535,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -580,29 +615,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out index 77d9fb7..6da94a5 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out @@ -134,34 +134,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1050 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,22 +175,22 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 3 + name default.srcbucket_mapjoin_part_1 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 4200 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -202,35 +203,63 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + /srcbucket_mapjoin_part_1/part=1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1050 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -243,22 +272,22 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 2 + name default.srcbucket_mapjoin_part_2 + numFiles 3 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -271,42 +300,19 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -472,15 +478,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -493,13 +497,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -552,7 +559,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -562,13 +576,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -621,29 +656,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index 20e87d8..b765aaa 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -133,194 +133,197 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 3 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 4200 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 3 + COLUMN_STATS_ACCURATE true + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Map 3 + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part/ds=2008-04-08 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index 1d6b68a..3fa3f8f 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -135,46 +135,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -183,45 +182,108 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -262,35 +324,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_2 name: default.srcbucket_mapjoin_part_2 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -299,78 +339,41 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin + name default.srcbucket_mapjoin_part_2 numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out index 9b7cc22..569e92a 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out @@ -195,14 +195,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -215,13 +214,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -271,7 +273,12 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -281,13 +288,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -337,42 +378,6 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -431,14 +436,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -451,13 +455,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -507,7 +514,12 @@ STAGE PLANS: name: default.test2 Truncated Path -> Alias: /test2 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -517,13 +529,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -573,42 +619,6 @@ STAGE PLANS: name: default.test2 Truncated Path -> Alias: /test2 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -664,14 +674,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -684,14 +693,16 @@ STAGE PLANS: isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key (type: string), value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 (key + key) (type: double) + 1 UDFToDouble(key) (type: double) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -741,7 +752,12 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -751,14 +767,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key + key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + key) (type: double) - sort order: + - Map-reduce partition columns: (key + key) (type: double) - Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key (type: string), value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 (key + key) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -808,42 +857,6 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -902,14 +915,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -922,13 +934,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -978,7 +993,12 @@ STAGE PLANS: name: default.test2 Truncated Path -> Alias: /test2 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -988,13 +1008,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1044,42 +1098,6 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1138,14 +1156,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1158,13 +1175,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1214,7 +1234,12 @@ STAGE PLANS: name: default.test3 Truncated Path -> Alias: /test3 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -1224,13 +1249,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1280,42 +1339,6 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1374,14 +1397,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1394,13 +1416,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1450,7 +1475,12 @@ STAGE PLANS: name: default.test4 Truncated Path -> Alias: /test4 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -1460,13 +1490,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1516,42 +1580,6 @@ STAGE PLANS: name: default.test1 Truncated Path -> Alias: /test1 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1610,14 +1638,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1630,13 +1657,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1686,7 +1716,12 @@ STAGE PLANS: name: default.test3 Truncated Path -> Alias: /test3 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -1696,13 +1731,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1752,42 +1821,6 @@ STAGE PLANS: name: default.test2 Truncated Path -> Alias: /test2 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1846,14 +1879,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1866,13 +1898,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1922,7 +1957,12 @@ STAGE PLANS: name: default.test4 Truncated Path -> Alias: /test4 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -1932,13 +1972,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1988,42 +2062,6 @@ STAGE PLANS: name: default.test2 Truncated Path -> Alias: /test2 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2082,14 +2120,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2102,13 +2139,16 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2158,7 +2198,12 @@ STAGE PLANS: name: default.test4 Truncated Path -> Alias: /test4 [r] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -2168,13 +2213,47 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2224,42 +2303,6 @@ STAGE PLANS: name: default.test3 Truncated Path -> Alias: /test3 [l] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/column_access_stats.q.out ql/src/test/results/clientpositive/spark/column_access_stats.q.out index 6b011fc..5090873 100644 --- ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -372,14 +372,13 @@ FROM T1 JOIN T2 ON T1.key = T2.key PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -390,12 +389,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -403,32 +411,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -495,14 +503,13 @@ FROM T1 JOIN T2 ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -513,12 +520,21 @@ STAGE PLANS: Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -526,32 +542,32 @@ STAGE PLANS: Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), '3' (type: string), _col5 (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), '3' (type: string), _col5 (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -585,14 +601,13 @@ JOIN ON subq1.val = subq2.val PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -607,12 +622,21 @@ STAGE PLANS: expressions: val (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -624,32 +648,32 @@ STAGE PLANS: expressions: val (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -697,15 +721,13 @@ JOIN T3 ON T3.key = T4.key PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -716,13 +738,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: t2 @@ -734,12 +759,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: t1 @@ -751,51 +785,49 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/cross_join.q.out ql/src/test/results/clientpositive/spark/cross_join.q.out index 4e28710..5886e8c 100644 --- ql/src/test/results/clientpositive/spark/cross_join.q.out +++ ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -5,14 +5,13 @@ POSTHOOK: query: -- current explain select src.key from src join src src2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -20,39 +19,51 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -67,14 +78,13 @@ POSTHOOK: query: -- ansi cross join explain select src.key from src cross join src src2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,39 +92,51 @@ STAGE PLANS: TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -129,14 +151,13 @@ POSTHOOK: query: -- appending condition is allowed explain select src.key from src cross join src src2 on src.key=src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,12 +168,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -160,32 +190,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index df3884f..9c3ac05 100644 --- ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -135,34 +135,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((value <> '') and value is not null) (type: boolean) Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col7 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -212,24 +213,27 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] - Map 4 + /filter_join_breaktask/ds=2008-04-08 [g] + Map 3 Map Operator Tree: TableScan - alias: g + alias: m Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((value <> '') and value is not null) (type: boolean) - Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -279,25 +283,77 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] - Map 5 + /filter_join_breaktask/ds=2008-04-08 [m] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: m + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) - Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col7 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col7 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col13 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -347,62 +403,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [m] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col7 - Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: string) - sort order: + - Map-reduce partition columns: _col7 (type: string) - Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: int) - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col13 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col13 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /filter_join_breaktask/ds=2008-04-08 [f] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out index 547eb1f..5d58851 100644 --- ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -569,65 +569,93 @@ FROM ( ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) + keys: key (type: string), value (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Map 5 + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string), value (type: string) + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -639,31 +667,28 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: --++ + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 1 Reducer 5 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: --++ + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) @@ -676,23 +701,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 5a4894f..20229f7 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -2764,19 +2764,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t1 @@ -2800,14 +2799,16 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2861,7 +2862,12 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: t1 @@ -2885,14 +2891,57 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Position of Big Table: 0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 2 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2946,52 +2995,6 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3149,18 +3152,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t1 @@ -3171,27 +3174,23 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), val (type: string) + outputColumnNames: key, val Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3245,7 +3244,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: t1 @@ -3256,23 +3283,60 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3326,63 +3390,6 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: bigint) - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 1d43b53..75ec696 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -2854,19 +2854,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t1 @@ -2890,14 +2889,16 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2951,7 +2952,12 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: t1 @@ -2975,14 +2981,57 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Position of Big Table: 0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 2 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3036,52 +3085,6 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -3239,19 +3242,19 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t1 @@ -3262,27 +3265,23 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), val (type: string) + outputColumnNames: key, val Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3336,7 +3335,52 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false + Reducer 4 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: t1 @@ -3347,23 +3391,60 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Reducer 4 + Position of Big Table: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3417,80 +3498,6 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Reducer 5 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: bigint) - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out index 5cdd660..8f2a784 100644 --- ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out +++ ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out @@ -9,14 +9,13 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -27,13 +26,21 @@ STAGE PLANS: Filter Operator predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -41,33 +48,32 @@ STAGE PLANS: Filter Operator predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -119,14 +125,13 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -138,13 +143,21 @@ STAGE PLANS: Filter Operator predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -153,33 +166,32 @@ STAGE PLANS: Filter Operator predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out index cd6779d..53e0a7a 100644 --- ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out +++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out @@ -112,10 +112,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 1 - numRows 1028 - rawDataSize 10968 - totalSize 11996 + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/innerjoin.q.out ql/src/test/results/clientpositive/spark/innerjoin.q.out index 798ca32..0edfb54 100644 --- ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -19,16 +19,15 @@ FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,13 +38,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -53,33 +60,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -1186,14 +1193,13 @@ POSTHOOK: query: explain select * from (select * from src) inner left outer join on inner.key=src.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1201,13 +1207,21 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -1216,33 +1230,32 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out index b67b4f1..d40bcb6 100644 --- ql/src/test/results/clientpositive/spark/join0.q.out +++ ql/src/test/results/clientpositive/spark/join0.q.out @@ -15,15 +15,13 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,11 +36,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -54,28 +64,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join1.q.out ql/src/test/results/clientpositive/spark/join1.q.out index 3f5b19c..d636635 100644 --- ql/src/test/results/clientpositive/spark/join1.q.out +++ ql/src/test/results/clientpositive/spark/join1.q.out @@ -19,16 +19,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,13 +38,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -53,33 +60,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join10.q.out ql/src/test/results/clientpositive/spark/join10.q.out index d588946..7152b6c 100644 --- ql/src/test/results/clientpositive/spark/join10.q.out +++ ql/src/test/results/clientpositive/spark/join10.q.out @@ -17,17 +17,16 @@ ON (x.key = Y.key) SELECT Y.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -36,15 +35,24 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -53,36 +61,35 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join11.q.out ql/src/test/results/clientpositive/spark/join11.q.out index b6c1cef..af397c1 100644 --- ql/src/test/results/clientpositive/spark/join11.q.out +++ ql/src/test/results/clientpositive/spark/join11.q.out @@ -19,14 +19,13 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,13 +40,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -59,32 +66,32 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join12.q.out ql/src/test/results/clientpositive/spark/join12.q.out index 1540496..95b9b4b 100644 --- ql/src/test/results/clientpositive/spark/join12.q.out +++ ql/src/test/results/clientpositive/spark/join12.q.out @@ -25,14 +25,13 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -47,69 +46,87 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) + predicate: ((key < 80) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 1 + 2 Map 3 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join13.q.out ql/src/test/results/clientpositive/spark/join13.q.out index 3b682fd..7a6876b 100644 --- ql/src/test/results/clientpositive/spark/join13.q.out +++ ql/src/test/results/clientpositive/spark/join13.q.out @@ -25,15 +25,13 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -48,86 +46,95 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 (_col0 + _col2) (type: double) + 1 UDFToDouble(_col0) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col2) is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col2) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col2) (type: double) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col3} - 1 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col2) is not null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 (_col0 + _col2) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join14.q.out ql/src/test/results/clientpositive/spark/join14.q.out index e8ede5f..2da1898 100644 --- ql/src/test/results/clientpositive/spark/join14.q.out +++ ql/src/test/results/clientpositive/spark/join14.q.out @@ -21,16 +21,37 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,47 +62,33 @@ STAGE PLANS: Filter Operator predicate: ((key > 100) and key is not null) (type: boolean) Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join15.q.out ql/src/test/results/clientpositive/spark/join15.q.out index 9e9cd09..0242208 100644 --- ql/src/test/results/clientpositive/spark/join15.q.out +++ ql/src/test/results/clientpositive/spark/join15.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key) SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,13 +22,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -38,30 +46,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join16.q.out ql/src/test/results/clientpositive/spark/join16.q.out index 34a4cb1..0da6032 100644 --- ql/src/test/results/clientpositive/spark/join16.q.out +++ ql/src/test/results/clientpositive/spark/join16.q.out @@ -3,14 +3,13 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT subq.key, tab.value FROM (select a.key, a.value from src a where a.key > 10 ) subq JOIN src tab ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) where tab.value < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -25,12 +24,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 key (type: string), value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: tab @@ -38,32 +46,32 @@ STAGE PLANS: Filter Operator predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} - outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join17.q.out ql/src/test/results/clientpositive/spark/join17.q.out index ea85be6..5f4d95e 100644 --- ql/src/test/results/clientpositive/spark/join17.q.out +++ ql/src/test/results/clientpositive/spark/join17.q.out @@ -57,16 +57,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,14 +78,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -136,7 +137,12 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src2] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -146,14 +152,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,value1,key2,value2 + columns.comments + columns.types int:string:int:string +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { i32 key1, string value1, i32 key2, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,47 +247,6 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,value1,key2,value2 - columns.comments - columns.types int:string:int:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key1, string value1, i32 key2, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join19.q.out ql/src/test/results/clientpositive/spark/join19.q.out index c648310..5b3abac 100644 --- ql/src/test/results/clientpositive/spark/join19.q.out +++ ql/src/test/results/clientpositive/spark/join19.q.out @@ -123,16 +123,13 @@ t6.predicate='http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL' ON (t66.subject=t55.object) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 7 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,13 +144,16 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col2} {_col3} {_col7} + 1 {_col1} + keys: + 0 _col7 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: t2 @@ -165,13 +165,18 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col1 (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: t3 @@ -183,13 +188,18 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col1 (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: t5 @@ -201,103 +211,117 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col2} {_col3} + 1 + 2 {_col1} + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan - alias: t1 + alias: t4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: subject (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 9 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col2} {_col3} + 1 + 2 {_col1} + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: t4 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: subject (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} + 1 {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col2, _col3, _col7, _col9 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col9 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} {KEY.reducesinkkey0} - 1 - 2 {VALUE._col0} - outputColumnNames: _col0, _col2, _col3, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: string) - sort order: + - Map-reduce partition columns: _col7 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {_col0} {_col2} {_col3} + 1 + 2 {_col1} + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col7 + input vertices: + 1 Map 6 + 2 Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col2} {_col3} {_col7} + 1 {_col1} + keys: + 0 _col7 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col7, _col9 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join2.q.out ql/src/test/results/clientpositive/spark/join2.q.out index c7f4080..f885dae 100644 --- ql/src/test/results/clientpositive/spark/join2.q.out +++ ql/src/test/results/clientpositive/spark/join2.q.out @@ -19,17 +19,16 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -40,79 +39,93 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col5) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col5) (type: double) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col11 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col11 + input vertices: + 0 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join20.q.out ql/src/test/results/clientpositive/spark/join20.q.out index ae40f1c..fb52451 100644 --- ql/src/test/results/clientpositive/spark/join20.q.out +++ ql/src/test/results/clientpositive/spark/join20.q.out @@ -7,15 +7,13 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -26,61 +24,87 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -668,15 +692,13 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 A SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -687,61 +709,87 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join21.q.out ql/src/test/results/clientpositive/spark/join21.q.out index 7ca62a1..6736384 100644 --- ql/src/test/results/clientpositive/spark/join21.q.out +++ ql/src/test/results/clientpositive/spark/join21.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,58 +22,84 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join22.q.out ql/src/test/results/clientpositive/spark/join22.q.out index d108173..9b0a4e7 100644 --- ql/src/test/results/clientpositive/spark/join22.q.out +++ ql/src/test/results/clientpositive/spark/join22.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,79 +22,88 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col3} + keys: + 0 key (type: string) + 1 _col2 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: src4 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col2} - outputColumnNames: _col8 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col3} + keys: + 0 key (type: string) + 1 _col2 (type: string) + outputColumnNames: _col8 + input vertices: + 0 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join23.q.out ql/src/test/results/clientpositive/spark/join23.q.out index 53d44d9..ccfb1f7 100644 --- ql/src/test/results/clientpositive/spark/join23.q.out +++ ql/src/test/results/clientpositive/spark/join23.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,11 +22,23 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -36,28 +46,29 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join25.q.out ql/src/test/results/clientpositive/spark/join25.q.out index 9a5db27..bd4692f 100644 --- ql/src/test/results/clientpositive/spark/join25.q.out +++ ql/src/test/results/clientpositive/spark/join25.q.out @@ -21,16 +21,37 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,48 +62,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join26.q.out ql/src/test/results/clientpositive/spark/join26.q.out index e4ccf02..14127d1 100644 --- ql/src/test/results/clientpositive/spark/join26.q.out +++ ql/src/test/results/clientpositive/spark/join26.q.out @@ -102,16 +102,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -124,14 +123,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -186,27 +189,31 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -216,14 +223,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -236,43 +243,91 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 4 + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 0 Map 3 + 2 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -282,14 +337,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -302,63 +357,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /src [y] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join27.q.out ql/src/test/results/clientpositive/spark/join27.q.out index 81cc089..fe350d3 100644 --- ql/src/test/results/clientpositive/spark/join27.q.out +++ ql/src/test/results/clientpositive/spark/join27.q.out @@ -21,16 +21,37 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,47 +62,33 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join28.q.out ql/src/test/results/clientpositive/spark/join28.q.out index 0349069..8d08739 100644 --- ql/src/test/results/clientpositive/spark/join28.q.out +++ ql/src/test/results/clientpositive/spark/join28.q.out @@ -31,17 +31,15 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,79 +50,89 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out index 5d0b47c..90894b7 100644 --- ql/src/test/results/clientpositive/spark/join29.q.out +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -27,69 +27,98 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -101,52 +130,31 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 4 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join3.q.out ql/src/test/results/clientpositive/spark/join3.q.out index b786116..3aaae9c 100644 --- ql/src/test/results/clientpositive/spark/join3.q.out +++ ql/src/test/results/clientpositive/spark/join3.q.out @@ -19,16 +19,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,12 +38,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src3 @@ -52,13 +57,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src1 @@ -66,35 +81,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 {VALUE._col0} - outputColumnNames: _col0, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join30.q.out ql/src/test/results/clientpositive/spark/join30.q.out index 94645bb..c45b547 100644 --- ql/src/test/results/clientpositive/spark/join30.q.out +++ ql/src/test/results/clientpositive/spark/join30.q.out @@ -19,17 +19,39 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -40,51 +62,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out index 89fc24c..e4a9882 100644 --- ql/src/test/results/clientpositive/spark/join31.q.out +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -29,70 +29,99 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -104,38 +133,36 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 0 Reducer 5 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -155,23 +182,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join32.q.out ql/src/test/results/clientpositive/spark/join32.q.out index 65124fa..d23c462 100644 --- ql/src/test/results/clientpositive/spark/join32.q.out +++ ql/src/test/results/clientpositive/spark/join32.q.out @@ -100,17 +100,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -123,13 +121,16 @@ STAGE PLANS: isSamplingPred: false predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col6} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -184,27 +185,29 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -214,14 +217,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -234,44 +237,101 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 5 + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col6} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -281,14 +341,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -301,80 +361,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col5} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false + /src [y] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join32_lessSize.q.out ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 37e561d..6f9f9c8 100644 --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -108,17 +108,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -131,13 +129,16 @@ STAGE PLANS: isSamplingPred: false predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col6} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -192,27 +193,29 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -222,14 +225,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -242,44 +245,101 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 5 + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col6} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -289,14 +349,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -309,80 +369,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col5} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false + /src [y] Stage: Stage-2 Dependency Collection @@ -617,86 +617,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [w] - Map 4 + Map 2 Map Operator Tree: TableScan alias: z @@ -706,14 +638,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col5} + 1 {value} + 2 {value} + keys: + 0 _col5 (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -763,7 +699,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [z] - Map 5 + Map 3 Map Operator Tree: TableScan alias: y @@ -773,14 +709,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col5} + 1 {value} + 2 {value} + keys: + 0 _col5 (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -830,7 +770,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 6 + Map 4 Map Operator Tree: TableScan alias: x @@ -840,14 +780,16 @@ STAGE PLANS: isSamplingPred: false predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -897,72 +839,139 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col0} - outputColumnNames: _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col5, _col11, _col16 - Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col16 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {key} + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col5 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col5} + 1 {value} + 2 {value} + keys: + 0 _col5 (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col5, _col11, _col16 + input vertices: + 1 Map 3 + 2 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col16 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [w] Stage: Stage-2 Dependency Collection @@ -1217,42 +1226,43 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1262,14 +1272,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1282,46 +1292,51 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 4 + /src1 [x] + Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1329,68 +1344,126 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] - Map 5 + /srcpart/ds=2008-04-08/hr=11 [z] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1398,13 +1471,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1414,87 +1485,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /src [y] Stage: Stage-2 Dependency Collection @@ -1746,17 +1756,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1765,13 +1773,16 @@ STAGE PLANS: alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1821,32 +1832,37 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: value is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1854,68 +1870,131 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] - Map 5 + /srcpart/ds=2008-04-08/hr=11 [z] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1923,108 +2002,40 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /src1 [x] Stage: Stage-2 Dependency Collection @@ -2201,33 +2212,18 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -2235,13 +2231,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: x @@ -2249,53 +2248,72 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -2442,33 +2460,18 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -2476,13 +2479,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: y @@ -2490,53 +2496,72 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join33.q.out ql/src/test/results/clientpositive/spark/join33.q.out index 65124fa..d23c462 100644 --- ql/src/test/results/clientpositive/spark/join33.q.out +++ ql/src/test/results/clientpositive/spark/join33.q.out @@ -100,17 +100,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -123,13 +121,16 @@ STAGE PLANS: isSamplingPred: false predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col6} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -184,27 +185,29 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -214,14 +217,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -234,44 +237,101 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 5 + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col6} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -281,14 +341,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -301,80 +361,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col5} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false + /src [y] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join36.q.out ql/src/test/results/clientpositive/spark/join36.q.out index 1608626..5c972e0 100644 --- ql/src/test/results/clientpositive/spark/join36.q.out +++ ql/src/test/results/clientpositive/spark/join36.q.out @@ -61,16 +61,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -81,13 +80,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - value expressions: cnt (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {cnt} + 1 {cnt} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: x @@ -95,34 +102,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - value expressions: cnt (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {cnt} + 1 {cnt} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join37.q.out ql/src/test/results/clientpositive/spark/join37.q.out index 617be10..b1cc628 100644 --- ql/src/test/results/clientpositive/spark/join37.q.out +++ ql/src/test/results/clientpositive/spark/join37.q.out @@ -21,16 +21,37 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -41,48 +62,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join38.q.out ql/src/test/results/clientpositive/spark/join38.q.out index b792dad..e0c6de7 100644 --- ql/src/test/results/clientpositive/spark/join38.q.out +++ ql/src/test/results/clientpositive/spark/join38.q.out @@ -49,15 +49,13 @@ where b.col11 = 111 group by a.value, b.col5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,12 +66,23 @@ STAGE PLANS: Filter Operator predicate: (col11 = 111) (type: boolean) Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '111' (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: col5 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {col5} + keys: + 0 '111' (type: string) + 1 '111' (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -81,37 +90,37 @@ STAGE PLANS: Filter Operator predicate: (key = 111) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '111' (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col5} - outputColumnNames: _col1, _col10 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col10 (type: string) - outputColumnNames: _col1, _col10 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col10 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {col5} + keys: + 0 '111' (type: string) + 1 '111' (type: string) + outputColumnNames: _col1, _col10 + input vertices: + 1 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Select Operator + expressions: _col1 (type: string), _col10 (type: string) + outputColumnNames: _col1, _col10 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col1 (type: string), _col10 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/join39.q.out ql/src/test/results/clientpositive/spark/join39.q.out index a2bda84..82419ce 100644 --- ql/src/test/results/clientpositive/spark/join39.q.out +++ ql/src/test/results/clientpositive/spark/join39.q.out @@ -21,30 +21,18 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -56,34 +44,52 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join4.q.out ql/src/test/results/clientpositive/spark/join4.q.out index 8fb9b38..f3554f6 100644 --- ql/src/test/results/clientpositive/spark/join4.q.out +++ ql/src/test/results/clientpositive/spark/join4.q.out @@ -41,76 +41,82 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join40.q.out ql/src/test/results/clientpositive/spark/join40.q.out index 0a96c44..9ac501d 100644 --- ql/src/test/results/clientpositive/spark/join40.q.out +++ ql/src/test/results/clientpositive/spark/join40.q.out @@ -9,28 +9,16 @@ EXPLAIN SELECT x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -42,33 +30,51 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -659,14 +665,13 @@ POSTHOOK: query: EXPLAIN select src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -677,13 +682,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -691,32 +704,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1771,15 +1784,13 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1790,61 +1801,87 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -2432,15 +2469,13 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 A SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2451,61 +2486,87 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 + 2 {(key < 20)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -3093,28 +3154,16 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -3126,33 +3175,51 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -3743,15 +3810,13 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(1) FROM SRC A JOIN SRC B ON (A.KEY=B.KEY) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3762,12 +3827,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -3775,31 +3851,31 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/join41.q.out ql/src/test/results/clientpositive/spark/join41.q.out index 630e406..54a7a76 100644 --- ql/src/test/results/clientpositive/spark/join41.q.out +++ ql/src/test/results/clientpositive/spark/join41.q.out @@ -15,14 +15,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -33,44 +32,51 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -98,14 +104,13 @@ EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -116,44 +121,51 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join5.q.out ql/src/test/results/clientpositive/spark/join5.q.out index d0a5d11..e672023 100644 --- ql/src/test/results/clientpositive/spark/join5.q.out +++ ql/src/test/results/clientpositive/spark/join5.q.out @@ -41,16 +41,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,13 +64,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src2 @@ -83,34 +90,33 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join8.q.out ql/src/test/results/clientpositive/spark/join8.q.out index 97fd93d..4be26b3 100644 --- ql/src/test/results/clientpositive/spark/join8.q.out +++ ql/src/test/results/clientpositive/spark/join8.q.out @@ -41,79 +41,85 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join9.q.out ql/src/test/results/clientpositive/spark/join9.q.out index 8d0815b..35aa904 100644 --- ql/src/test/results/clientpositive/spark/join9.q.out +++ ql/src/test/results/clientpositive/spark/join9.q.out @@ -73,16 +73,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -95,14 +94,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -152,7 +153,12 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src2] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -162,13 +168,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col8 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -220,47 +265,6 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [src1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index e55fbc5..9febda9 100644 --- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -5,14 +5,13 @@ POSTHOOK: query: explain select p1.p_name, p2.p_name from part p1 , part p2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -20,40 +19,51 @@ STAGE PLANS: TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} + 1 {p_name} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} - 1 {VALUE._col1} - outputColumnNames: _col1, _col13 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_name} + 1 {p_name} + keys: + 0 + 1 + outputColumnNames: _col1, _col13 + input vertices: + 1 Map 1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: string), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -70,14 +80,13 @@ from part p1 ,part p2 ,part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -88,12 +97,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} + 1 {p_name} + 2 + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -101,12 +116,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} + 1 + 2 {p_name} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -114,37 +140,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col1, _col13, _col25 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_name} + 1 {p_name} + 2 {p_name} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + outputColumnNames: _col1, _col13, _col25 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -161,14 +189,13 @@ from part p1 , (select p_name from part) p2 ,part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -183,12 +210,18 @@ STAGE PLANS: expressions: p_name (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} + 1 + 2 {p_name} + keys: + 0 p_name (type: string) + 1 _col0 (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p3 @@ -196,12 +229,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} + 1 {_col0} + 2 + keys: + 0 p_name (type: string) + 1 _col0 (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -209,37 +253,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col1, _col12, _col14 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col12 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_name} + 1 {_col0} + 2 {p_name} + keys: + 0 p_name (type: string) + 1 _col0 (type: string) + 2 p_name (type: string) + outputColumnNames: _col1, _col12, _col14 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col12 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -256,94 +302,110 @@ from part p1 , part p2 , part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} + 1 {p_partkey} {p_name} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} + 1 {p_partkey} {p_name} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col12, _col13 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + input vertices: + 0 Map 3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -362,127 +424,140 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: p4 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Map 3 + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} + 1 + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} + 1 {p_partkey} {p_name} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + outputColumnNames: _col0, _col1, _col12, _col13 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_partkey} {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + input vertices: + 1 Map 1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -501,127 +576,140 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: p4 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Map 3 + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} + 1 + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} + 1 {p_partkey} {p_name} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + outputColumnNames: _col0, _col1, _col12, _col13 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} + 1 {p_name} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_partkey} {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + input vertices: + 1 Map 1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out index 2d59f38..df0ee42 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -5,14 +5,13 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -23,13 +22,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -37,13 +41,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -51,35 +65,36 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -94,14 +109,13 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_name = p1.p_name and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,13 +126,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -126,13 +145,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -140,35 +169,36 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -183,92 +213,107 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 0 Map 3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -283,15 +328,13 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -302,13 +345,16 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -316,56 +362,64 @@ STAGE PLANS: Filter Operator predicate: ((p_partkey = 1) and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 2a1c4db..d78ddfe 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -5,14 +5,13 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 on p2.p_name = p3.p_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -23,13 +22,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p3 @@ -37,13 +43,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -51,13 +64,25 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -65,37 +90,40 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 3 + 2 Map 2 + 3 Map 1 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -112,127 +140,137 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: p4 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index f4fa8c8..d385a8f 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -7,14 +7,13 @@ from part p1 join part p2 join part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -25,13 +24,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -39,13 +43,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -53,38 +67,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -101,14 +116,13 @@ from part p1 join part p2 join part p3 where p2.p_name = p1.p_name and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -119,13 +133,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -133,13 +152,23 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -147,38 +176,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -195,95 +225,110 @@ from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 0 Map 3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -300,15 +345,13 @@ from part p1 join part p2 join part p3 where p2.p_partkey = 1 and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -319,13 +362,16 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -333,59 +379,67 @@ STAGE PLANS: Filter Operator predicate: (p_name is not null and (p_partkey = 1)) (type: boolean) Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col25 = _col13) (type: boolean) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index b9ed3e1..320891b 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -7,14 +7,13 @@ from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 where p2.p_name = p3.p_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -25,13 +24,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p3 @@ -39,13 +45,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -53,13 +66,25 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -67,40 +92,43 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p_name (type: string) + 2 p_name (type: string) + 3 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 3 + 2 Map 2 + 3 Map 1 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -119,130 +147,140 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: p4 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p_name (type: string), p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col13 (type: string) + 1 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 2c03e5d..c913691 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -61,14 +61,13 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name and p2_name = p3_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,13 +78,18 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -93,13 +97,23 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -107,35 +121,36 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -150,14 +165,13 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -168,13 +182,18 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -182,13 +201,23 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -196,35 +225,36 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -239,15 +269,13 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -258,13 +286,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -272,59 +303,67 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -339,15 +378,13 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -358,13 +395,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -372,56 +412,64 @@ STAGE PLANS: Filter Operator predicate: ((p2_partkey = 1) and p2_name is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index e0af3c8..7cee393 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -61,14 +61,13 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 on p2_name = p3_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,13 +78,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p3 @@ -93,13 +99,20 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -107,13 +120,25 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -121,37 +146,40 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 3 + 2 Map 2 + 3 Map 1 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -168,33 +196,17 @@ from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 on and p1.p_partkey = p2_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: p3 @@ -202,13 +214,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -216,13 +231,21 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string), p2_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p2_name (type: string), p2_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -230,65 +253,80 @@ STAGE PLANS: Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p2_name (type: string), p2_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out index b2403ce..fa9681e 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -63,14 +63,13 @@ from part p1 join part2 p2 join part3 p3 where p1.p_name = p2_name and p2_name = p3_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -81,13 +80,18 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -95,13 +99,23 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -109,38 +123,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -157,14 +172,13 @@ from part p1 join part2 p2 join part3 p3 where p2_name = p1.p_name and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -175,13 +189,18 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -189,13 +208,23 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 @@ -203,38 +232,39 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + 2 Map 1 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -251,15 +281,13 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -270,13 +298,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -284,62 +315,70 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -356,15 +395,13 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey = 1 and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -375,13 +412,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p2 @@ -389,59 +429,67 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and (p2_partkey = 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col25 = _col13) (type: boolean) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index 704835f..5afb31d 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -63,14 +63,13 @@ from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 where p2_name = p3_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -81,13 +80,20 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: p3 @@ -95,13 +101,20 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -109,13 +122,25 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -123,40 +148,43 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 p_name (type: string) + 1 p2_name (type: string) + 2 p3_name (type: string) + 3 p_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 1 Map 3 + 2 Map 2 + 3 Map 1 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -175,33 +203,17 @@ where p2_name = p3_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: p3 @@ -209,13 +221,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p2 @@ -223,13 +238,21 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: p2_name (type: string), p2_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p2_name (type: string), p2_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: p1 @@ -237,68 +260,83 @@ STAGE PLANS: Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_name (type: string), p_partkey (type: int) - sort order: ++ - Map-reduce partition columns: p_name (type: string), p_partkey (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} + keys: + 0 p_name (type: string), p_partkey (type: int) + 1 p2_name (type: string), p2_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} + keys: + 0 _col13 (type: string) + 1 p3_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + input vertices: + 1 Map 2 + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + input vertices: + 0 Map 4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index b729063..6e1a06f 100644 --- ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -93,14 +93,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -113,14 +112,24 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -170,7 +179,7 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [b] - Map 3 + Map 2 Map Operator Tree: TableScan alias: c @@ -180,14 +189,24 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -237,20 +256,68 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [c] - Map 4 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -300,50 +367,6 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 0 [1, 1, 2, 1] - filter predicates: - 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} - 1 - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -452,30 +475,43 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: int) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(value = 50)} {(value = 60)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -524,25 +560,35 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [c] Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value = 60) (type: boolean) + predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(value = 50)} {(value = 60)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -591,25 +637,69 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] - Map 4 + /a [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(value = 50)} {(value = 60)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -658,51 +748,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 1 [0, 1, 2, 1] - filter predicates: - 0 - 1 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /a [b] Stage: Stage-0 Fetch Operator @@ -825,30 +871,43 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: int) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter mappings: + 1 [0, 2, 2, 2] + filter predicates: + 0 + 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -897,25 +956,35 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [c] Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value = 60) (type: boolean) + predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter mappings: + 1 [0, 2, 2, 2] + filter predicates: + 0 + 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -964,25 +1033,69 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] - Map 4 + /a [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter mappings: + 1 [0, 2, 2, 2] + filter predicates: + 0 + 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1031,51 +1144,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 1 [0, 2, 2, 2] - filter predicates: - 0 - 1 {(VALUE._col0 = 50)} {(VALUE._col0 > 10)} {(VALUE._col0 = 60)} {(VALUE._col0 > 20)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /a [b] Stage: Stage-0 Fetch Operator @@ -1657,14 +1726,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1677,14 +1745,27 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 40) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 3 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {value} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} {(value = 40)} + 1 + 2 + 3 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1734,7 +1815,7 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [d] - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -1744,14 +1825,27 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + 3 {key} {value} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} {(value = 40)} + 1 + 2 + 3 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1801,7 +1895,7 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [b] - Map 4 + Map 3 Map Operator Tree: TableScan alias: c @@ -1811,14 +1905,27 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + 3 {key} {value} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} {(value = 40)} + 1 + 2 + 3 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1868,20 +1975,73 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [c] - Map 5 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(value = 50)} {(value = 60)} {(value = 40)} + 1 + 2 + 3 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types int:int:int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1931,53 +2091,6 @@ STAGE PLANS: name: default.a Truncated Path -> Alias: /a [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 0 [1, 1, 2, 1, 3, 1] - filter predicates: - 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} {(VALUE._col0 = 40)} - 1 - 2 - 3 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types int:int:int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_hive_626.q.out ql/src/test/results/clientpositive/spark/join_hive_626.q.out index f58f1ce..6242068 100644 --- ql/src/test/results/clientpositive/spark/join_hive_626.q.out +++ ql/src/test/results/clientpositive/spark/join_hive_626.q.out @@ -65,32 +65,16 @@ select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hi hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hive_foo - Statistics: Num rows: 0 Data size: 15 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: foo_id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: foo_id (type: int) - sort order: + - Map-reduce partition columns: foo_id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: foo_name (type: string) - Map 4 + Map 2 Map Operator Tree: TableScan alias: hive_count @@ -98,13 +82,16 @@ STAGE PLANS: Filter Operator predicate: bar_id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: bar_id (type: int) - sort order: + - Map-reduce partition columns: bar_id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: n (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} {_col13} + 1 {n} + keys: + 0 _col9 (type: int) + 1 bar_id (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: hive_bar @@ -112,49 +99,67 @@ STAGE PLANS: Filter Operator predicate: (foo_id is not null and bar_id is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: foo_id (type: int) - sort order: + - Map-reduce partition columns: foo_id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: bar_id (type: int), bar_name (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} {VALUE._col3} - outputColumnNames: _col1, _col9, _col13 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col9 (type: int) - sort order: + - Map-reduce partition columns: _col9 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col13 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {VALUE._col12} - 1 {VALUE._col0} - outputColumnNames: _col1, _col13, _col22 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {foo_name} + 1 {bar_id} {bar_name} + keys: + 0 foo_id (type: int) + 1 foo_id (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_foo + Statistics: Num rows: 0 Data size: 15 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: foo_id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {foo_name} + 1 {bar_id} {bar_name} + keys: + 0 foo_id (type: int) + 1 foo_id (type: int) + outputColumnNames: _col1, _col9, _col13 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} {_col13} + 1 {n} + keys: + 0 _col9 (type: int) + 1 bar_id (type: int) + outputColumnNames: _col1, _col13, _col22 + input vertices: + 1 Map 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_map_ppr.q.out ql/src/test/results/clientpositive/spark/join_map_ppr.q.out index 9269c08..751d775 100644 --- ql/src/test/results/clientpositive/spark/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -104,16 +104,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -126,14 +125,18 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -188,27 +191,31 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -218,14 +225,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -238,43 +245,91 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 4 + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 0 Map 3 + 2 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -284,14 +339,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -304,63 +359,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /src [y] Stage: Stage-2 Dependency Collection @@ -661,16 +673,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -683,14 +694,18 @@ STAGE PLANS: isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -745,27 +760,31 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src_copy + base file name: src1_copy input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -773,16 +792,16 @@ STAGE PLANS: bucket_count -1 columns key,value columns.comments - columns.types int:string + columns.types string:string #### A masked pattern was here #### - name default.src_copy + name default.src1_copy numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src_copy { i32 key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1_copy { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -793,46 +812,98 @@ STAGE PLANS: bucket_count -1 columns key,value columns.comments - columns.types int:string + columns.types string:string #### A masked pattern was here #### - name default.src_copy + name default.src1_copy numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src_copy { i32 key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1_copy { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_copy - name: default.src_copy + name: default.src1_copy + name: default.src1_copy Truncated Path -> Alias: - /src_copy [y] - Map 4 + /src1_copy [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key (type: string) - auto parallelism: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col6, _col11 + input vertices: + 0 Map 3 + 2 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows 107 + rawDataSize 2018 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1_copy + base file name: src_copy input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -840,16 +911,16 @@ STAGE PLANS: bucket_count -1 columns key,value columns.comments - columns.types string:string + columns.types int:string #### A masked pattern was here #### - name default.src1_copy + name default.src_copy numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1_copy { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src_copy { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -860,70 +931,22 @@ STAGE PLANS: bucket_count -1 columns key,value columns.comments - columns.types string:string + columns.types int:string #### A masked pattern was here #### - name default.src1_copy + name default.src_copy numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1_copy { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src_copy { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1_copy - name: default.src1_copy + name: default.src_copy + name: default.src_copy Truncated Path -> Alias: - /src1_copy [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - 2 {VALUE._col1} - outputColumnNames: _col0, _col6, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 107 - rawDataSize 2018 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2125 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + /src_copy [y] Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index 87afca1..107b774 100644 --- ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -5,15 +5,13 @@ POSTHOOK: query: explain select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,12 +22,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), hr (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), hr (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -37,12 +41,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), hr (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), hr (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -50,34 +67,36 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), hr (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), hr (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 - 2 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string), hr (type: string) + 1 key (type: string), hr (type: string) + 2 key (type: string), hr (type: string) + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/join_merging.q.out ql/src/test/results/clientpositive/spark/join_merging.q.out index fb45824..1583f3a 100644 --- ql/src/test/results/clientpositive/spark/join_merging.q.out +++ ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -9,38 +9,32 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p1 @@ -48,35 +42,57 @@ STAGE PLANS: Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {VALUE._col4} - 1 {VALUE._col4} - 2 - outputColumnNames: _col5, _col17 - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col17 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + outputColumnNames: _col5, _col17 + input vertices: + 0 Map 3 + 1 Map 2 Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -95,38 +111,32 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 and p1.p_size > p2.p_size + 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: p1 @@ -134,38 +144,60 @@ STAGE PLANS: Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {VALUE._col4} - 1 {VALUE._col4} - 2 - outputColumnNames: _col5, _col17 - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col5 > (_col17 + 10)) (type: boolean) - Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col17 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + outputColumnNames: _col5, _col17 + input vertices: + 0 Map 3 + 1 Map 2 + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 > (_col17 + 10)) (type: boolean) Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_nullsafe.q.out ql/src/test/results/clientpositive/spark/join_nullsafe.q.out index 2ce052f..9af1d9c 100644 --- ql/src/test/results/clientpositive/spark/join_nullsafe.q.out +++ ql/src/test/results/clientpositive/spark/join_nullsafe.q.out @@ -25,14 +25,13 @@ POSTHOOK: query: -- merging explain select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -40,45 +39,52 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} + keys: + 0 key (type: int) + 1 value (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 value (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -110,14 +116,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -128,13 +133,18 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} + 2 {key} {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -142,13 +152,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -156,35 +176,36 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -207,14 +228,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -222,58 +242,74 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} + 2 {key} {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 value (type: int) + 2 key (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -323,14 +359,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -341,12 +376,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + 2 {key} {value} + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -354,12 +395,23 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -367,35 +419,37 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + nullSafes: [true, false] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -418,14 +472,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -433,55 +486,74 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + 2 {key} {value} + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 2 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int), value (type: int) + 1 value (type: int), key (type: int) + 2 key (type: int), value (type: int) + nullSafes: [true, true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1557,14 +1629,13 @@ POSTHOOK: query: --HIVE-3315 join predicate transitive explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.key is NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1575,12 +1646,21 @@ STAGE PLANS: Filter Operator predicate: value is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: null (type: void) - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} + keys: + 0 null (type: void) + 1 null (type: void) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1588,33 +1668,33 @@ STAGE PLANS: Filter Operator predicate: key is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: null (type: void) - sort order: + + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {key} + keys: + 0 null (type: void) + 1 null (type: void) + nullSafes: [true] + outputColumnNames: _col1, _col5 + input vertices: + 1 Map 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - nullSafes: [true] - outputColumnNames: _col1, _col5 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: null (type: void), _col1 (type: int), _col5 (type: int), null (type: void) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: null (type: void), _col1 (type: int), _col5 (type: int), null (type: void) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_rc.q.out ql/src/test/results/clientpositive/spark/join_rc.q.out index 1a587da..bfc807e 100644 --- ql/src/test/results/clientpositive/spark/join_rc.q.out +++ ql/src/test/results/clientpositive/spark/join_rc.q.out @@ -47,14 +47,13 @@ select join_rc1.key, join_rc2.value FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,13 +64,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: join_rc1 @@ -79,32 +86,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder.q.out ql/src/test/results/clientpositive/spark/join_reorder.q.out index 5ff56fb..5b830fc 100644 --- ql/src/test/results/clientpositive/spark/join_reorder.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -53,14 +53,35 @@ POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT a.key, a.val, c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,47 +92,32 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -126,14 +132,35 @@ POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -144,47 +171,32 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -235,15 +247,14 @@ POSTHOOK: query: EXPLAIN FROM T1 a SELECT a.key, b.key, a.val, c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -251,69 +262,83 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col11 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} + 1 {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col11 + input vertices: + 0 Map 3 Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -332,15 +357,14 @@ POSTHOOK: query: EXPLAIN FROM T1 a SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -348,69 +372,83 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col11 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} + 1 {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col11 + input vertices: + 0 Map 3 Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder2.q.out ql/src/test/results/clientpositive/spark/join_reorder2.q.out index fc74757..10815e7 100644 --- ql/src/test/results/clientpositive/spark/join_reorder2.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder2.q.out @@ -75,14 +75,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -93,13 +92,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -107,13 +113,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -121,13 +134,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -135,37 +160,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -207,16 +235,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -227,13 +252,16 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -241,13 +269,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -255,13 +286,21 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} + keys: + 0 _col1 (type: string) + 1 val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -269,65 +308,58 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + 1) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + 1) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder3.q.out ql/src/test/results/clientpositive/spark/join_reorder3.q.out index 65216e3..56cd036 100644 --- ql/src/test/results/clientpositive/spark/join_reorder3.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder3.q.out @@ -75,14 +75,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -93,13 +92,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -107,13 +113,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -121,13 +134,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -135,37 +160,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -207,16 +235,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -227,13 +252,16 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -241,13 +269,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -255,13 +286,21 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} + keys: + 0 _col1 (type: string) + 1 val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -269,65 +308,58 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + 1) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + 1) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder4.q.out ql/src/test/results/clientpositive/spark/join_reorder4.q.out index d0eabe5..0843a13 100644 --- ql/src/test/results/clientpositive/spark/join_reorder4.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder4.q.out @@ -51,14 +51,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -69,13 +68,18 @@ STAGE PLANS: Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: string) - sort order: + - Map-reduce partition columns: key2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -83,13 +87,23 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key3 (type: string) - sort order: + - Map-reduce partition columns: key3 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val3 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -97,35 +111,36 @@ STAGE PLANS: Filter Operator predicate: key1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key1 (type: string) - sort order: + - Map-reduce partition columns: key1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -151,14 +166,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -169,13 +183,18 @@ STAGE PLANS: Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: string) - sort order: + - Map-reduce partition columns: key2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -183,13 +202,23 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key3 (type: string) - sort order: + - Map-reduce partition columns: key3 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val3 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -197,35 +226,36 @@ STAGE PLANS: Filter Operator predicate: key1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key1 (type: string) - sort order: + - Map-reduce partition columns: key1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -251,14 +281,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -269,13 +298,18 @@ STAGE PLANS: Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: string) - sort order: + - Map-reduce partition columns: key2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -283,13 +317,23 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key3 (type: string) - sort order: + - Map-reduce partition columns: key3 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val3 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -297,35 +341,36 @@ STAGE PLANS: Filter Operator predicate: key1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key1 (type: string) - sort order: + - Map-reduce partition columns: key1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key1} {val1} + 1 {key2} {val2} + 2 {key3} {val3} + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_star.q.out ql/src/test/results/clientpositive/spark/join_star.q.out index 7468211..c93b2e9 100644 --- ql/src/test/results/clientpositive/spark/join_star.q.out +++ ql/src/test/results/clientpositive/spark/join_star.q.out @@ -131,14 +131,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -149,13 +148,21 @@ STAGE PLANS: Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f1 (type: int) - sort order: + - Map-reduce partition columns: f1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: f2 (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: fact @@ -163,33 +170,32 @@ STAGE PLANS: Filter Operator predicate: d1 is not null (type: boolean) Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8 - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + outputColumnNames: _col0, _col1, _col8 + input vertices: + 1 Map 1 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -222,15 +228,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -241,13 +245,16 @@ STAGE PLANS: Filter Operator predicate: f3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f3 (type: int) - sort order: + - Map-reduce partition columns: f3 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: f4 (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col3 (type: int) + 1 f3 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: dim1 @@ -255,13 +262,21 @@ STAGE PLANS: Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f1 (type: int) - sort order: + - Map-reduce partition columns: f1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: f2 (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: fact @@ -269,49 +284,45 @@ STAGE PLANS: Filter Operator predicate: (d1 is not null and d2 is not null) (type: boolean) Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int), d2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col7} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13 - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8 - Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col3 (type: int) + 1 f3 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13 + input vertices: + 1 Map 1 + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -346,15 +357,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -365,13 +374,16 @@ STAGE PLANS: Filter Operator predicate: f3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f3 (type: int) - sort order: + - Map-reduce partition columns: f3 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: f4 (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: dim1 @@ -379,13 +391,21 @@ STAGE PLANS: Filter Operator predicate: (f1 is not null and f2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f1 (type: int) - sort order: + - Map-reduce partition columns: f1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: f2 (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: fact @@ -393,49 +413,45 @@ STAGE PLANS: Filter Operator predicate: d1 is not null (type: boolean) Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13 - Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8 - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + outputColumnNames: _col0, _col1, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13 + input vertices: + 1 Map 1 + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -470,15 +486,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -486,71 +500,78 @@ STAGE PLANS: TableScan alias: dim2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f3 (type: int) - sort order: + - Map-reduce partition columns: f3 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f4 (type: int) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f1 (type: int) - sort order: + - Map-reduce partition columns: f1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f2 (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: fact Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13 - Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8 - Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + outputColumnNames: _col0, _col1, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13 + input vertices: + 1 Map 1 + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -601,19 +622,13 @@ POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 Left outer Join dim7 on dim6.f12 = dim7.f13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 1), Map 14 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -621,192 +636,208 @@ STAGE PLANS: TableScan alias: dim3 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f5 (type: int) - sort order: + - Map-reduce partition columns: f5 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f6 (type: int) - Map 10 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} + 1 {f6} + keys: + 0 _col3 (type: int) + 1 f5 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan - alias: fact - Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int), d2 (type: int) - Map 11 + alias: dim2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col3} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + Local Work: + Map Reduce Local Work + Map 3 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: dim7 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f13 (type: int) - sort order: + - Map-reduce partition columns: f13 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f14 (type: int) - Map 12 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} {_col33} + 1 {f14} + keys: + 0 _col28 (type: int) + 1 f13 (type: int) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: dim6 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f11 (type: int) - sort order: + - Map-reduce partition columns: f11 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f12 (type: int) - Map 13 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} + 1 {f8} + 2 {f12} + keys: + 0 _col18 (type: int) + 1 f7 (type: int) + 2 f11 (type: int) + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan alias: dim5 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f9 (type: int) - sort order: + - Map-reduce partition columns: f9 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f10 (type: int) - Map 14 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} + 1 {f10} + keys: + 0 _col23 (type: int) + 1 f9 (type: int) + Local Work: + Map Reduce Local Work + Map 8 Map Operator Tree: TableScan alias: dim4 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f7 (type: int) - sort order: + - Map-reduce partition columns: f7 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f8 (type: int) - Map 6 - Map Operator Tree: - TableScan - alias: dim2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f3 (type: int) - sort order: + - Map-reduce partition columns: f3 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f4 (type: int) - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} + 1 {f8} + 2 {f12} + keys: + 0 _col18 (type: int) + 1 f7 (type: int) + 2 f11 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: dim1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f1 (type: int) - sort order: + - Map-reduce partition columns: f1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col7} {VALUE._col12} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13, _col18 - Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col18 (type: int) - sort order: + - Map-reduce partition columns: _col18 (type: int) - Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28 - Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col23 (type: int) - sort order: + - Map-reduce partition columns: _col23 (type: int) - Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col28 (type: int) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {VALUE._col18} {KEY.reducesinkkey0} {VALUE._col27} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33 - Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col28 (type: int) - sort order: + - Map-reduce partition columns: _col28 (type: int) - Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {VALUE._col18} {VALUE._col23} {KEY.reducesinkkey0} {VALUE._col32} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33, _col38 - Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int), _col28 (type: int), _col38 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col3} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8, _col13 - Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8 - Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + alias: fact + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + keys: + 0 d1 (type: int) + 1 f1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col8 + input vertices: + 1 Map 3 + Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col3} {_col8} + 1 {f4} + keys: + 0 _col8 (type: int) + 1 f3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col8, _col13 + input vertices: + 1 Map 2 + Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} + 1 {f6} + keys: + 0 _col3 (type: int) + 1 f5 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13, _col18 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} + 1 {f8} + 2 {f12} + keys: + 0 _col18 (type: int) + 1 f7 (type: int) + 2 f11 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28 + input vertices: + 1 Map 8 + 2 Map 6 + Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} + 1 {f10} + keys: + 0 _col23 (type: int) + 1 f9 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33 + input vertices: + 1 Map 7 + Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} {_col33} + 1 {f14} + keys: + 0 _col28 (type: int) + 1 f13 (type: int) + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33, _col38 + input vertices: + 1 Map 5 + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int), _col28 (type: int), _col38 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_thrift.q.out ql/src/test/results/clientpositive/spark/join_thrift.q.out index 692e2ed..b349103 100644 --- ql/src/test/results/clientpositive/spark/join_thrift.q.out +++ ql/src/test/results/clientpositive/spark/join_thrift.q.out @@ -31,14 +31,13 @@ JOIN src_thrift s2 ON s1.aint = s2.aint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -49,13 +48,21 @@ STAGE PLANS: Filter Operator predicate: aint is not null (type: boolean) Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: aint (type: int) - sort order: + - Map-reduce partition columns: aint (type: int) - Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - value expressions: lintstring (type: array>) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {aint} + 1 {lintstring} + keys: + 0 aint (type: int) + 1 aint (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: s1 @@ -63,32 +70,32 @@ STAGE PLANS: Filter Operator predicate: aint is not null (type: boolean) Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: aint (type: int) - sort order: + - Map-reduce partition columns: aint (type: int) - Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col3} - outputColumnNames: _col0, _col17 - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col17 (type: array>) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {aint} + 1 {lintstring} + keys: + 0 aint (type: int) + 1 aint (type: int) + outputColumnNames: _col0, _col17 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col17 (type: array>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_vc.q.out ql/src/test/results/clientpositive/spark/join_vc.q.out index 93d4aa2..ac9abfb 100644 --- ql/src/test/results/clientpositive/spark/join_vc.q.out +++ ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -11,16 +11,13 @@ POSTHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -31,13 +28,16 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {key} {BLOCK__OFFSET__INSIDE__FILE} + keys: + 0 _col6 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: t2 @@ -45,13 +45,23 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: t1 @@ -59,30 +69,43 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - outputColumnNames: _col10, _col11, _col12 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string) - sort order: +++ - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {key} {value} {BLOCK__OFFSET__INSIDE__FILE} + keys: + 0 _col6 (type: string) + 1 value (type: string) + outputColumnNames: _col10, _col11, _col12 + input vertices: + 1 Map 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) @@ -98,21 +121,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col0} - outputColumnNames: _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -140,14 +148,13 @@ select t2.BLOCK__OFFSET__INSIDE__FILE from src t1 join src t2 on t1.key = t2.key where t1.key < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -158,13 +165,21 @@ STAGE PLANS: Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {BLOCK__OFFSET__INSIDE__FILE} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -172,32 +187,32 @@ STAGE PLANS: Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col1} - outputColumnNames: _col7 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {BLOCK__OFFSET__INSIDE__FILE} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_view.q.out ql/src/test/results/clientpositive/spark/join_view.q.out index 88ed110..9915f33 100644 --- ql/src/test/results/clientpositive/spark/join_view.q.out +++ ql/src/test/results/clientpositive/spark/join_view.q.out @@ -50,32 +50,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Map 2 <- Map 1 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 - Map 3 - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col1, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col6 (type: int), '2011-09-01' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/limit_pushdown.q.out ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index f086f9a..5279d60 100644 --- ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -806,17 +806,15 @@ join on subq.key=subq2.key limit 4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -841,29 +839,9 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -881,37 +859,44 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -931,7 +916,9 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 6 + Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) @@ -943,12 +930,33 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 4 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index e40e2c0..584b5c9 100644 --- ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -105,14 +105,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -125,14 +124,16 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,7 +232,12 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [b] /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -241,14 +247,51 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -298,46 +341,6 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -486,14 +489,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -506,14 +508,21 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(ds = '2008-04-08')} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -563,7 +572,12 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -573,14 +587,56 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(ds = '2008-04-08')} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col7, _col8 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -773,51 +829,6 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [a] /srcpart/ds=2008-04-09/hr=11 [a] /srcpart/ds=2008-04-09/hr=12 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 - outputColumnNames: _col0, _col1, _col7, _col8 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -970,14 +981,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -990,14 +1000,16 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} {ds} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1190,7 +1202,12 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [b] /srcpart/ds=2008-04-09/hr=11 [b] /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1200,14 +1217,51 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} {ds} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1257,46 +1311,6 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6, _col7 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1449,14 +1463,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1469,14 +1482,16 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1526,7 +1541,12 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [b] - Map 3 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1536,14 +1556,51 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col7, _col8 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1642,46 +1699,6 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col7, _col8 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin1.q.out ql/src/test/results/clientpositive/spark/mapjoin1.q.out index 2e9fdc9..366d834 100644 --- ql/src/test/results/clientpositive/spark/mapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin1.q.out @@ -30,65 +30,74 @@ EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + filter predicates: + 0 + 1 {true} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {true} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + filter predicates: + 0 + 1 {true} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -104,16 +113,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_10 +238 val_238 238 val_238 +238 val_238 238 val_238 +86 val_86 86 val_86 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +27 val_27 27 val_27 +165 val_165 165 val_165 +165 val_165 165 val_165 +409 val_409 409 val_409 PREHOOK: query: -- func filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 @@ -123,65 +132,74 @@ EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + filter predicates: + 0 + 1 {((key * 10) < '1000')} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {((KEY.reducesinkkey0 * 10) < '1000')} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + filter predicates: + 0 + 1 {((key * 10) < '1000')} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -197,16 +215,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_10 +NULL NULL 238 val_238 +86 val_86 86 val_86 +NULL NULL 311 val_311 +27 val_27 27 val_27 +NULL NULL 165 val_165 +NULL NULL 409 val_409 +NULL NULL 255 val_255 +NULL NULL 278 val_278 +98 val_98 98 val_98 +98 val_98 98 val_98 PREHOOK: query: -- field filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN @@ -218,14 +236,13 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -233,13 +250,24 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {_col0} {_col1} + filter predicates: + 0 + 1 {(_col1.key > 200)} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -248,39 +276,38 @@ STAGE PLANS: expressions: key (type: string), named_struct('key',key,'value',value) (type: struct) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(VALUE._col0.key > 200)} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + filter predicates: + 0 + 1 {(_col1.key > 200)} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -298,16 +325,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL NULL 0 {"key":"0","value":"val_0"} -NULL NULL 0 {"key":"0","value":"val_0"} -NULL NULL 0 {"key":"0","value":"val_0"} -NULL NULL 10 {"key":"10","value":"val_10"} -NULL NULL 100 {"key":"100","value":"val_100"} -NULL NULL 100 {"key":"100","value":"val_100"} -NULL NULL 103 {"key":"103","value":"val_103"} -NULL NULL 103 {"key":"103","value":"val_103"} -NULL NULL 104 {"key":"104","value":"val_104"} -NULL NULL 104 {"key":"104","value":"val_104"} +238 val_238 238 {"key":"238","value":"val_238"} +238 val_238 238 {"key":"238","value":"val_238"} +NULL NULL 86 {"key":"86","value":"val_86"} +311 val_311 311 {"key":"311","value":"val_311"} +311 val_311 311 {"key":"311","value":"val_311"} +311 val_311 311 {"key":"311","value":"val_311"} +NULL NULL 27 {"key":"27","value":"val_27"} +NULL NULL 165 {"key":"165","value":"val_165"} +409 val_409 409 {"key":"409","value":"val_409"} +409 val_409 409 {"key":"409","value":"val_409"} PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 PREHOOK: type: QUERY @@ -315,62 +342,68 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -386,16 +419,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_10 +238 val_238 238 val_238 +238 val_238 238 val_238 +86 val_86 86 val_86 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +27 val_27 27 val_27 +165 val_165 165 val_165 +165 val_165 165 val_165 +409 val_409 409 val_409 PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 PREHOOK: type: QUERY @@ -403,68 +436,74 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key * 10) < '1000') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key * 10) < '1000') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -480,16 +519,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_10 +86 val_86 86 val_86 +27 val_27 27 val_27 +98 val_98 98 val_98 +98 val_98 98 val_98 +66 val_66 66 val_66 +37 val_37 37 val_37 +37 val_37 37 val_37 +15 val_15 15 val_15 +15 val_15 15 val_15 +82 val_82 82 val_82 PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 @@ -499,14 +538,13 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -514,13 +552,21 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {_col0} {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -532,36 +578,35 @@ STAGE PLANS: Filter Operator predicate: (_col1.key > 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -579,13 +624,13 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -201 val_201 201 {"key":"201","value":"val_201"} -202 val_202 202 {"key":"202","value":"val_202"} -203 val_203 203 {"key":"203","value":"val_203"} -203 val_203 203 {"key":"203","value":"val_203"} -203 val_203 203 {"key":"203","value":"val_203"} -203 val_203 203 {"key":"203","value":"val_203"} -205 val_205 205 {"key":"205","value":"val_205"} -205 val_205 205 {"key":"205","value":"val_205"} -205 val_205 205 {"key":"205","value":"val_205"} -205 val_205 205 {"key":"205","value":"val_205"} +238 val_238 238 {"key":"238","value":"val_238"} +238 val_238 238 {"key":"238","value":"val_238"} +311 val_311 311 {"key":"311","value":"val_311"} +311 val_311 311 {"key":"311","value":"val_311"} +311 val_311 311 {"key":"311","value":"val_311"} +409 val_409 409 {"key":"409","value":"val_409"} +409 val_409 409 {"key":"409","value":"val_409"} +409 val_409 409 {"key":"409","value":"val_409"} +255 val_255 255 {"key":"255","value":"val_255"} +255 val_255 255 {"key":"255","value":"val_255"} diff --git ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out index 84819c7..cb3969f 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out @@ -83,14 +83,13 @@ POSTHOOK: query: explain select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,12 +100,21 @@ STAGE PLANS: Filter Operator predicate: dec is not null (type: boolean) Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: dec (type: decimal(4,0)) - sort order: + - Map-reduce partition columns: dec (type: decimal(4,0)) - Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {dec} + 1 + keys: + 0 dec (type: decimal(4,2)) + 1 dec (type: decimal(4,0)) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -114,32 +122,32 @@ STAGE PLANS: Filter Operator predicate: dec is not null (type: boolean) Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: dec (type: decimal(4,2)) - sort order: + - Map-reduce partition columns: dec (type: decimal(4,2)) - Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col4 - Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {dec} + 1 {dec} + keys: + 0 dec (type: decimal(4,2)) + 1 dec (type: decimal(4,0)) + outputColumnNames: _col0, _col4 + input vertices: + 1 Map 1 + Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out index 8813ccd..a5cc439 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -11,16 +11,13 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -31,12 +28,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: c @@ -44,36 +53,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -152,15 +160,13 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -171,12 +177,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: c @@ -184,36 +201,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -280,16 +296,13 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -300,12 +313,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: c @@ -313,31 +338,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -416,15 +440,13 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -435,12 +457,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: c @@ -448,31 +481,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index cee0475..d32acf8 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -53,15 +53,13 @@ SELECT /*+ mapjoin(src1, src2) */ * FROM src1 SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,65 +70,91 @@ STAGE PLANS: Filter Operator predicate: (key < 300) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 300) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < 300) and (key < 10)) (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - sort order: +++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -202,15 +226,13 @@ SELECT * FROM src1 SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -221,65 +243,91 @@ STAGE PLANS: Filter Operator predicate: (key < 300) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 300) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < 300) and (key < 10)) (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - sort order: +++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 1 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out index c475f2a..8b2f71b 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out @@ -13,6 +13,7 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-4:MAPRED RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE @@ -23,6 +24,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-4:MAPRED RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE @@ -39,6 +41,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-4:MAPRED RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE @@ -49,6 +52,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-5:MAPRED +RUN: Stage-4:MAPRED RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 26050f7..9d26f52 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -56,46 +56,43 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -103,45 +100,68 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [src1] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -149,13 +169,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -165,28 +183,106 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 hr 11 properties: COLUMN_STATS_ACCURATE true @@ -232,7 +328,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 hr 12 properties: COLUMN_STATS_ACCURATE true @@ -272,36 +368,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - /srcpart/ds=2008-04-09/hr=11 [srcpart] - /srcpart/ds=2008-04-09/hr=12 [srcpart] - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -309,65 +383,45 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 - Truncated Path -> Alias: - /src1 [src1] - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -375,11 +429,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -389,80 +445,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] Stage: Stage-0 Fetch Operator @@ -477,32 +479,16 @@ POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value is not null and key is not null) and (value > 'val_450')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 4 + Map 2 Map Operator Tree: TableScan alias: src1 @@ -510,12 +496,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: src @@ -523,47 +513,67 @@ STAGE PLANS: Filter Operator predicate: (value > 'val_450') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and key is not null) and (value > 'val_450')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -578,33 +588,16 @@ POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), ds (type: string) - Map 5 + Map 3 Map Operator Tree: TableScan alias: src1 @@ -612,12 +605,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src @@ -625,55 +622,76 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {ds} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {ds} + 1 + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col2 + input vertices: + 1 Map 3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - outputColumnNames: _col0, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} - 1 - outputColumnNames: _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out index 50ba5f1..d40fa22 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out @@ -29,14 +29,13 @@ select src1.key as k1, src1.value as v1, src2.key, src2.value from src0 src1 inner join src0 src2 on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -47,13 +46,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -61,33 +68,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index 46e6be4..b21498f 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -19,15 +19,13 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,78 +36,88 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -261,15 +269,13 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -280,78 +286,88 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out index 69cbd58..1b5fae7 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -83,15 +83,13 @@ FROM JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -102,81 +100,88 @@ STAGE PLANS: Filter Operator predicate: id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: name (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {id} {name} + 1 {name} + keys: + 0 id (type: int) + 1 id (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: name (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} + 1 {name} + keys: + 0 _col0 (type: int) + 1 id (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {id} {name} + 1 {name} {id} + keys: + 0 id (type: int) + 1 id (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: name (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col6 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} + 1 {id} {name} + keys: + 0 _col0 (type: int) + 1 id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index 7abce67..65eb41b 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -244,71 +244,87 @@ SELECT /*+ mapjoin(src1, src2) */ * FROM src1 SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 2 Map 3 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1070,71 +1086,87 @@ SELECT * FROM src1 SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 2 Map 3 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git ql/src/test/results/clientpositive/spark/mergejoins.q.out ql/src/test/results/clientpositive/spark/mergejoins.q.out index 8ccf70a..0fb26e4 100644 --- ql/src/test/results/clientpositive/spark/mergejoins.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -43,15 +43,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1=c.val1 join d on a.val1=d.val1 join e on a.val2=e.val2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,13 +60,20 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val1 (type: int) - sort order: + - Map-reduce partition columns: val1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {val1} {val2} + 1 {val1} {val2} + 2 {val1} {val2} + 3 {val2} + keys: + 0 val1 (type: int) + 1 val1 (type: int) + 2 val1 (type: int) + 3 val1 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: e @@ -76,13 +81,16 @@ STAGE PLANS: Filter Operator predicate: val2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val2 (type: int) - sort order: + - Map-reduce partition columns: val2 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val1 (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} + 1 {val1} + keys: + 0 _col1 (type: int) + 1 val2 (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: b @@ -90,13 +98,20 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val1 (type: int) - sort order: + - Map-reduce partition columns: val1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: int) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {val1} {val2} + 1 {val2} + 2 {val1} {val2} + 3 {val1} {val2} + keys: + 0 val1 (type: int) + 1 val1 (type: int) + 2 val1 (type: int) + 3 val1 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: c @@ -104,13 +119,25 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val1 (type: int) - sort order: + - Map-reduce partition columns: val1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: int) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {val1} {val2} + 1 {val1} {val2} + 2 {val2} + 3 {val1} {val2} + keys: + 0 val1 (type: int) + 1 val1 (type: int) + 2 val1 (type: int) + 3 val1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: a @@ -118,53 +145,53 @@ STAGE PLANS: Filter Operator predicate: (val1 is not null and val2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val1 (type: int) - sort order: + - Map-reduce partition columns: val1 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {val1} {val2} + 1 {val1} {val2} + 2 {val1} {val2} + 3 {val1} {val2} + keys: + 0 val1 (type: int) + 1 val1 (type: int) + 2 val1 (type: int) + 3 val1 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 3 + 2 Map 4 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} {VALUE._col14} {VALUE._col15} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} + 1 {val1} {val2} + keys: + 0 _col1 (type: int) + 1 val2 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 + input vertices: + 1 Map 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -179,14 +206,13 @@ POSTHOOK: query: --HIVE-3070 filter on outer join condition removed while mergin explain select * from src a join src b on a.key=b.key left outer join src c on b.key=c.key and b.key<10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -194,61 +220,85 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key < 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + filter predicates: + 0 + 1 {(key < 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 < 10)} - 2 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key < 10)} + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index 6cce868..fafce96 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -19,14 +19,13 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -34,70 +33,99 @@ STAGE PLANS: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -112,85 +140,113 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - Right Outer Join0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 0 Map 4 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -205,14 +261,13 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -220,70 +275,99 @@ STAGE PLANS: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join1 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 0 Map 4 + 1 Map 2 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -298,85 +382,113 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join1 to 2 - Right Outer Join0 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + 3 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 0 Map 4 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -393,100 +505,114 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 2 + 2 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -501,100 +627,120 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 0 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -719,30 +865,17 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -750,24 +883,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -775,65 +919,77 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 0 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -848,29 +1004,15 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan @@ -879,24 +1021,48 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 7 + Map 5 Map Operator Tree: TableScan alias: a @@ -904,12 +1070,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Join Operator @@ -931,38 +1125,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Stage: Stage-0 Fetch Operator @@ -977,30 +1139,17 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -1008,24 +1157,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: a @@ -1033,65 +1193,77 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1106,47 +1278,79 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -1154,20 +1358,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 7 + Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Join Operator @@ -1189,38 +1405,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Stage: Stage-0 Fetch Operator @@ -1237,15 +1421,13 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1253,13 +1435,18 @@ STAGE PLANS: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + 2 {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -1267,24 +1454,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {value} + 2 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -1292,51 +1492,49 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + 2 {key} {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 3 + 2 Map 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/multi_join_union.q.out ql/src/test/results/clientpositive/spark/multi_join_union.q.out index ddfa77c..dd85f4c 100644 --- ql/src/test/results/clientpositive/spark/multi_join_union.q.out +++ ql/src/test/results/clientpositive/spark/multi_join_union.q.out @@ -49,16 +49,13 @@ src12 b ON (a.key = b.key) JOIN (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Union 5 <- Map 4 (NONE, 0), Map 7 (NONE, 0) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Union 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -69,13 +66,24 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) + Union 3 <- Map 2 (NONE, 0), Map 6 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src14 @@ -92,7 +100,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -100,13 +108,28 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: src13 @@ -123,23 +146,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -160,8 +167,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 5 - Vertex: Union 5 + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/orc_analyze.q.out ql/src/test/results/clientpositive/spark/orc_analyze.q.out index e718b29..b422db5 100644 --- ql/src/test/results/clientpositive/spark/orc_analyze.q.out +++ ql/src/test/results/clientpositive/spark/orc_analyze.q.out @@ -107,7 +107,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3121 + totalSize 3123 #### A masked pattern was here #### # Storage Information @@ -197,7 +197,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3121 + totalSize 3123 #### A masked pattern was here #### # Storage Information @@ -313,7 +313,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2024 + totalSize 2026 #### A masked pattern was here #### # Storage Information @@ -358,7 +358,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2043 + totalSize 2045 #### A masked pattern was here #### # Storage Information @@ -460,7 +460,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2024 + totalSize 2026 #### A masked pattern was here #### # Storage Information @@ -505,7 +505,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2043 + totalSize 2045 #### A masked pattern was here #### # Storage Information @@ -627,7 +627,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2024 + totalSize 2026 #### A masked pattern was here #### # Storage Information @@ -672,7 +672,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2043 + totalSize 2045 #### A masked pattern was here #### # Storage Information @@ -780,7 +780,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2024 + totalSize 2026 #### A masked pattern was here #### # Storage Information @@ -825,7 +825,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2043 + totalSize 2045 #### A masked pattern was here #### # Storage Information @@ -992,7 +992,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2024 + totalSize 2026 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/parallel_join0.q.out ql/src/test/results/clientpositive/spark/parallel_join0.q.out index 2231b38..e2df0c2 100644 --- ql/src/test/results/clientpositive/spark/parallel_join0.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join0.q.out @@ -19,15 +19,13 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -42,11 +40,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 4) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -58,28 +68,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/parallel_join1.q.out ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 6740c7f..d636635 100644 --- ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- SORT_BEFORE_DIFF +PREHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_j1 -POSTHOOK: query: -- SORT_BEFORE_DIFF +POSTHOOK: query: -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -19,16 +19,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -39,13 +38,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -53,33 +60,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -117,15 +124,63 @@ POSTHOOK: query: SELECT dest_j1.* FROM dest_j1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 #### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 113 val_113 113 val_113 113 val_113 113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +12 val_12 +12 val_12 120 val_120 120 val_120 120 val_120 120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 128 val_128 128 val_128 128 val_128 @@ -135,23 +190,119 @@ POSTHOOK: Input: default@dest_j1 128 val_128 128 val_128 128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 146 val_146 146 val_146 146 val_146 146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 153 val_153 +155 val_155 +156 val_156 157 val_157 +158 val_158 160 val_160 +162 val_162 +163 val_163 164 val_164 164 val_164 164 val_164 164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 175 val_175 175 val_175 175 val_175 175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 179 val_179 179 val_179 179 val_179 @@ -160,7 +311,27 @@ POSTHOOK: Input: default@dest_j1 18 val_18 18 val_18 18 val_18 +180 val_180 +181 val_181 +183 val_183 186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 193 val_193 193 val_193 193 val_193 @@ -170,27 +341,95 @@ POSTHOOK: Input: default@dest_j1 193 val_193 193 val_193 193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 197 val_197 197 val_197 197 val_197 197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 205 val_205 205 val_205 205 val_205 205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 209 val_209 209 val_209 209 val_209 209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 216 val_216 216 val_216 216 val_216 216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 223 val_223 223 val_223 223 val_223 223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 230 val_230 230 val_230 230 val_230 @@ -216,32 +455,144 @@ POSTHOOK: Input: default@dest_j1 230 val_230 230 val_230 230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 238 val_238 238 val_238 238 val_238 238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 241 val_241 -249 val_249 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 256 val_256 256 val_256 256 val_256 256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 278 val_278 278 val_278 278 val_278 278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 281 val_281 281 val_281 281 val_281 281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 289 val_289 +291 val_291 292 val_292 296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 311 val_311 311 val_311 311 val_311 @@ -252,18 +603,75 @@ POSTHOOK: Input: default@dest_j1 311 val_311 311 val_311 315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 322 val_322 322 val_322 322 val_322 322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 333 val_333 333 val_333 333 val_333 333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 344 val_344 344 val_344 344 val_344 344 val_344 +345 val_345 348 val_348 348 val_348 348 val_348 @@ -289,11 +697,54 @@ POSTHOOK: Input: default@dest_j1 348 val_348 348 val_348 348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 362 val_362 +364 val_364 +365 val_365 366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 373 val_373 +374 val_374 +375 val_375 377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 384 val_384 384 val_384 384 val_384 @@ -303,14 +754,60 @@ POSTHOOK: Input: default@dest_j1 384 val_384 384 val_384 384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 395 val_395 395 val_395 395 val_395 395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 399 val_399 399 val_399 399 val_399 399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 403 val_403 403 val_403 403 val_403 @@ -320,21 +817,115 @@ POSTHOOK: Input: default@dest_j1 403 val_403 403 val_403 403 val_403 -407 val_407 -414 val_414 -414 val_414 -414 val_414 -414 val_414 -418 val_418 -421 val_421 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -43 val_43 -432 val_432 -436 val_436 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +44 val_44 443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 454 val_454 454 val_454 454 val_454 @@ -344,10 +935,51 @@ POSTHOOK: Input: default@dest_j1 454 val_454 454 val_454 454 val_454 +455 val_455 +457 val_457 458 val_458 458 val_458 458 val_458 458 val_458 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 469 val_469 469 val_469 469 val_469 @@ -374,11 +1006,57 @@ POSTHOOK: Input: default@dest_j1 469 val_469 469 val_469 47 val_47 +470 val_470 472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 483 val_483 +484 val_484 +485 val_485 487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 490 val_490 +491 val_491 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +493 val_493 494 val_494 +495 val_495 +496 val_496 +497 val_497 498 val_498 498 val_498 498 val_498 @@ -388,26 +1066,69 @@ POSTHOOK: Input: default@dest_j1 498 val_498 498 val_498 498 val_498 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 54 val_54 +57 val_57 58 val_58 58 val_58 58 val_58 58 val_58 +64 val_64 65 val_65 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 72 val_72 72 val_72 72 val_72 72 val_72 +74 val_74 76 val_76 76 val_76 76 val_76 76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 83 val_83 83 val_83 83 val_83 83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +86 val_86 87 val_87 +9 val_9 90 val_90 90 val_90 90 val_90 @@ -417,731 +1138,17 @@ POSTHOOK: Input: default@dest_j1 90 val_90 90 val_90 90 val_90 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 98 val_98 98 val_98 98 val_98 98 val_98 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -111 val_111 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -12 val_12 -12 val_12 -126 val_126 -133 val_133 -137 val_137 -137 val_137 -137 val_137 -137 val_137 -155 val_155 -162 val_162 -166 val_166 -177 val_177 -180 val_180 -191 val_191 -191 val_191 -191 val_191 -191 val_191 -195 val_195 -195 val_195 -195 val_195 -195 val_195 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -199 val_199 -203 val_203 -203 val_203 -203 val_203 -203 val_203 -207 val_207 -207 val_207 -207 val_207 -207 val_207 -214 val_214 -218 val_218 -221 val_221 -221 val_221 -221 val_221 -221 val_221 -229 val_229 -229 val_229 -229 val_229 -229 val_229 -247 val_247 -258 val_258 -265 val_265 -265 val_265 -265 val_265 -265 val_265 -27 val_27 -272 val_272 -272 val_272 -272 val_272 -272 val_272 -283 val_283 -287 val_287 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -298 val_298 -30 val_30 -302 val_302 -306 val_306 -317 val_317 -317 val_317 -317 val_317 -317 val_317 -331 val_331 -331 val_331 -331 val_331 -331 val_331 -335 val_335 -339 val_339 -34 val_34 -342 val_342 -342 val_342 -342 val_342 -342 val_342 -353 val_353 -353 val_353 -353 val_353 -353 val_353 -360 val_360 -364 val_364 -368 val_368 -375 val_375 -379 val_379 -382 val_382 -382 val_382 -382 val_382 -382 val_382 -386 val_386 -393 val_393 -397 val_397 -397 val_397 -397 val_397 -397 val_397 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -409 val_409 -41 val_41 -427 val_427 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -430 val_430 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -438 val_438 -449 val_449 -452 val_452 -463 val_463 -463 val_463 -463 val_463 -463 val_463 -467 val_467 -470 val_470 -478 val_478 -478 val_478 -478 val_478 -478 val_478 -481 val_481 -485 val_485 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -492 val_492 -492 val_492 -492 val_492 -492 val_492 -496 val_496 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -67 val_67 -67 val_67 -67 val_67 -67 val_67 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -70 val_70 -74 val_74 -78 val_78 -85 val_85 -9 val_9 -92 val_92 -96 val_96 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -11 val_11 -114 val_114 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -125 val_125 -125 val_125 -125 val_125 -125 val_125 -129 val_129 -129 val_129 -129 val_129 -129 val_129 -136 val_136 -143 val_143 -15 val_15 -15 val_15 -15 val_15 -15 val_15 -150 val_150 -158 val_158 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -172 val_172 -172 val_172 -172 val_172 -172 val_172 -176 val_176 -176 val_176 -176 val_176 -176 val_176 -183 val_183 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -187 val_187 -19 val_19 -190 val_190 -194 val_194 -202 val_202 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -217 val_217 -217 val_217 -217 val_217 -217 val_217 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -228 val_228 -235 val_235 -239 val_239 -239 val_239 -239 val_239 -239 val_239 -242 val_242 -242 val_242 -242 val_242 -242 val_242 -257 val_257 -26 val_26 -26 val_26 -26 val_26 -26 val_26 -260 val_260 -275 val_275 -282 val_282 -282 val_282 -282 val_282 -282 val_282 -286 val_286 -305 val_305 -309 val_309 -309 val_309 -309 val_309 -309 val_309 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -316 val_316 -323 val_323 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -327 val_327 -33 val_33 -338 val_338 -341 val_341 -345 val_345 -356 val_356 -367 val_367 -367 val_367 -367 val_367 -367 val_367 -37 val_37 -37 val_37 -37 val_37 -37 val_37 -374 val_374 -378 val_378 -389 val_389 -392 val_392 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -396 val_396 -4 val_4 -400 val_400 -404 val_404 -404 val_404 -404 val_404 -404 val_404 -411 val_411 -419 val_419 -437 val_437 -44 val_44 -444 val_444 -448 val_448 -455 val_455 -459 val_459 -459 val_459 -459 val_459 -459 val_459 -462 val_462 -462 val_462 -462 val_462 -462 val_462 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -466 val_466 -477 val_477 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -480 val_480 -484 val_484 -491 val_491 -495 val_495 -51 val_51 -51 val_51 -51 val_51 -51 val_51 -66 val_66 -77 val_77 -8 val_8 -80 val_80 -84 val_84 -84 val_84 -84 val_84 -84 val_84 -95 val_95 -95 val_95 -95 val_95 -95 val_95 -105 val_105 -116 val_116 -134 val_134 -134 val_134 -134 val_134 -134 val_134 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -145 val_145 -149 val_149 -149 val_149 -149 val_149 -149 val_149 -152 val_152 -152 val_152 -152 val_152 -152 val_152 -156 val_156 -163 val_163 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -167 val_167 -17 val_17 -170 val_170 -174 val_174 -174 val_174 -174 val_174 -174 val_174 -178 val_178 -181 val_181 -189 val_189 -192 val_192 -196 val_196 -2 val_2 -20 val_20 -200 val_200 -200 val_200 -200 val_200 -200 val_200 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -208 val_208 -219 val_219 -219 val_219 -219 val_219 -219 val_219 -222 val_222 -226 val_226 -233 val_233 -233 val_233 -233 val_233 -233 val_233 -237 val_237 -237 val_237 -237 val_237 -237 val_237 -24 val_24 -24 val_24 -24 val_24 -24 val_24 -244 val_244 -248 val_248 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -262 val_262 -266 val_266 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -28 val_28 -280 val_280 -280 val_280 -280 val_280 -280 val_280 -284 val_284 -288 val_288 -288 val_288 -288 val_288 -288 val_288 -291 val_291 -307 val_307 -307 val_307 -307 val_307 -307 val_307 -310 val_310 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -318 val_318 -321 val_321 -321 val_321 -321 val_321 -321 val_321 -325 val_325 -325 val_325 -325 val_325 -325 val_325 -332 val_332 -336 val_336 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -35 val_35 -365 val_365 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -394 val_394 -402 val_402 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -413 val_413 -413 val_413 -413 val_413 -413 val_413 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -417 val_417 -42 val_42 -42 val_42 -42 val_42 -42 val_42 -424 val_424 -424 val_424 -424 val_424 -424 val_424 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -431 val_431 -435 val_435 -439 val_439 -439 val_439 -439 val_439 -439 val_439 -446 val_446 -453 val_453 -457 val_457 -460 val_460 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -475 val_475 -479 val_479 -482 val_482 -493 val_493 -497 val_497 -53 val_53 -57 val_57 -64 val_64 -82 val_82 -86 val_86 -97 val_97 -97 val_97 -97 val_97 -97 val_97 diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out index 4b9006f..4634333 100644 --- ql/src/test/results/clientpositive/spark/parquet_join.q.out +++ ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -65,14 +65,35 @@ POSTHOOK: query: -- MR join explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {myvalue} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -83,46 +104,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: myvalue (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {myvalue} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col7 + input vertices: + 0 Map 2 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col1} - outputColumnNames: _col7 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col7 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -159,14 +166,35 @@ POSTHOOK: query: -- The two tables involved in the join have differing number of explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {myvalue} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -177,46 +205,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: myvalue (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {myvalue} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col7 + input vertices: + 0 Map 2 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col1} - outputColumnNames: _col7 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col7 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 86dc015..d3523cf 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2746,15 +2746,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2767,14 +2765,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2825,7 +2825,14 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [t2] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -2835,14 +2842,33 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2893,28 +2919,6 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3054,15 +3058,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3075,14 +3077,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3133,7 +3137,14 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-09 [t2] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -3143,14 +3154,33 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3201,28 +3231,6 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 1a78199..67b4c1a 100644 --- ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -19,15 +19,44 @@ WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '5 GROUP BY src1.c1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,63 +74,42 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 3 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -147,15 +155,41 @@ WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '5 GROUP BY src1.c1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -170,60 +204,42 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 3 + Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/ppd_join.q.out ql/src/test/results/clientpositive/spark/ppd_join.q.out index 4ebdd46..6503fb1 100644 --- ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -21,82 +21,88 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -561,76 +567,82 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 3 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join2.q.out ql/src/test/results/clientpositive/spark/ppd_join2.q.out index 219ebd2..dba30b5 100644 --- ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -27,60 +27,91 @@ ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col2} {_col3} + 1 {_col0} + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src @@ -95,55 +126,35 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} + 1 {_col0} + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 0 Map 1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1720,54 +1731,82 @@ ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col2} {_col3} + 1 {_col0} + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: src @@ -1779,52 +1818,35 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} + 1 {_col0} + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 0 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join3.q.out ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 31addae..8797942 100644 --- ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -27,14 +27,13 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,79 +51,96 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 52 Data size: 558 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 1 + 2 Map 3 + Statistics: Num rows: 52 Data size: 558 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -1758,14 +1774,13 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1780,73 +1795,90 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 4 + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 160 Data size: 1705 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + 2 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 1 + 2 Map 3 + Statistics: Num rows: 160 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join4.q.out ql/src/test/results/clientpositive/spark/ppd_join4.q.out index 741f5cf..18b6b2c 100644 --- ql/src/test/results/clientpositive/spark/ppd_join4.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join4.q.out @@ -45,15 +45,13 @@ join test_tbl t3 on (t2.id=t3.id ) where t2.name='c' and t3.id='a' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -64,11 +62,23 @@ STAGE PLANS: Filter Operator predicate: (id = 'a') (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 'a' (type: string) + 1 'a' (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: test_tbl @@ -82,34 +92,35 @@ STAGE PLANS: key expressions: 'a' (type: string) sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 'a' (type: string), 'c' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 'a' (type: string) + 1 'a' (type: string) + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 'a' (type: string), 'c' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join5.q.out ql/src/test/results/clientpositive/spark/ppd_join5.q.out index 54b6b34..18b3ff6 100644 --- ql/src/test/results/clientpositive/spark/ppd_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -43,15 +43,13 @@ select a.*,b.d d1,c.d d2 from join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,13 +60,16 @@ STAGE PLANS: Filter Operator predicate: (id is not null and (d <= 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string), id (type: string) - sort order: ++ - Map-reduce partition columns: id (type: string), id (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {id1} {id2} + 1 {d} + keys: + 0 id1 (type: string), id2 (type: string) + 1 id (type: string), id (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -76,11 +77,21 @@ STAGE PLANS: Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col6} + 1 {d} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -88,46 +99,45 @@ STAGE PLANS: Filter Operator predicate: (id1 is not null and id2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id1 (type: string), id2 (type: string) - sort order: ++ - Map-reduce partition columns: id1 (type: string), id2 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {id1} {id2} + 1 {d} + keys: + 0 id1 (type: string), id2 (type: string) + 1 id (type: string), id (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} - 1 {VALUE._col1} - outputColumnNames: _col0, _col1, _col6, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col6} + 1 {d} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col6, _col11 + input vertices: + 1 Map 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -150,31 +160,16 @@ select a.*,b.d d1,c.d d2 from ) z where d1 > 1 or d2 > 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (id1 is not null and id2 is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id1 (type: string), id2 (type: string) - sort order: ++ - Map-reduce partition columns: id1 (type: string), id2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Map 2 Map Operator Tree: TableScan alias: b @@ -182,13 +177,16 @@ STAGE PLANS: Filter Operator predicate: (id is not null and (d <= 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string), id (type: string) - sort order: ++ - Map-reduce partition columns: id (type: string), id (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {id1} {id2} + 1 {d} + keys: + 0 id1 (type: string), id2 (type: string) + 1 id (type: string), id (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -196,48 +194,70 @@ STAGE PLANS: Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} - 1 {VALUE._col1} - outputColumnNames: _col0, _col1, _col6, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col6} + 1 {d} + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {id1} {id2} + 1 {d} + keys: + 0 id1 (type: string), id2 (type: string) + 1 id (type: string), id (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col6} + 1 {d} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col6, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index 6d7cc55..eef13b9 100644 --- ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -120,34 +120,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: a + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -196,35 +207,90 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [a] - Map 3 + /src [src] + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col8, _col9 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -273,68 +339,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col2} {VALUE._col3} - outputColumnNames: _col0, _col8, _col9 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false + /src [a] Stage: Stage-0 Fetch Operator @@ -498,34 +503,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: a + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -574,35 +590,90 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [a] - Map 3 + /src [src] + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col8, _col9 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -651,68 +722,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col2} {VALUE._col3} - outputColumnNames: _col0, _col8, _col9 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false + /src [a] Stage: Stage-0 Fetch Operator @@ -876,34 +886,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: a + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -952,31 +969,90 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [a] - Map 3 + /src [src] + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col8, _col9 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1025,68 +1101,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col2} {VALUE._col3} - outputColumnNames: _col0, _col8, _col9 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false + /src [a] Stage: Stage-0 Fetch Operator @@ -1250,34 +1265,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: a + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1326,35 +1352,90 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [a] - Map 3 + /src [src] + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: src + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {_col3} {_col4} + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col8, _col9 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1403,68 +1484,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col2} {VALUE._col3} - outputColumnNames: _col0, _col8, _col9 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false + /src [a] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out index 6b952f9..538ebf7 100644 --- ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out @@ -1,8 +1,12 @@ -PREHOOK: query: CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@mi1 -POSTHOOK: query: CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE mi1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@mi1 @@ -37,7 +41,8 @@ INSERT OVERWRITE TABLE mi3 PARTITION(ds='2008-04-08', hr='12') SELECT a.key WHER INSERT OVERWRITE DIRECTORY 'target/warehouse/mi4.out' SELECT a.value WHERE a.key >= 300 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage + Stage-9 is a root stage + Stage-4 depends on stages: Stage-9 Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 @@ -48,10 +53,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-9 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,12 +65,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -75,81 +87,80 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 < 100) (type: boolean) - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi1 - Filter Operator - predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi2 - Filter Operator - predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi3 - Filter Operator - predicate: (_col0 >= 300) (type: boolean) - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi1 + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi2 + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi3 + Filter Operator + predicate: (_col0 >= 300) (type: boolean) + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-5 Dependency Collection @@ -821,468 +832,468 @@ POSTHOOK: Input: default@mi3@ds=2008-04-08/hr=12 298 2008-04-08 12 298 2008-04-08 12 298 2008-04-08 12 -val_302 -val_305 -val_306 -val_307 -val_307 -val_307 -val_307 -val_308 -val_309 -val_309 -val_309 -val_309 -val_310 -val_311 -val_311 -val_311 val_311 val_311 val_311 +val_409 +val_409 +val_409 +val_484 +val_401 +val_401 +val_401 +val_401 +val_401 +val_369 +val_369 +val_369 +val_406 +val_406 +val_406 +val_406 +val_429 +val_429 +val_374 +val_469 +val_469 +val_469 +val_469 +val_469 +val_495 +val_327 +val_327 +val_327 +val_403 +val_403 +val_403 +val_417 +val_417 +val_417 +val_430 +val_430 +val_430 +val_338 +val_446 +val_459 +val_459 +val_394 +val_482 +val_413 +val_413 +val_494 +val_466 +val_466 +val_466 +val_399 +val_399 +val_396 +val_396 +val_396 +val_417 +val_417 +val_417 +val_489 +val_489 +val_489 +val_489 +val_377 +val_397 +val_397 +val_309 +val_309 +val_365 +val_439 +val_439 +val_342 +val_342 +val_367 +val_367 +val_325 +val_325 +val_475 +val_339 +val_455 val_311 val_311 val_311 -val_315 -val_316 val_316 val_316 val_316 -val_316 -val_316 -val_316 -val_316 -val_316 -val_317 -val_317 -val_317 -val_317 -val_318 -val_318 -val_318 -val_318 -val_318 -val_318 +val_302 +val_438 +val_438 +val_438 +val_345 +val_489 +val_489 +val_489 +val_489 +val_378 +val_427 +val_356 +val_399 +val_399 +val_382 +val_382 +val_498 +val_498 +val_498 +val_386 +val_437 +val_469 +val_469 +val_469 +val_469 +val_469 +val_459 +val_459 +val_430 +val_430 +val_430 val_318 val_318 val_318 -val_321 -val_321 -val_321 -val_321 -val_322 -val_322 -val_322 -val_322 -val_323 -val_325 -val_325 -val_325 -val_325 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_331 -val_331 -val_331 -val_331 val_332 +val_311 +val_311 +val_311 val_333 val_333 -val_333 -val_333 -val_335 -val_336 -val_338 -val_339 -val_341 -val_342 -val_342 -val_342 -val_342 -val_344 -val_344 -val_344 -val_344 -val_345 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 +val_404 +val_404 +val_384 +val_384 +val_384 +val_489 +val_489 +val_489 +val_489 +val_353 +val_353 +val_373 val_348 val_348 val_348 val_348 val_348 +val_466 +val_466 +val_466 +val_411 val_348 val_348 val_348 val_348 val_348 -val_351 -val_353 -val_353 -val_353 -val_353 -val_356 -val_360 -val_362 -val_364 -val_365 -val_366 -val_367 -val_367 -val_367 -val_367 -val_368 -val_369 -val_369 -val_369 -val_369 -val_369 -val_369 -val_369 -val_369 -val_369 -val_373 -val_374 -val_375 -val_377 -val_378 -val_379 -val_382 -val_382 -val_382 -val_382 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_386 -val_389 -val_392 +val_463 +val_463 +val_431 +val_431 +val_431 +val_496 +val_322 +val_322 +val_468 +val_468 +val_468 +val_468 val_393 -val_394 -val_395 -val_395 -val_395 -val_395 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_397 -val_397 -val_397 -val_397 -val_399 -val_399 -val_399 -val_399 -val_400 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_402 -val_403 -val_403 -val_403 -val_403 -val_403 -val_403 -val_403 -val_403 -val_403 -val_404 -val_404 +val_454 +val_454 +val_454 +val_418 +val_327 +val_327 +val_327 val_404 val_404 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_407 -val_409 -val_409 -val_409 -val_409 -val_409 -val_409 +val_436 +val_469 +val_469 +val_469 +val_469 +val_469 +val_468 +val_468 +val_468 +val_468 +val_308 +val_481 +val_457 +val_318 +val_318 +val_318 +val_318 +val_318 +val_318 val_409 val_409 val_409 -val_411 -val_413 -val_413 +val_470 +val_369 +val_369 +val_369 +val_316 +val_316 +val_316 val_413 val_413 -val_414 -val_414 -val_414 -val_414 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_418 +val_490 +val_364 +val_395 +val_395 val_419 -val_421 -val_424 -val_424 -val_424 -val_424 -val_427 -val_429 -val_429 -val_429 -val_429 -val_430 -val_430 -val_430 -val_430 -val_430 -val_430 +val_307 +val_307 +val_435 +val_306 +val_309 +val_309 +val_389 +val_327 +val_327 +val_327 +val_369 +val_369 +val_369 +val_392 +val_331 +val_331 +val_401 +val_401 +val_401 +val_401 +val_401 +val_452 +val_497 +val_402 +val_396 +val_396 +val_396 +val_317 +val_317 +val_395 +val_395 +val_336 +val_472 +val_322 +val_322 +val_498 +val_498 +val_498 +val_321 +val_321 val_430 val_430 val_430 -val_431 -val_431 -val_431 -val_431 -val_431 -val_431 -val_431 -val_431 -val_431 -val_432 -val_435 -val_436 -val_437 -val_438 -val_438 -val_438 -val_438 -val_438 -val_438 -val_438 -val_438 -val_438 -val_439 -val_439 -val_439 -val_439 -val_443 -val_444 -val_446 -val_448 -val_449 -val_452 -val_453 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_455 -val_457 -val_458 -val_458 +val_489 +val_489 +val_489 +val_489 val_458 val_458 -val_459 -val_459 -val_459 -val_459 -val_460 -val_462 -val_462 -val_462 -val_462 -val_463 -val_463 -val_463 -val_463 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_467 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 +val_492 +val_492 +val_449 +val_453 val_468 val_468 val_468 val_468 +val_342 +val_342 +val_368 +val_367 +val_367 +val_344 +val_344 +val_485 +val_487 +val_480 +val_480 +val_480 +val_401 +val_401 +val_401 +val_401 +val_401 +val_438 +val_438 +val_438 +val_467 +val_432 +val_316 +val_316 +val_316 val_469 val_469 val_469 val_469 val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_470 -val_472 -val_475 -val_477 -val_478 -val_478 +val_463 +val_463 +val_331 +val_331 +val_321 +val_321 +val_335 +val_466 +val_466 +val_466 +val_366 +val_403 +val_403 +val_403 +val_483 +val_406 +val_406 +val_406 +val_406 +val_409 +val_409 +val_409 +val_406 +val_406 +val_406 +val_406 +val_401 +val_401 +val_401 +val_401 +val_401 +val_348 +val_348 +val_348 +val_348 +val_348 +val_424 +val_424 +val_396 +val_396 +val_396 +val_431 +val_431 +val_431 +val_454 +val_454 +val_454 val_478 val_478 -val_479 -val_480 -val_480 -val_480 +val_431 +val_431 +val_431 +val_424 +val_424 +val_382 +val_382 +val_397 +val_397 val_480 val_480 val_480 +val_351 +val_438 +val_438 +val_438 +val_414 +val_414 +val_491 +val_439 +val_439 +val_360 +val_479 +val_305 +val_417 +val_417 +val_417 +val_444 +val_429 +val_429 +val_443 +val_323 +val_325 +val_325 +val_478 +val_478 +val_468 +val_468 +val_468 +val_468 +val_310 +val_317 +val_317 +val_333 +val_333 +val_493 +val_460 val_480 val_480 val_480 -val_481 -val_482 -val_483 -val_484 -val_485 -val_487 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_490 -val_491 -val_492 -val_492 +val_353 +val_353 +val_462 +val_462 +val_406 +val_406 +val_406 +val_406 +val_454 +val_454 +val_454 +val_375 +val_401 +val_401 +val_401 +val_401 +val_401 +val_421 +val_407 +val_384 +val_384 +val_384 +val_384 +val_384 +val_384 +val_379 +val_462 +val_462 val_492 val_492 -val_493 -val_494 -val_495 -val_496 -val_497 -val_498 -val_498 -val_498 -val_498 -val_498 -val_498 +val_341 val_498 val_498 val_498 +val_458 +val_458 +val_362 +val_348 +val_348 +val_348 +val_348 +val_348 +val_344 +val_344 +val_469 +val_469 +val_469 +val_469 +val_469 +val_315 +val_448 +val_348 +val_348 +val_348 +val_348 +val_348 +val_307 +val_307 +val_414 +val_414 +val_477 +val_403 +val_403 +val_403 +val_400 PREHOOK: query: EXPLAIN FROM src a JOIN src b ON (a.key = b.key) INSERT OVERWRITE TABLE mi1 SELECT a.* WHERE a.key < 100 @@ -1298,7 +1309,8 @@ INSERT OVERWRITE TABLE mi3 PARTITION(ds='2008-04-08', hr='12') SELECT a.key WHER INSERT OVERWRITE DIRECTORY 'target/warehouse/mi4.out' SELECT a.value WHERE a.key >= 300 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage + Stage-9 is a root stage + Stage-4 depends on stages: Stage-9 Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 Stage-6 depends on stages: Stage-0 @@ -1309,10 +1321,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-9 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1323,12 +1333,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1336,81 +1355,80 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 < 100) (type: boolean) - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi1 - Filter Operator - predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi2 - Filter Operator - predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi3 - Filter Operator - predicate: (_col0 >= 300) (type: boolean) - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi1 + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi2 + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi3 + Filter Operator + predicate: (_col0 >= 300) (type: boolean) + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 966 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-5 Dependency Collection @@ -2068,479 +2086,479 @@ POSTHOOK: Input: default@mi3@ds=2008-04-08/hr=12 288 2008-04-08 12 288 2008-04-08 12 288 2008-04-08 12 -288 2008-04-08 12 -289 2008-04-08 12 -291 2008-04-08 12 -292 2008-04-08 12 -296 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -298 2008-04-08 12 -val_302 -val_305 -val_306 -val_307 -val_307 -val_307 -val_307 -val_308 -val_309 -val_309 -val_309 -val_309 -val_310 -val_311 -val_311 -val_311 +288 2008-04-08 12 +289 2008-04-08 12 +291 2008-04-08 12 +292 2008-04-08 12 +296 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 +298 2008-04-08 12 val_311 val_311 val_311 +val_409 +val_409 +val_409 +val_484 +val_401 +val_401 +val_401 +val_401 +val_401 +val_369 +val_369 +val_369 +val_406 +val_406 +val_406 +val_406 +val_429 +val_429 +val_374 +val_469 +val_469 +val_469 +val_469 +val_469 +val_495 +val_327 +val_327 +val_327 +val_403 +val_403 +val_403 +val_417 +val_417 +val_417 +val_430 +val_430 +val_430 +val_338 +val_446 +val_459 +val_459 +val_394 +val_482 +val_413 +val_413 +val_494 +val_466 +val_466 +val_466 +val_399 +val_399 +val_396 +val_396 +val_396 +val_417 +val_417 +val_417 +val_489 +val_489 +val_489 +val_489 +val_377 +val_397 +val_397 +val_309 +val_309 +val_365 +val_439 +val_439 +val_342 +val_342 +val_367 +val_367 +val_325 +val_325 +val_475 +val_339 +val_455 val_311 val_311 val_311 -val_315 -val_316 -val_316 -val_316 val_316 val_316 val_316 -val_316 -val_316 -val_316 -val_317 -val_317 -val_317 -val_317 -val_318 -val_318 -val_318 -val_318 -val_318 -val_318 +val_302 +val_438 +val_438 +val_438 +val_345 +val_489 +val_489 +val_489 +val_489 +val_378 +val_427 +val_356 +val_399 +val_399 +val_382 +val_382 +val_498 +val_498 +val_498 +val_386 +val_437 +val_469 +val_469 +val_469 +val_469 +val_469 +val_459 +val_459 +val_430 +val_430 +val_430 val_318 val_318 val_318 -val_321 -val_321 -val_321 -val_321 -val_322 -val_322 -val_322 -val_322 -val_323 -val_325 -val_325 -val_325 -val_325 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_327 -val_331 -val_331 -val_331 -val_331 val_332 +val_311 +val_311 +val_311 val_333 val_333 -val_333 -val_333 -val_335 -val_336 -val_338 -val_339 -val_341 -val_342 -val_342 -val_342 -val_342 -val_344 -val_344 -val_344 -val_344 -val_345 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 -val_348 +val_404 +val_404 +val_384 +val_384 +val_384 +val_489 +val_489 +val_489 +val_489 +val_353 +val_353 +val_373 val_348 val_348 val_348 val_348 val_348 +val_466 +val_466 +val_466 +val_411 val_348 val_348 val_348 val_348 val_348 -val_351 -val_353 -val_353 -val_353 -val_353 -val_356 -val_360 -val_362 -val_364 -val_365 -val_366 -val_367 -val_367 -val_367 -val_367 -val_368 -val_369 -val_369 -val_369 +val_463 +val_463 +val_431 +val_431 +val_431 +val_496 +val_322 +val_322 +val_468 +val_468 +val_468 +val_468 +val_393 +val_454 +val_454 +val_454 +val_418 +val_327 +val_327 +val_327 +val_404 +val_404 +val_436 +val_469 +val_469 +val_469 +val_469 +val_469 +val_468 +val_468 +val_468 +val_468 +val_308 +val_481 +val_457 +val_318 +val_318 +val_318 +val_318 +val_318 +val_318 +val_409 +val_409 +val_409 +val_470 val_369 val_369 val_369 +val_316 +val_316 +val_316 +val_413 +val_413 +val_490 +val_364 +val_395 +val_395 +val_419 +val_307 +val_307 +val_435 +val_306 +val_309 +val_309 +val_389 +val_327 +val_327 +val_327 val_369 val_369 val_369 -val_373 -val_374 -val_375 -val_377 -val_378 -val_379 -val_382 -val_382 -val_382 -val_382 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_384 -val_386 -val_389 val_392 -val_393 -val_394 -val_395 -val_395 -val_395 -val_395 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_396 -val_397 -val_397 -val_397 -val_397 -val_399 -val_399 -val_399 -val_399 -val_400 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 -val_401 +val_331 +val_331 val_401 val_401 val_401 val_401 val_401 +val_452 +val_497 +val_402 +val_396 +val_396 +val_396 +val_317 +val_317 +val_395 +val_395 +val_336 +val_472 +val_322 +val_322 +val_498 +val_498 +val_498 +val_321 +val_321 +val_430 +val_430 +val_430 +val_489 +val_489 +val_489 +val_489 +val_458 +val_458 +val_492 +val_492 +val_449 +val_453 +val_468 +val_468 +val_468 +val_468 +val_342 +val_342 +val_368 +val_367 +val_367 +val_344 +val_344 +val_485 +val_487 +val_480 +val_480 +val_480 val_401 val_401 val_401 val_401 val_401 -val_402 -val_403 -val_403 -val_403 -val_403 -val_403 -val_403 +val_438 +val_438 +val_438 +val_467 +val_432 +val_316 +val_316 +val_316 +val_469 +val_469 +val_469 +val_469 +val_469 +val_463 +val_463 +val_331 +val_331 +val_321 +val_321 +val_335 +val_466 +val_466 +val_466 +val_366 val_403 val_403 val_403 -val_404 -val_404 -val_404 -val_404 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 -val_406 +val_483 val_406 val_406 val_406 val_406 +val_409 +val_409 +val_409 val_406 val_406 val_406 val_406 -val_407 -val_409 -val_409 -val_409 -val_409 -val_409 -val_409 -val_409 -val_409 -val_409 -val_411 -val_413 -val_413 -val_413 -val_413 -val_414 -val_414 -val_414 -val_414 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_417 -val_418 -val_419 -val_421 -val_424 -val_424 -val_424 -val_424 -val_427 -val_429 -val_429 -val_429 -val_429 -val_430 -val_430 -val_430 -val_430 -val_430 -val_430 -val_430 -val_430 -val_430 -val_431 -val_431 -val_431 +val_401 +val_401 +val_401 +val_401 +val_401 +val_348 +val_348 +val_348 +val_348 +val_348 +val_424 +val_424 +val_396 +val_396 +val_396 val_431 val_431 val_431 +val_454 +val_454 +val_454 +val_478 +val_478 val_431 val_431 val_431 -val_432 -val_435 -val_436 -val_437 -val_438 -val_438 -val_438 -val_438 -val_438 -val_438 +val_424 +val_424 +val_382 +val_382 +val_397 +val_397 +val_480 +val_480 +val_480 +val_351 val_438 val_438 val_438 +val_414 +val_414 +val_491 val_439 val_439 -val_439 -val_439 -val_443 +val_360 +val_479 +val_305 +val_417 +val_417 +val_417 val_444 -val_446 -val_448 -val_449 -val_452 -val_453 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_454 -val_455 -val_457 -val_458 -val_458 -val_458 -val_458 -val_459 -val_459 -val_459 -val_459 -val_460 -val_462 -val_462 -val_462 -val_462 -val_463 -val_463 -val_463 -val_463 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_466 -val_467 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 -val_468 +val_429 +val_429 +val_443 +val_323 +val_325 +val_325 +val_478 +val_478 val_468 val_468 val_468 val_468 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_469 -val_470 -val_472 -val_475 -val_477 -val_478 -val_478 -val_478 -val_478 -val_479 -val_480 -val_480 -val_480 -val_480 -val_480 -val_480 +val_310 +val_317 +val_317 +val_333 +val_333 +val_493 +val_460 val_480 val_480 val_480 -val_481 -val_482 -val_483 -val_484 -val_485 -val_487 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_489 -val_490 -val_491 -val_492 -val_492 +val_353 +val_353 +val_462 +val_462 +val_406 +val_406 +val_406 +val_406 +val_454 +val_454 +val_454 +val_375 +val_401 +val_401 +val_401 +val_401 +val_401 +val_421 +val_407 +val_384 +val_384 +val_384 +val_384 +val_384 +val_384 +val_379 +val_462 +val_462 val_492 val_492 -val_493 -val_494 -val_495 -val_496 -val_497 -val_498 -val_498 -val_498 -val_498 -val_498 -val_498 +val_341 val_498 val_498 val_498 +val_458 +val_458 +val_362 +val_348 +val_348 +val_348 +val_348 +val_348 +val_344 +val_344 +val_469 +val_469 +val_469 +val_469 +val_469 +val_315 +val_448 +val_348 +val_348 +val_348 +val_348 +val_348 +val_307 +val_307 +val_414 +val_414 +val_477 +val_403 +val_403 +val_403 +val_400 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out index 2f052b8..516a212 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out @@ -21,14 +21,13 @@ EXPLAIN WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,13 +38,21 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -53,36 +60,35 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -135,14 +141,13 @@ POSTHOOK: query: EXPLAIN WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,13 +158,21 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -167,36 +180,35 @@ STAGE PLANS: Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out index ae28ff6..afc8192 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out @@ -21,68 +21,74 @@ EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -255,68 +261,74 @@ POSTHOOK: query: EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 9b7193e..cc2710f 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -27,14 +27,13 @@ EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,65 +44,82 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + input vertices: + 0 Map 3 + 1 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -392,14 +408,13 @@ POSTHOOK: query: EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -410,65 +425,82 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + input vertices: + 0 Map 3 + 1 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out index e71c7f2..5b7f63c 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out @@ -121,14 +121,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -139,13 +138,18 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: t2 @@ -153,12 +157,23 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 20 (type: int) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {id} {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: t1 @@ -166,34 +181,36 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 20 (type: int) - sort order: + + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {id} {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + input vertices: + 1 Map 2 + 2 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -206,14 +223,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -224,13 +240,18 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: t2 @@ -238,12 +259,23 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 20 (type: int) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {id} {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: t1 @@ -251,34 +283,36 @@ STAGE PLANS: Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 20 (type: int) - sort order: + + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {id} {key} {value} + keys: + 0 20 (type: int) + 1 20 (type: int) + 2 id (type: int) + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + input vertices: + 1 Map 2 + 2 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ptf_streaming.q.out ql/src/test/results/clientpositive/spark/ptf_streaming.q.out index cb94983..ef7ae88 100644 --- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out @@ -4,7 +4,9 @@ PREHOOK: Output: noopstreaming POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Output: noopstreaming -PREHOOK: query: --1. test1 +PREHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,7 +18,9 @@ from noopstreaming(on part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: --1. test1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -118,20 +122,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -142,10 +132,24 @@ POSTHOOK: Input: default@part 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r diff --git ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out index 74f03b8..edab30d 100644 --- ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out +++ ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out @@ -3,15 +3,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -22,13 +20,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -46,42 +54,41 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/router_join_ppr.q.out ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index ae91b17..8024f52 100644 --- ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -105,45 +105,48 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(ds = '2008-04-08')} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -151,13 +154,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -167,29 +168,102 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(ds = '2008-04-08')} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -230,12 +304,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -276,12 +350,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -320,37 +394,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -358,11 +409,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -372,71 +425,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Stage: Stage-0 Fetch Operator @@ -589,42 +597,46 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -632,11 +644,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -646,55 +660,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -733,14 +721,77 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col7, _col8 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -748,13 +799,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -764,64 +813,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col7, _col8 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /src [b] Stage: Stage-0 Fetch Operator @@ -970,45 +981,43 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1016,13 +1025,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1032,29 +1039,97 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1093,35 +1168,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1129,11 +1183,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1143,66 +1199,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] Stage: Stage-0 Fetch Operator @@ -1351,42 +1365,46 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} {ds} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1394,11 +1412,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1408,55 +1428,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1497,12 +1491,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 - hr 12 + ds 2008-04-09 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1543,12 +1537,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1587,14 +1581,79 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + /srcpart/ds=2008-04-09/hr=11 [a] + /srcpart/ds=2008-04-09/hr=12 [a] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} {ds} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col7, _col8 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1602,13 +1661,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1618,66 +1675,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] - /srcpart/ds=2008-04-09/hr=11 [a] - /srcpart/ds=2008-04-09/hr=12 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col7, _col8 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /src [b] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/sample8.q.out ql/src/test/results/clientpositive/spark/sample8.q.out index 17e3204..338bd8d 100644 --- ql/src/test/results/clientpositive/spark/sample8.q.out +++ ql/src/test/results/clientpositive/spark/sample8.q.out @@ -85,33 +85,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: t - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: s + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: (((((hash(key) & 2147483647) % 1) = 0) and value is not null) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + predicate: (((((hash(key) & 2147483647) % 10) = 0) and value is not null) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,14 +163,79 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [s] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: true + predicate: (((((hash(key) & 2147483647) % 1) = 0) and value is not null) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col7, _col8 + input vertices: + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 = _col0) and (_col8 = _col1)) (type: boolean) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -209,12 +276,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -255,12 +322,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -299,39 +366,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [t] - /srcpart/ds=2008-04-08/hr=12 [t] - /srcpart/ds=2008-04-09/hr=11 [t] - /srcpart/ds=2008-04-09/hr=12 [t] - Map 3 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: true - predicate: (((((hash(key) & 2147483647) % 10) = 0) and value is not null) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 - hr 11 + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -371,47 +413,10 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [s] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col7, _col8 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 = _col0) and (_col8 = _col1)) (type: boolean) - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /srcpart/ds=2008-04-08/hr=11 [t] + /srcpart/ds=2008-04-08/hr=12 [t] + /srcpart/ds=2008-04-09/hr=11 [t] + /srcpart/ds=2008-04-09/hr=12 [t] Stage: Stage-0 Fetch Operator @@ -800,14 +805,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src TABLESAMPLE(100 ROWS) a JOIN src1 TABLESAMPLE(10 ROWS) b ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -819,13 +823,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -834,33 +846,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -891,14 +902,13 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src TABLESAMPLE(100 ROWS) a, src1 TABLESAMPLE(10 ROWS) b WHERE a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -910,13 +920,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -925,36 +943,35 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 447cc6c..551b445 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -119,15 +119,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,12 +145,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -160,30 +169,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -225,15 +233,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -253,12 +259,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -266,30 +283,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -333,15 +349,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -361,12 +375,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -374,30 +399,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -433,15 +457,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -461,12 +483,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -474,30 +507,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -544,15 +576,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -572,12 +602,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -585,30 +626,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -647,32 +687,16 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: t3 @@ -689,31 +713,55 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 @@ -750,15 +798,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -778,12 +824,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -791,30 +848,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -850,15 +906,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -878,12 +932,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -891,30 +956,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -955,15 +1019,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -983,12 +1045,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -996,29 +1069,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -1073,15 +1146,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1101,12 +1172,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 (2 * _col0) (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1114,30 +1196,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 (2 * _col0) (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -1177,32 +1258,16 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: c @@ -1219,12 +1284,18 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: a @@ -1232,35 +1303,62 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 4 + 2 Map 3 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Reducer 3 - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1310,15 +1408,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1338,12 +1434,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1351,29 +1458,29 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) - Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -1423,15 +1530,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1451,12 +1556,18 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -1473,12 +1584,25 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -1486,32 +1610,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map Join Operator + condition map: + Left Semi Join 0 to 1 + Left Semi Join 0 to 2 + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1562,15 +1688,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1578,12 +1702,18 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -1597,42 +1727,57 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1695,28 +1840,16 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: c @@ -1730,42 +1863,73 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 0 Map 4 + 2 Map 3 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1967,15 +2131,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1992,52 +2154,73 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + + Map Join Operator + condition map: + Left Semi Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2103,15 +2286,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2128,51 +2309,72 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {key} + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + outputColumnNames: _col0 + input vertices: + 0 Map 4 + 1 Map 1 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -2390,16 +2592,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2419,22 +2618,37 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -2442,46 +2656,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Reducer 4 Reduce Operator Tree: Select Operator @@ -2554,14 +2764,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2581,12 +2790,21 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -2594,33 +2812,32 @@ STAGE PLANS: Filter Operator predicate: (value is not null and (key > 100)) (type: boolean) Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 value (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoin.q.out ql/src/test/results/clientpositive/spark/skewjoin.q.out index 23d4b18..45d3d80 100644 --- ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -79,16 +79,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -99,13 +98,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -113,33 +120,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -191,14 +198,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -209,13 +215,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -223,13 +236,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -237,13 +257,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -251,37 +283,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -323,14 +358,13 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -341,13 +375,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: b @@ -355,13 +396,20 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: c @@ -369,13 +417,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a @@ -383,37 +443,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + 3 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + input vertices: + 1 Map 2 + 2 Map 3 + 3 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - 3 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -447,15 +510,37 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -466,51 +551,36 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} + keys: + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 0 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial @@ -560,18 +630,16 @@ ON (x.key = Y.key) SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -580,15 +648,26 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src @@ -597,40 +676,39 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial @@ -688,77 +766,85 @@ ON (x.key = Y.key and substring(x.value, 5)=substring(y.value, 5)+1) SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) + predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} + keys: + 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) + 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) + predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) + 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial @@ -824,15 +910,13 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -847,71 +931,91 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) + predicate: ((key < 80) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {_col0} + 1 {_col1} + 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 1 + 2 Map 4 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -969,15 +1073,13 @@ POSTHOOK: query: EXPLAIN SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -985,46 +1087,55 @@ STAGE PLANS: TableScan alias: v Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {val} + keys: + 0 (key + 1) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: k Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} + 1 {val} + keys: + 0 (key + 1) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out index 5d64049..313aedb 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out @@ -5,18 +5,16 @@ POSTHOOK: query: explain create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-5 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -27,12 +25,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -40,31 +49,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator @@ -86,7 +94,7 @@ STAGE PLANS: Stage: Stage-2 Dependency Collection - Stage: Stage-4 + Stage: Stage-5 Create Table Operator: Create Table columns: key string, value string diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out index c4a58c1..87c14d3 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out @@ -61,116 +61,126 @@ EXPLAIN SELECT * FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -207,16 +217,14 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -227,41 +235,92 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: a @@ -269,54 +328,15 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -361,119 +381,107 @@ INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Union 3 - Vertex: Union 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 Stage: Stage-2 Dependency Collection @@ -488,6 +496,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + PREHOOK: query: INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY @@ -529,17 +559,15 @@ INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -550,96 +578,86 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work Union 3 Vertex: Union 3 @@ -656,6 +674,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + PREHOOK: query: INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out index a0fb663..d257788 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out @@ -73,16 +73,14 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -93,128 +91,158 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + 2 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 8 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 5 + 2 Map 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 9 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out index 0cfb539..d65a103 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out @@ -47,116 +47,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -193,16 +203,14 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -213,41 +221,92 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: a @@ -255,54 +314,15 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -341,90 +361,100 @@ EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Union 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 4 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 5 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -442,26 +472,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -487,17 +521,14 @@ POSTHOOK: query: EXPLAIN SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -508,68 +539,81 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 5 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 6 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 6 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 4 Reduce Operator Tree: Group By Operator @@ -588,27 +632,31 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) Union 3 Vertex: Union 3 + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out index c68f954..dee0aef 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out @@ -49,16 +49,14 @@ explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -69,146 +67,160 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '8'))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: array) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 0 Map 1 + Select Operator + expressions: _col0 (type: string), _col6 (type: array) + outputColumnNames: _col0, _col1 + Select Operator + SELECT * : (no compute) + Lateral View Forward + Select Operator + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: array) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Select Operator - expressions: _col0 (type: string), _col6 (type: array) - outputColumnNames: _col0, _col1 - Select Operator - SELECT * : (no compute) - Lateral View Forward + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 0 Map 2 Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: string), _col6 (type: array) + outputColumnNames: _col0, _col1 + Select Operator + SELECT * : (no compute) + Lateral View Forward Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Select Operator - expressions: _col0 (type: string), _col6 (type: array) - outputColumnNames: _col0, _col1 - Select Operator - SELECT * : (no compute) - Lateral View Forward - Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + expressions: _col1 (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 4 + Vertex: Union 4 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out index 0b7fd10..0fc55a6 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out @@ -59,105 +59,208 @@ select * from ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4, Stage-5 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 11 (NONE, 0), Reducer 2 (NONE, 0), Reducer 6 (NONE, 0), Reducer 9 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 10 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0), Map 6 (NONE, 0), Map 9 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 12 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + SELECT * : (no compute) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 13 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + SELECT * : (no compute) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + predicate: (key is not null and (key = '2')) (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 8 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + SELECT * : (no compute) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 9 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col6 + input vertices: + 1 Map 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + SELECT * : (no compute) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 8 Map Operator Tree: TableScan alias: b @@ -165,120 +268,37 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 7 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out index 15ff759..1d179ae 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out @@ -49,112 +49,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out index c4b294b..c1fe941 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out @@ -75,15 +75,13 @@ T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -94,13 +92,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -108,13 +109,21 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} + keys: + 0 _col1 (type: string) + 1 val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -122,49 +131,45 @@ STAGE PLANS: Filter Operator predicate: (key is not null and val is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out index 9c955bf..52ae41d 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out @@ -79,34 +79,17 @@ T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: b @@ -114,27 +97,51 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) + Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 8 + predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + SELECT * : (no compute) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: c @@ -148,37 +155,34 @@ STAGE PLANS: Map-reduce partition columns: val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string) - Map 9 + Map 7 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - SELECT * : (no compute) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 5 + Select Operator + SELECT * : (no compute) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -199,24 +203,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - SELECT * : (no compute) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out index 8b0f7dc..b189489 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out @@ -87,116 +87,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: a Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -233,16 +243,14 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -253,41 +261,92 @@ STAGE PLANS: Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: a @@ -295,54 +354,15 @@ STAGE PLANS: Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {val} + 1 {key} {val} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -381,90 +401,100 @@ EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Union 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 4 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 5 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -482,26 +512,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -527,17 +561,14 @@ POSTHOOK: query: EXPLAIN SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -548,68 +579,81 @@ STAGE PLANS: Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 5 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 6 + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 0 Map 6 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 4 Reduce Operator Tree: Group By Operator @@ -628,27 +672,31 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) Union 3 Vertex: Union 3 + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out index 07c17f4..94d0f36 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out @@ -49,112 +49,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out index e4e7169..183254c 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out @@ -53,116 +53,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -255,112 +265,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out index 708e304..a328792 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out @@ -75,14 +75,13 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -93,13 +92,21 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -107,33 +114,32 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out index d599cd5..db82d40 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out @@ -51,116 +51,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out index 3c21e0b..be89681 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out @@ -57,112 +57,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -196,112 +210,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -339,96 +367,112 @@ EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Union 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -447,32 +491,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -499,96 +541,112 @@ POSTHOOK: query: EXPLAIN SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Union 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), val (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Reducer 4 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -607,32 +665,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string), val (type: string) + 1 key (type: string), val (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out index 2ed3634..6bc81a2 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out @@ -51,116 +51,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index 93cdbb8..75301c0 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -51,116 +51,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out index 1df961d..71e80b8 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out @@ -47,116 +47,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -191,116 +201,126 @@ EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out index 7712c0e..d4bc3ac 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out @@ -49,116 +49,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out index bdab7de..835db1c 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out @@ -51,116 +51,126 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {val} + 1 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 2 + Vertex: Union 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out index 3d46cf2..917d813 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out @@ -67,16 +67,14 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -87,128 +85,158 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + 2 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 8 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 5 + 2 Map 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 9 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out index 76c674c..16fedfd 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out @@ -65,16 +65,14 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) - Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -85,128 +83,158 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + 2 Map 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 8 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 5 + 2 Map 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Union 3 + Vertex: Union 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 9 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {val} + 2 {key} {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {val} + 1 {key} {val} + 2 {val} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index 10f64ba..22575cd 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -195,15 +195,37 @@ select key, count(1) as cnt from T1 group by key join T2 b on subq1.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -230,21 +252,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -256,33 +266,30 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out index cadf7ea..f37f146 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out @@ -113,47 +113,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Map 2 <- Map 1 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 - Map 3 - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types int:string:string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Map 2 Stage: Stage-0 Fetch Operator @@ -274,47 +238,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Map 2 <- Map 1 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 - Map 3 - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types int:string:string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Map 2 Stage: Stage-0 Fetch Operator @@ -373,17 +301,16 @@ hive_test_smb_bucket2 b ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-5 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -394,13 +321,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -408,38 +343,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col6, _col7 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_mapjoin9_results + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col6, _col7 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_mapjoin9_results + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection - Stage: Stage-4 + Stage: Stage-5 Create Table Operator: Create Table columns: k1 int, value string, ds string, k2 int diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index d58d226..1c015c5 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -53,14 +53,35 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,47 +92,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -136,14 +142,13 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -151,44 +156,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -218,14 +230,32 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -233,44 +263,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -385,14 +403,35 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -403,47 +442,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -468,14 +492,13 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -483,44 +506,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -550,14 +580,32 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -565,44 +613,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out index 6b62fdc..16ba243 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out @@ -73,14 +73,13 @@ on (a.ds = '1' and b.ds = '2' and a.type = b.type) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -91,12 +90,21 @@ STAGE PLANS: Filter Operator predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - sort order: ++++ - Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {userid} {pageid} {postid} {type} + 1 + keys: + 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -104,32 +112,32 @@ STAGE PLANS: Filter Operator predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - sort order: ++++ - Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} - outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {userid} {pageid} {postid} {type} + 1 {userid} {pageid} {postid} {type} + keys: + 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index 23835ba..c512205 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -121,57 +121,57 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key (type: string) - auto parallelism: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {value} {key} + keys: + 0 key (type: int) + 1 value (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 - bucket_field_name value - columns value,key + bucket_field_name key + columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 value, string key} + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -184,65 +184,91 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 - bucket_field_name value - columns value,key + bucket_field_name key + columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 value, string key} + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - Map 4 + /test_table1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {key} + keys: + 0 key (type: int) + 1 value (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 - bucket_field_name key - columns key,value + bucket_field_name value + columns value,key columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 5312 - serialization.ddl struct test_table1 { i32 key, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 value, string key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -255,48 +281,26 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 - bucket_field_name key - columns key,value + bucket_field_name value + columns value,key columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 5312 - serialization.ddl struct test_table1 { i32 key, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 value, string key} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -409,15 +413,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -430,14 +432,16 @@ STAGE PLANS: isSamplingPred: false predicate: UDFToDouble(value) is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(value) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(value) (type: double) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key (type: int), value (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(value) (type: double) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -491,7 +495,14 @@ STAGE PLANS: name: default.test_table4 Truncated Path -> Alias: /test_table4 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -501,14 +512,33 @@ STAGE PLANS: isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key (type: int), value (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(value) (type: double) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -562,28 +592,6 @@ STAGE PLANS: name: default.test_table3 Truncated Path -> Alias: /test_table3 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out index 9b6ea3c..972be45 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -51,65 +51,74 @@ select count(*) from ( ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -169,71 +178,80 @@ group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -250,7 +268,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) @@ -319,16 +337,13 @@ select count(*) from ) subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -339,12 +354,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -352,37 +379,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -471,18 +498,16 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -494,12 +519,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -511,33 +547,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -609,31 +645,16 @@ select /*+mapjoin(subq2)*/ count(*) from on subq2.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: a @@ -645,33 +666,57 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -767,15 +812,13 @@ select /*+mapjoin(subq2)*/ count(*) from on subq2.key = subq4.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -790,12 +833,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -807,31 +861,31 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -919,18 +973,16 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -942,12 +994,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -959,33 +1022,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1047,18 +1110,16 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -1070,12 +1131,23 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -1087,33 +1159,33 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1171,15 +1243,13 @@ select /*+mapjoin(subq1)*/ count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1194,12 +1264,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1207,31 +1288,31 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -1287,15 +1368,13 @@ select /*+mapjoin(a)*/ count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1310,12 +1389,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1323,31 +1413,31 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -1413,15 +1503,13 @@ select /*+mapjoin(subq1, subq2)*/ count(*) from on (subq1.key = subq3.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1436,11 +1524,17 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan @@ -1453,12 +1547,25 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1470,33 +1577,35 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 - 2 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + input vertices: + 1 Map 4 + 2 Map 1 + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -1578,31 +1687,16 @@ join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: a @@ -1614,33 +1708,57 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 46f4ee7..edbd89c 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -93,41 +93,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -139,11 +139,11 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -161,44 +161,70 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - Map 4 + /test_table1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -210,11 +236,11 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 5312 - serialization.ddl struct test_table1 { i32 key, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -232,43 +258,21 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 5312 - serialization.ddl struct test_table1 { i32 key, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -443,41 +447,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), key2 (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), key2 (type: int) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {key2} {value} + keys: + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -489,11 +493,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -511,44 +515,70 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - Map 4 + /test_table1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), key2 (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), key2 (type: int) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {key2} {value} + 1 {key} {key2} {value} + keys: + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -560,11 +590,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -582,43 +612,21 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -741,41 +749,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key2 is not null and key is not null) (type: boolean) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: key2 (type: int), key (type: int) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {key2} {value} + keys: + 0 key2 (type: int), key (type: int) + 1 key2 (type: int), key (type: int) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -787,11 +795,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -809,44 +817,70 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - Map 4 + /test_table1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key2 is not null and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: key2 (type: int), key (type: int) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {key2} {value} + 1 {key} {key2} {value} + keys: + 0 key2 (type: int), key (type: int) + 1 key2 (type: int), key (type: int) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -858,11 +892,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -880,43 +914,21 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -1039,41 +1051,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) - Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key2 (type: int) - auto parallelism: false + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key2} + 1 {key} {key2} {value} + keys: + 0 key (type: int), value (type: string) + 1 key (type: int), value (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1085,11 +1097,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -1107,44 +1119,70 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 - numRows 0 - rawDataSize 0 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - Map 4 + /test_table1 [a] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) - Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key2 (type: int) - auto parallelism: false + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {key2} {value} + 1 {key} {key2} {value} + keys: + 0 key (type: int), value (type: string) + 1 key (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1156,11 +1194,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -1178,43 +1216,21 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 - numRows 500 - rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + numRows 0 + rawDataSize 0 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out index f56422d..89f13fd 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out @@ -43,15 +43,37 @@ EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,46 +84,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 0 Map 3 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index 7a26fdc..976221a 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -181,15 +181,13 @@ JOIN test_table6 f ON a.key = f.key JOIN test_table7 g ON a.key = g.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -200,12 +198,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: g @@ -213,12 +225,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 5 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: d @@ -226,12 +252,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: e @@ -239,12 +279,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: b @@ -252,12 +306,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: c @@ -265,12 +333,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 9 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 8 <- Map 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 7 Map Operator Tree: TableScan alias: a @@ -278,42 +367,52 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - Inner Join 0 to 4 - Inner Join 0 to 5 - Inner Join 0 to 6 - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + Inner Join 0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + input vertices: + 1 Map 5 + 2 Map 6 + 3 Map 3 + 4 Map 4 + 5 Map 1 + 6 Map 2 + Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index 9d3000e..32e1cfe 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -53,14 +53,35 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,47 +92,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -138,14 +144,13 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,44 +158,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -220,14 +232,32 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -235,44 +265,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -389,14 +407,35 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -407,47 +446,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -474,14 +498,13 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -489,44 +512,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -556,14 +586,32 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -571,44 +619,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index ff620c6..0b6935f 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -53,43 +53,56 @@ POSTHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map 5 + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: d @@ -97,82 +110,89 @@ STAGE PLANS: Filter Operator predicate: (key = 5) (type: boolean) Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + input vertices: + 0 Map 4 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 5) (type: boolean) - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + input vertices: + 1 Map 1 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 5) (type: boolean) + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index 58aefee..1a544b6 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -53,14 +53,13 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,13 +70,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -85,33 +92,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -138,14 +144,13 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,44 +158,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -219,14 +231,32 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -234,44 +264,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -387,14 +405,13 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -405,13 +422,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -419,33 +444,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -472,14 +496,13 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -487,44 +510,51 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -553,14 +583,32 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -568,44 +616,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 2 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index 893e4cf..23a327f 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -53,31 +53,16 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: c @@ -85,13 +70,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: a @@ -99,35 +89,60 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -154,14 +169,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -169,57 +183,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -246,14 +276,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -261,57 +290,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -343,14 +388,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -358,57 +402,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -544,14 +604,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -559,57 +618,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -638,72 +713,87 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -734,14 +824,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -749,57 +838,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 01e11a5..5d99a65 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -53,31 +53,16 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: c @@ -85,13 +70,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: a @@ -99,35 +89,60 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -154,14 +169,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -169,57 +183,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -246,14 +276,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -261,57 +290,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -343,14 +388,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -358,57 +402,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -544,14 +604,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -559,57 +618,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -638,72 +713,87 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 2 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -734,14 +824,13 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -749,57 +838,73 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 3 + 1 Map 1 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 63a16b6..ae7483f 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -63,16 +63,15 @@ insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -83,13 +82,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -97,34 +104,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -1235,16 +1241,15 @@ insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1255,13 +1260,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -1269,34 +1282,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -2423,16 +2435,15 @@ insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2443,13 +2454,21 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -2457,34 +2476,33 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -2527,16 +2545,15 @@ insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2547,13 +2564,21 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -2561,34 +2586,33 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + Local Work: + Map Reduce Local Work Stage: Stage-2 Dependency Collection @@ -2629,14 +2653,13 @@ POSTHOOK: query: explain select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2647,13 +2670,18 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: c @@ -2661,13 +2689,23 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: a @@ -2675,35 +2713,36 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 1 + 2 Map 2 + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out index 78bf282..8ba6039 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out @@ -55,15 +55,13 @@ explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,12 +72,23 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -87,31 +96,31 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out index 9032570..692d679 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out @@ -63,15 +63,13 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,12 +80,23 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -95,31 +104,31 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out index d2504f1..4ff5a35 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out @@ -63,15 +63,13 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,12 +80,23 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -95,31 +104,31 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out index 61e1b1b..8942320 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out @@ -61,15 +61,13 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,12 +78,23 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -93,31 +102,31 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out index f326f78..518da1a 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out @@ -112,15 +112,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,13 +131,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -193,7 +194,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -203,13 +211,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -263,29 +292,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out index cd2f225..c7d4613 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out @@ -112,15 +112,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,13 +131,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -192,7 +193,14 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -202,13 +210,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -262,29 +291,6 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out index feca353..d383aa5 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out @@ -148,15 +148,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -169,13 +167,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -276,7 +277,14 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] /srcbucket_mapjoin_part_2/part=2 [b] - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -286,13 +294,34 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -393,29 +422,6 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out index a9dc3c9..9091ef5 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out @@ -108,15 +108,13 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -127,12 +125,23 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -140,31 +149,31 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator @@ -216,15 +225,13 @@ select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b on a.key=b.key and a.value2=b.value2 where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -235,12 +242,23 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 517 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value2 (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value2 (type: string) - Statistics: Num rows: 41 Data size: 517 Basic stats: COMPLETE Column stats: NONE - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value2 (type: string) + 1 key (type: string), value2 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -248,31 +266,31 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 837 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value2 (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value2 (type: string) - Statistics: Num rows: 41 Data size: 837 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string), value2 (type: string) + 1 key (type: string), value2 (type: string) + input vertices: + 1 Map 1 + Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/subquery_exists.q.out ql/src/test/results/clientpositive/spark/subquery_exists.q.out index 5956bf3..a325e13 100644 --- ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- no agg, corr +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b @@ -8,7 +10,9 @@ where exists where b.value = a.value and a.key = b.key and a.value > 'val_9' ) PREHOOK: type: QUERY -POSTHOOK: query: -- no agg, corr +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b @@ -19,30 +23,16 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string), key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -59,32 +49,54 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 value (type: string), key (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 value (type: string), key (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index 9c4c1d7..67e0b37 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -15,14 +15,13 @@ from src where src.key in (select key from src s1 where s1.key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -42,12 +41,21 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src @@ -55,33 +63,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -133,30 +140,16 @@ where b.key in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -173,32 +166,54 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -258,15 +273,15 @@ part where part.p_size in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -281,20 +296,6 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_size (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(p_size) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(p_size) (type: double) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_size (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -318,6 +319,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: struct) Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -336,32 +339,52 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {VALUE._col5} - 1 - outputColumnNames: _col1, _col5 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} {p_size} + 1 + keys: + 0 UDFToDouble(p_size) (type: double) + 1 _col0 (type: double) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {p_name} {p_size} + 1 + keys: + 0 UDFToDouble(p_size) (type: double) + 1 _col0 (type: double) + outputColumnNames: _col1, _col5 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -410,33 +433,19 @@ from part b where b.p_size in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_size is not null and p_mfgr is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_size (type: int), p_mfgr (type: string) - sort order: ++ - Map-reduce partition columns: p_size (type: int), p_mfgr (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Map 3 + Map 2 Map Operator Tree: TableScan alias: part @@ -447,28 +456,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_size (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -493,7 +481,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 5 + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -513,11 +503,52 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} {p_mfgr} {p_size} + 1 + keys: + 0 p_size (type: int), p_mfgr (type: string) + 1 _col0 (type: int), _col1 (type: string) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_size is not null and p_mfgr is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {p_name} {p_mfgr} {p_size} + 1 + keys: + 0 p_size (type: int), p_mfgr (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col2, _col5 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -570,31 +601,18 @@ where b.key in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -616,28 +634,9 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -653,11 +652,52 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -733,16 +773,13 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -753,94 +790,73 @@ STAGE PLANS: Filter Operator predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: l_partkey (type: int) - sort order: + - Map-reduce partition columns: l_partkey (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - value expressions: l_orderkey (type: int), l_suppkey (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: l_partkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 6 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} - 1 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -851,11 +867,43 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index f03a6c6..ca4147d 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: CREATE TABLE src_4( +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE src_4( key STRING, value STRING ) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src_4 -POSTHOOK: query: CREATE TABLE src_4( +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE src_4( key STRING, value STRING ) @@ -57,7 +61,8 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 Stage-4 depends on stages: Stage-1 @@ -65,36 +70,13 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-6 Spark Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 8 (GROUP, 1) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 11 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: a @@ -111,12 +93,16 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: s1 @@ -128,12 +114,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: s1 @@ -152,81 +142,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Reducer 9 + Reducer 6 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -245,9 +163,104 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 + 1 + + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 Stage: Stage-3 Dependency Collection @@ -314,6 +327,7 @@ POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:DEPENDENCY_COLLECTION RUN: Stage-1:MOVE @@ -495,7 +509,8 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 Stage-4 depends on stages: Stage-1 @@ -503,36 +518,13 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-6 Spark Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 8 (GROUP, 1) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 11 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: a @@ -549,12 +541,16 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: s1 @@ -566,12 +562,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 5 Map Operator Tree: TableScan alias: s1 @@ -590,81 +590,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Reducer 9 + Reducer 6 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -683,9 +611,104 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 + 1 + + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 Stage: Stage-3 Dependency Collection @@ -752,6 +775,7 @@ POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:DEPENDENCY_COLLECTION RUN: Stage-1:MOVE diff --git ql/src/test/results/clientpositive/spark/temp_table_join1.q.out ql/src/test/results/clientpositive/spark/temp_table_join1.q.out index 50b42d9..c601bf7 100644 --- ql/src/test/results/clientpositive/spark/temp_table_join1.q.out +++ ql/src/test/results/clientpositive/spark/temp_table_join1.q.out @@ -33,14 +33,13 @@ FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -51,13 +50,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -65,32 +72,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -129,14 +136,13 @@ FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -147,13 +153,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -161,32 +175,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -227,14 +241,13 @@ FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -245,13 +258,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: src1 @@ -259,32 +280,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index e7f9cdf..608382f 100644 --- ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -9,17 +9,14 @@ explain select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -27,84 +24,103 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work + Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {key} {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Reducer 5 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index 75e414d..8bb9f1c 100644 --- ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -9,17 +9,14 @@ explain select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -27,84 +24,103 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 7 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {value} + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {key} {value} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col5, _col6 + input vertices: + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work + Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {key} {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Reducer 5 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index deec852..f3717dd 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -31,14 +31,13 @@ POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 WHERE l.cint = 6981 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -49,13 +48,21 @@ STAGE PLANS: Filter Operator predicate: (cint = 6981) (type: boolean) Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {cdecimal1} + 1 {cdecimal2} + keys: + 0 6981 (type: int) + 1 6981 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: l @@ -63,33 +70,33 @@ STAGE PLANS: Filter Operator predicate: (cint = 6981) (type: boolean) Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal1 (type: decimal(20,10)) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {cdecimal1} + 1 {cdecimal2} + keys: + 0 6981 (type: int) + 1 6981 (type: int) + outputColumnNames: _col1, _col9 + input vertices: + 1 Map 1 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} - 1 {VALUE._col2} - outputColumnNames: _col1, _col9 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 5617184..529f56a 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -17,16 +17,13 @@ left outer join alltypesorc hd ) t1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -34,55 +31,80 @@ STAGE PLANS: TableScan alias: hd Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ctinyint (type: tinyint) - sort order: + - Map-reduce partition columns: ctinyint (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: tinyint) + 1 ctinyint (type: tinyint) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: c + alias: cd Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - value expressions: ctinyint (type: tinyint) - Execution mode: vectorized - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {ctinyint} + 1 + keys: + 0 cint (type: int) + 1 cint (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan - alias: cd + alias: c Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {ctinyint} + 1 + keys: + 0 cint (type: int) + 1 cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + input vertices: + 1 Map 4 + Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: tinyint) + 1 ctinyint (type: tinyint) + input vertices: + 1 Map 1 + Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator @@ -102,21 +124,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 96a5cb8..846d7aa 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -23,16 +23,13 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,94 +40,73 @@ STAGE PLANS: Filter Operator predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: l_partkey (type: int) - sort order: + - Map-reduce partition columns: l_partkey (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE - value expressions: l_orderkey (type: int), l_suppkey (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: l_partkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 6 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} - 1 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -141,11 +117,43 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 @@ -193,16 +201,13 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -213,18 +218,54 @@ STAGE PLANS: Filter Operator predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: l_partkey (type: int) - sort order: + - Map-reduce partition columns: l_partkey (type: int) - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - value expressions: l_orderkey (type: int), l_suppkey (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + Local Work: + Map Reduce Local Work Map 4 Map Operator Tree: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) + Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -241,66 +282,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), 1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), 1 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} - 1 - outputColumnNames: _col0, _col3 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -311,11 +295,43 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 1 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index 882ce5f..3f8305c 100644 --- ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -101,14 +101,13 @@ POSTHOOK: query: explain select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -119,14 +118,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Execution mode: vectorized - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -134,34 +140,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -190,14 +195,13 @@ POSTHOOK: query: explain select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -208,13 +212,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -222,34 +234,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -288,14 +299,13 @@ explain select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -306,13 +316,21 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: a @@ -320,34 +338,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 85fa250..6bfdff8 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -7,15 +7,13 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -26,13 +24,23 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {cint} + 1 + keys: + 0 cint (type: int) + 1 cint (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -40,35 +48,35 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {cint} + 1 {cint} + keys: + 0 cint (type: int) + 1 cint (type: int) + outputColumnNames: _col2, _col17 + input vertices: + 1 Map 1 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col17 (type: int) + outputColumnNames: _col2, _col17 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index a575d2e..1d0d0ea 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -3,33 +3,17 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: v3 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: csmallint (type: smallint) - sort order: + - Map-reduce partition columns: csmallint (type: smallint) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: v1 @@ -37,14 +21,21 @@ STAGE PLANS: Filter Operator predicate: (ctinyint is not null and csmallint is not null) (type: boolean) Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ctinyint (type: tinyint) - sort order: + - Map-reduce partition columns: ctinyint (type: tinyint) - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE - value expressions: csmallint (type: smallint), cdouble (type: double) - Execution mode: vectorized - Map 6 + Spark HashTable Sink Operator + condition expressions: + 0 {csmallint} {cdouble} + 1 {ctinyint} + keys: + 0 ctinyint (type: tinyint) + 1 ctinyint (type: tinyint) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: v2 @@ -52,37 +43,81 @@ STAGE PLANS: Filter Operator predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ctinyint (type: tinyint) - sort order: + - Map-reduce partition columns: ctinyint (type: tinyint) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {ctinyint} {csmallint} {cdouble} + 1 {ctinyint} + keys: + 0 ctinyint (type: tinyint) + 1 ctinyint (type: tinyint) + outputColumnNames: _col0, _col1, _col5, _col15 + input vertices: + 0 Map 3 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col15) (type: boolean) + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col5 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 + keys: + 0 _col0 (type: smallint) + 1 csmallint (type: smallint) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v3 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: csmallint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} + 1 + keys: + 0 _col0 (type: smallint) + 1 csmallint (type: smallint) + outputColumnNames: _col1 + input vertices: + 0 Map 4 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Local Work: + Map Reduce Local Work Execution mode: vectorized Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col1 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -100,29 +135,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col15 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col15) (type: boolean) - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: smallint), _col5 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -138,4 +150,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932485957E11 +6.06519093248863E11 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index e1122c4..b0cd342 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: DROP TABLE part_staging +PREHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE part_staging +POSTHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging POSTHOOK: type: DROPTABLE PREHOOK: query: DROP TABLE part_orc PREHOOK: type: DROPTABLE @@ -502,36 +506,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -580,25 +583,53 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - Execution mode: vectorized - Map 5 + /part_orc [p2] + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -647,32 +678,9 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p2] + /part_orc [p1] Execution mode: vectorized Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract @@ -691,7 +699,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract @@ -1930,15 +1938,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1951,13 +1957,16 @@ STAGE PLANS: isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} + 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2007,8 +2016,14 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [p1] - Execution mode: vectorized - Map 3 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: part_orc @@ -2071,43 +2086,9 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 + Reducer 3 + Local Work: + Map Reduce Local Work Needs Tagging: false Reduce Operator Tree: Extract @@ -2122,14 +2103,45 @@ STAGE PLANS: isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} + 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int:string:string:string:string:int:string:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2153,20 +2165,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2177,10 +2175,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight explain extended @@ -2243,34 +2255,31 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2319,22 +2328,88 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - Execution mode: vectorized - Map 3 + /part_orc [part_orc] + Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + Position of Big Table: 0 + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int:string:string:string:string:int:string:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2383,66 +2458,8 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [part_orc] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + /part_orc [p1] + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -2466,20 +2483,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2490,10 +2493,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap explain extended @@ -4123,16 +4140,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4145,13 +4159,16 @@ STAGE PLANS: isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} {_col2} {_col5} {_col7} + 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4201,8 +4218,15 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [p1] - Execution mode: vectorized - Map 4 + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: part_orc @@ -4265,26 +4289,46 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col6} - 1 - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 + Local Work: + Map Reduce Local Work + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} {_col2} {_col5} {_col7} + 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + input vertices: + 1 Map 1 + Position of Big Table: 0 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Extract @@ -4316,29 +4360,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -4602,31 +4623,31 @@ order by p_name) POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -Manufacturer#4 almond antique gainsboro frosted violet 10 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#2 almond antique violet turquoise frosted 40 Manufacturer#2 almond aquamarine midnight light salmon 2 Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#3 almond antique misty red olive 1 -Manufacturer#4 almond aquamarine floral ivory bisque 27 -Manufacturer#1 almond antique chartreuse lavender yellow 34 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#3 almond antique chartreuse khaki white 17 Manufacturer#3 almond antique forest lavender goldenrod 14 -Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#3 almond antique metallic orange dim 19 -Manufacturer#4 almond antique violet mint lemon 39 -Manufacturer#5 almond antique sky peru orange 2 -Manufacturer#4 almond azure aquamarine papaya violet 12 -Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#3 almond antique misty red olive 1 Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 Manufacturer#4 almond aquamarine yellow dodger mint 7 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 -Manufacturer#3 almond antique chartreuse khaki white 17 -Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#5 almond antique sky peru orange 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 16. testViewAsTableInputToPTF create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 85fa250..6bfdff8 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -7,15 +7,13 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -26,13 +24,23 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 4 + Spark HashTable Sink Operator + condition expressions: + 0 {cint} + 1 + keys: + 0 cint (type: int) + 1 cint (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: t1 @@ -40,35 +48,35 @@ STAGE PLANS: Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {cint} + 1 {cint} + keys: + 0 cint (type: int) + 1 cint (type: int) + outputColumnNames: _col2, _col17 + input vertices: + 1 Map 1 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col17 (type: int) + outputColumnNames: _col2, _col17 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Local Work: + Map Reduce Local Work Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out index f939c7d..3989b27 100644 --- ql/src/test/results/clientpositive/subquery_exists.q.out +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- no agg, corr +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b @@ -8,7 +10,9 @@ where exists where b.value = a.value and a.key = b.key and a.value > 'val_9' ) PREHOOK: type: QUERY -POSTHOOK: query: -- no agg, corr +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 9d68139..b678fda 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: CREATE TABLE src_4( +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE src_4( key STRING, value STRING ) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src_4 -POSTHOOK: query: CREATE TABLE src_4( +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE src_4( key STRING, value STRING ) @@ -881,17 +885,17 @@ POSTHOOK: query: select * from src_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_4 #### A masked pattern was here #### -98 val_98 +90 val_90 +90 val_90 +90 val_90 92 val_92 -96 val_96 95 val_95 -98 val_98 -90 val_90 95 val_95 -90 val_90 +96 val_96 97 val_97 -90 val_90 97 val_97 +98 val_98 +98 val_98 PREHOOK: query: select * from src_5 PREHOOK: type: QUERY PREHOOK: Input: default@src_5 diff --git ql/src/test/results/clientpositive/tez/subquery_exists.q.out ql/src/test/results/clientpositive/tez/subquery_exists.q.out index 24f2d10..15691b5 100644 --- ql/src/test/results/clientpositive/tez/subquery_exists.q.out +++ ql/src/test/results/clientpositive/tez/subquery_exists.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- no agg, corr +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b @@ -8,7 +10,9 @@ where exists where b.value = a.value and a.key = b.key and a.value > 'val_9' ) PREHOOK: type: QUERY -POSTHOOK: query: -- no agg, corr +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- no agg, corr explain select * from src b diff --git ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out index 0aeb53a..c61b5f6 100644 --- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: DROP TABLE part_staging +PREHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE part_staging +POSTHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging POSTHOOK: type: DROPTABLE PREHOOK: query: DROP TABLE part_orc PREHOOK: type: DROPTABLE @@ -2155,20 +2159,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2179,10 +2169,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight explain extended @@ -2469,20 +2473,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2493,10 +2483,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap explain extended diff --git ql/src/test/results/clientpositive/vectorized_ptf.q.out ql/src/test/results/clientpositive/vectorized_ptf.q.out index f0e5b91..8462c74 100644 --- ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: DROP TABLE part_staging +PREHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE part_staging +POSTHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE part_staging POSTHOOK: type: DROPTABLE PREHOOK: query: DROP TABLE part_orc PREHOOK: type: DROPTABLE @@ -2427,20 +2431,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2451,10 +2441,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight explain extended @@ -2774,20 +2778,6 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part_orc #### A masked pattern was here #### -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car @@ -2798,10 +2788,24 @@ POSTHOOK: Input: default@part_orc 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap explain extended