diff --git data/files/flights_tiny.txt data/files/flights_tiny.txt old mode 100644 new mode 100755 diff --git data/files/part.rc data/files/part.rc old mode 100644 new mode 100755 diff --git data/files/part.seq data/files/part.seq old mode 100644 new mode 100755 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 4663886..a3a041b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -68,7 +67,6 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef; -import org.apache.hadoop.hive.ql.plan.PTFDesc.ShapeDetails; import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowExpressionDef; import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowTableFunctionDef; @@ -79,8 +77,6 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Factory for generating the different node processors used by ColumnPruner. @@ -163,16 +159,11 @@ public static ColumnPrunerGroupByProc getGroupByProc() { /** * - Pruning can only be done for Windowing. PTFs are black boxes, - * we assume all columns are needed. + * we assume all columns are needed. * - add column names referenced in WindowFn args and in WindowFn expressions - * to the pruned list of the child Select Op. - * - Prune the Column names & types serde properties in each of the Shapes in the PTF Chain: - * - the InputDef's output shape - * - Window Tabl Functions: window output shape & output shape. - * - Why is pruning the Column names & types in the serde properties enough? - * - because during runtime we rebuild the OIs using these properties. + * to the pruned list of the child Select Op. * - finally we set the prunedColList on the ColumnPrunerContx; - * and update the RR & signature on the PTFOp. + * and update the RR & signature on the PTFOp. */ public static class ColumnPrunerPTFProc implements NodeProcessor { public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, @@ -194,10 +185,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, //we create a copy of prunedCols to create a list of pruned columns for PTFOperator prunedCols = new ArrayList(prunedCols); prunedColumnsList(prunedCols, def); - setSerdePropsOfShape(def.getInput().getOutputShape(), prunedCols); - setSerdePropsOfShape(def.getOutputFromWdwFnProcessing(), prunedCols); - setSerdePropsOfShape(def.getOutputShape(), prunedCols); - RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig); cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def)); @@ -255,47 +242,6 @@ private void prunedColumnsList(List prunedCols, WindowTableFunctionDef t } } - private List getLowerCasePrunedCols(List prunedCols){ - List lowerCasePrunedCols = new ArrayList(); - for (String col : prunedCols) { - lowerCasePrunedCols.add(col.toLowerCase()); - } - return lowerCasePrunedCols; - } - - /* - * reconstruct Column names & types list based on the prunedCols list. - */ - private void setSerdePropsOfShape(ShapeDetails shp, List prunedCols) { - List columnNames = Arrays.asList(shp.getSerdeProps().get( - org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS).split(",")); - List columnTypes = TypeInfoUtils - .getTypeInfosFromTypeString(shp.getSerdeProps().get( - org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES)); - /* - * fieldNames in OI are lower-cased. So we compare lower cased names for now. - */ - prunedCols = getLowerCasePrunedCols(prunedCols); - - StringBuilder cNames = new StringBuilder(); - StringBuilder cTypes = new StringBuilder(); - - boolean addComma = false; - for(int i=0; i < columnNames.size(); i++) { - if ( prunedCols.contains(columnNames.get(i)) ) { - cNames.append(addComma ? "," : ""); - cTypes.append(addComma ? "," : ""); - cNames.append(columnNames.get(i)); - cTypes.append(columnTypes.get(i)); - addComma = true; - } - } - shp.getSerdeProps().put( - org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, cNames.toString()); - shp.getSerdeProps().put( - org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, cTypes.toString()); - } - /* * from the prunedCols list filter out columns that refer to WindowFns or WindowExprs * the returned list is set as the prunedList needed by the PTFOp. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index 8e2fd59..5944261 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -909,7 +909,7 @@ protected static SerDe createLazyBinarySerDe(Configuration cfg, } @SuppressWarnings({"unchecked"}) - private static void addOIPropertiestoSerDePropsMap(StructObjectInspector OI, + public static void addOIPropertiestoSerDePropsMap(StructObjectInspector OI, Map serdePropsMap) { if ( serdePropsMap == null ) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java index 0108efe..ee1d977 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -27,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.PTFTranslator; import org.apache.hadoop.hive.ql.parse.PTFTranslator.LeadLagInfo; import org.apache.hadoop.hive.ql.parse.WindowingExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.plan.PTFDesc.BoundaryDef; @@ -56,6 +58,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.util.ReflectionUtils; +@SuppressWarnings("deprecation") public class PTFDeserializer { PTFDesc ptfDesc; @@ -83,7 +86,7 @@ public void initializePTFChain(PartitionedTableFunctionDef tblFnDef) throws Hive while ( !ptfChain.isEmpty() ) { currentDef = ptfChain.pop(); if ( currentDef instanceof PTFQueryInputDef) { - initialize((PTFQueryInputDef)currentDef); + initialize((PTFQueryInputDef)currentDef, inputOI); } else if ( currentDef instanceof WindowTableFunctionDef) { initializeWindowing((WindowTableFunctionDef)currentDef); @@ -101,8 +104,6 @@ public void initializeWindowing(WindowTableFunctionDef def) throws HiveException * 1. setup resolve, make connections */ TableFunctionEvaluator tEval = def.getTFunction(); - /*WindowingTableFunctionResolver tResolver = (WindowingTableFunctionResolver) - FunctionRegistry.getTableFunctionResolver(def.getName());*/ WindowingTableFunctionResolver tResolver = (WindowingTableFunctionResolver) constructResolver(def.getResolverClassName()); tResolver.initialize(ptfDesc, def, tEval); @@ -141,7 +142,7 @@ public void initializeWindowing(WindowTableFunctionDef def) throws HiveException StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector( aliases, fieldOIs); tResolver.setWdwProcessingOutputOI(wdwOutOI); - initialize(def.getOutputFromWdwFnProcessing()); + initialize(def.getOutputFromWdwFnProcessing(), wdwOutOI); } else { def.setOutputFromWdwFnProcessing(inpShape); @@ -161,8 +162,8 @@ public void initializeWindowing(WindowTableFunctionDef def) throws HiveException /* * 4. give Evaluator chance to setup for Output execution; setup Output shape. */ - initialize(def.getOutputShape()); tResolver.initializeOutputOI(); + initialize(def.getOutputShape(), tEval.getOutputOI()); /* * If we have windowExpressions then we convert to Std. Object to process; @@ -175,9 +176,9 @@ public void initializeWindowing(WindowTableFunctionDef def) throws HiveException } } - protected void initialize(PTFQueryInputDef def) throws HiveException { + protected void initialize(PTFQueryInputDef def, StructObjectInspector OI) throws HiveException { ShapeDetails outShape = def.getOutputShape(); - initialize(outShape); + initialize(outShape, OI); } protected void initialize(PartitionedTableFunctionDef def) throws HiveException { @@ -206,7 +207,7 @@ protected void initialize(PartitionedTableFunctionDef def) throws HiveException if (tEval.isTransformsRawInput()) { tResolver.initializeRawInputOI(); - initialize(def.getRawInputShape()); + initialize(def.getRawInputShape(), tEval.getRawInputOI()); } else { def.setRawInputShape(inpShape); @@ -218,7 +219,7 @@ protected void initialize(PartitionedTableFunctionDef def) throws HiveException * 4. give Evaluator chance to setup for Output execution; setup Output shape. */ tResolver.initializeOutputOI(); - initialize(def.getOutputShape()); + initialize(def.getOutputShape(), tEval.getOutputOI()); } static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException @@ -286,10 +287,11 @@ private ObjectInspector initExprNodeEvaluator(ExprNodeEvaluator exprEval, return outOI; } - protected void initialize(ShapeDetails shp) throws HiveException { + protected void initialize(ShapeDetails shp, StructObjectInspector OI) throws HiveException { String serdeClassName = shp.getSerdeClassName(); Properties serDeProps = new Properties(); - Map serdePropsMap = shp.getSerdeProps(); + Map serdePropsMap = new LinkedHashMap(); + PTFTranslator.addOIPropertiestoSerDePropsMap(OI, serdePropsMap); for (String serdeName : serdePropsMap.keySet()) { serDeProps.setProperty(serdeName, serdePropsMap.get(serdeName)); }