diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index be105c1..33a1833 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -617,10 +617,8 @@ private void doAuthorization(BaseSemanticAnalyzer sem) if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) { String alias_id = topOpMap.getKey(); - PrunedPartitionList partsList = PartitionPruner.prune(parseCtx - .getTopToTable().get(topOp), parseCtx.getOpToPartPruner() - .get(topOp), parseCtx.getConf(), alias_id, parseCtx - .getPrunedPartitions()); + PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, + parseCtx, alias_id); Set parts = new HashSet(); parts.addAll(partsList.getConfirmedPartns()); parts.addAll(partsList.getUnknownPartns()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index cf8bd9d..9eeeb37 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -34,6 +33,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -52,7 +52,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -76,41 +75,30 @@ } private final transient LongWritable deserialize_error_count = new LongWritable(); - private transient Deserializer deserializer; - - private transient Object[] rowWithPart; - private transient Writable[] vcValues; - private transient List vcs; - private transient Object[] rowWithPartAndVC; - private transient StructObjectInspector tblRowObjectInspector; - // convert from partition to table schema - private transient Converter partTblObjectInspectorConverter; - private transient boolean isPartitioned; - private Map opCtxMap; - private final Set listInputPaths = new HashSet(); - - private Map, ArrayList> operatorToPaths; + private final Map opCtxMap = new HashMap(); private final Map, MapOpCtx> childrenOpToOpCtxMap = new HashMap, MapOpCtx>(); - private ArrayList> extraChildrenToClose = null; + private transient MapOpCtx current; + private transient List> extraChildrenToClose = null; private static class MapInputPath { String path; String alias; - Operator op; + Operator op; + PartitionDesc partDesc; /** * @param path * @param alias * @param op */ - public MapInputPath(String path, String alias, - Operator op) { + public MapInputPath(String path, String alias, Operator op, PartitionDesc partDesc) { this.path = path; this.alias = alias; this.op = op; + this.partDesc = partDesc; } @Override @@ -131,92 +119,36 @@ 
public int hashCode() { ret += (op == null) ? 0 : op.hashCode(); return ret; } - - public Operator getOp() { - return op; - } - - public void setOp(Operator op) { - this.op = op; - } } private static class MapOpCtx { - private final boolean isPartitioned; - private final StructObjectInspector tblRawRowObjectInspector; // without partition - private final StructObjectInspector partObjectInspector; // partition - private StructObjectInspector rowObjectInspector; - private final Converter partTblObjectInspectorConverter; - private final Object[] rowWithPart; - private Object[] rowWithPartAndVC; - private final Deserializer deserializer; - private String tableName; - private String partName; - - /** - * @param isPartitioned - * @param rowObjectInspector - * @param rowWithPart - */ - public MapOpCtx(boolean isPartitioned, - StructObjectInspector rowObjectInspector, - StructObjectInspector tblRawRowObjectInspector, - StructObjectInspector partObjectInspector, - Object[] rowWithPart, - Object[] rowWithPartAndVC, - Deserializer deserializer, - Converter partTblObjectInspectorConverter) { - this.isPartitioned = isPartitioned; - this.rowObjectInspector = rowObjectInspector; - this.tblRawRowObjectInspector = tblRawRowObjectInspector; - this.partObjectInspector = partObjectInspector; - this.rowWithPart = rowWithPart; - this.rowWithPartAndVC = rowWithPartAndVC; - this.deserializer = deserializer; - this.partTblObjectInspectorConverter = partTblObjectInspectorConverter; - } - /** - * @return the isPartitioned - */ - public boolean isPartitioned() { - return isPartitioned; - } + StructObjectInspector tblRawRowObjectInspector; // columns + StructObjectInspector partObjectInspector; // partition columns + StructObjectInspector vcsObjectInspector; // virtual columns + StructObjectInspector rowObjectInspector; - /** - * @return the rowObjectInspector - */ - public StructObjectInspector getRowObjectInspector() { - return rowObjectInspector; - } + Converter partTblObjectInspectorConverter; - public StructObjectInspector getTblRawRowObjectInspector() { - return tblRawRowObjectInspector; - } + Object[] rowWithPart; + Object[] rowWithPartAndVC; + Deserializer deserializer; - /** - * @return the rowWithPart - */ - public Object[] getRowWithPart() { - return rowWithPart; - } + String tableName; + String partName; + List vcs; + Writable[] vcValues; - /** - * @return the rowWithPartAndVC - */ - public Object[] getRowWithPartAndVC() { - return rowWithPartAndVC; + private boolean isPartitioned() { + return partObjectInspector != null; } - /** - * @return the deserializer - */ - public Deserializer getDeserializer() { - return deserializer; + private boolean hasVC() { + return vcsObjectInspector != null; } - public Converter getPartTblObjectInspectorConverter() { - return partTblObjectInspectorConverter; + private Object readRow(Writable value) throws SerDeException { + return partTblObjectInspectorConverter.convert(deserializer.deserialize(value)); } } @@ -236,57 +168,51 @@ public void initializeAsRoot(Configuration hconf, MapredWork mrwork) initialize(hconf, null); } - private MapOpCtx initObjectInspector(MapredWork conf, - Configuration hconf, String onefile, Map convertedOI) - throws HiveException, - ClassNotFoundException, InstantiationException, IllegalAccessException, - SerDeException { - PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile); - LinkedHashMap partSpec = pd.getPartSpec(); + private MapOpCtx initObjectInspector(Configuration hconf, MapInputPath ctx, + Map convertedOI) throws Exception { + + 
PartitionDesc pd = ctx.partDesc; + TableDesc td = pd.getTableDesc(); + + MapOpCtx opCtx = new MapOpCtx(); // Use table properties in case of unpartitioned tables, // and the union of table properties and partition properties, with partition // taking precedence - Properties partProps = - (pd.getPartSpec() == null || pd.getPartSpec().isEmpty()) ? - pd.getTableDesc().getProperties() : pd.getOverlayedProperties(); + Properties partProps = isPartitioned(pd) ? + pd.getOverlayedProperties() : pd.getTableDesc().getProperties(); + + Map partSpec = pd.getPartSpec(); + + opCtx.tableName = String.valueOf(partProps.getProperty("name")); + opCtx.partName = String.valueOf(partSpec); Class serdeclass = pd.getDeserializerClass(); if (serdeclass == null) { - String className = pd.getSerdeClassName(); - if ((className == null) || (className.isEmpty())) { - throw new HiveException( - "SerDe class or the SerDe class name is not set for table: " - + pd.getProperties().getProperty("name")); - } + String className = checkSerdeClassName(pd.getSerdeClassName(), opCtx.tableName); serdeclass = hconf.getClassByName(className); } - String tableName = String.valueOf(partProps.getProperty("name")); - String partName = String.valueOf(partSpec); - Deserializer partDeserializer = (Deserializer) serdeclass.newInstance(); - partDeserializer.initialize(hconf, partProps); - StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer - .getObjectInspector(); + opCtx.deserializer = (Deserializer) serdeclass.newInstance(); + opCtx.deserializer.initialize(hconf, partProps); - StructObjectInspector tblRawRowObjectInspector = convertedOI.get(pd.getTableDesc()); + StructObjectInspector partRawRowObjectInspector = + (StructObjectInspector) opCtx.deserializer.getObjectInspector(); - partTblObjectInspectorConverter = - ObjectInspectorConverters.getConverter(partRawRowObjectInspector, - tblRawRowObjectInspector); + opCtx.tblRawRowObjectInspector = convertedOI.get(td); + + opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter( + partRawRowObjectInspector, opCtx.tblRawRowObjectInspector); - MapOpCtx opCtx = null; // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns - String pcols = partProps - .getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); // Log LOG = LogFactory.getLog(MapOperator.class.getName()); if (pcols != null && pcols.length() > 0) { String[] partKeys = pcols.trim().split("/"); List partNames = new ArrayList(partKeys.length); Object[] partValues = new Object[partKeys.length]; - List partObjectInspectors = new ArrayList( - partKeys.length); + List partObjectInspectors = new ArrayList(partKeys.length); for (int i = 0; i < partKeys.length; i++) { String key = partKeys[i]; partNames.add(key); @@ -297,97 +223,44 @@ private MapOpCtx initObjectInspector(MapredWork conf, } else { partValues[i] = new Text(partSpec.get(key)); } - partObjectInspectors - .add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + partObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); } - StructObjectInspector partObjectInspector = ObjectInspectorFactory + opCtx.rowWithPart = new Object[] {null, partValues}; + opCtx.partObjectInspector = ObjectInspectorFactory .getStandardStructObjectInspector(partNames, 
partObjectInspectors); - - Object[] rowWithPart = new Object[2]; - rowWithPart[1] = partValues; - StructObjectInspector rowObjectInspector = ObjectInspectorFactory - .getUnionStructObjectInspector(Arrays - .asList(new StructObjectInspector[] {tblRawRowObjectInspector, partObjectInspector})); - // LOG.info("dump " + tableName + " " + partName + " " + - // rowObjectInspector.getTypeName()); - opCtx = new MapOpCtx(true, rowObjectInspector, tblRawRowObjectInspector, partObjectInspector, - rowWithPart, null, partDeserializer, partTblObjectInspectorConverter); - } else { - // LOG.info("dump2 " + tableName + " " + partName + " " + - // rowObjectInspector.getTypeName()); - opCtx = new MapOpCtx(false, tblRawRowObjectInspector, tblRawRowObjectInspector, null, null, - null, partDeserializer, partTblObjectInspectorConverter); - } - opCtx.tableName = tableName; - opCtx.partName = partName; - return opCtx; - } - - /** - * Set the inspectors given a input. Since a mapper can span multiple partitions, the inspectors - * need to be changed if the input changes - **/ - private void setInspectorInput(MapInputPath inp) { - Operator op = inp.getOp(); - - deserializer = opCtxMap.get(inp).getDeserializer(); - isPartitioned = opCtxMap.get(inp).isPartitioned(); - rowWithPart = opCtxMap.get(inp).getRowWithPart(); - rowWithPartAndVC = opCtxMap.get(inp).getRowWithPartAndVC(); - tblRowObjectInspector = opCtxMap.get(inp).getRowObjectInspector(); - partTblObjectInspectorConverter = opCtxMap.get(inp).getPartTblObjectInspectorConverter(); - if (listInputPaths.contains(inp)) { - return; } - listInputPaths.add(inp); - // The op may not be a TableScan for mapjoins // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key; // In that case, it will be a Select, but the rowOI need not be ammended - if (op instanceof TableScanOperator) { - StructObjectInspector tblRawRowObjectInspector = - opCtxMap.get(inp).getTblRawRowObjectInspector(); - StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector; - TableScanOperator tsOp = (TableScanOperator) op; + if (ctx.op instanceof TableScanOperator) { + TableScanOperator tsOp = (TableScanOperator) ctx.op; TableScanDesc tsDesc = tsOp.getConf(); - if (tsDesc != null) { - this.vcs = tsDesc.getVirtualCols(); - if (vcs != null && vcs.size() > 0) { - List vcNames = new ArrayList(vcs.size()); - this.vcValues = new Writable[vcs.size()]; - List vcsObjectInspectors = new ArrayList(vcs.size()); - for (int i = 0; i < vcs.size(); i++) { - VirtualColumn vc = vcs.get(i); - vcsObjectInspectors.add( - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory())); - vcNames.add(vc.getName()); - } - StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(vcNames, - vcsObjectInspectors); - if (isPartitioned) { - this.rowWithPartAndVC = new Object[3]; - this.rowWithPartAndVC[1] = this.rowWithPart[1]; - } else { - this.rowWithPartAndVC = new Object[2]; - } - if (partObjectInspector == null) { - this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays - .asList(new StructObjectInspector[] { - tblRowObjectInspector, vcStructObjectInspector})); - } else { - this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays - .asList(new StructObjectInspector[] { - tblRawRowObjectInspector, partObjectInspector, - vcStructObjectInspector})); - } - opCtxMap.get(inp).rowObjectInspector 
= this.tblRowObjectInspector; - opCtxMap.get(inp).rowWithPartAndVC = this.rowWithPartAndVC; + if (tsDesc != null && tsDesc.hasVirtualCols()) { + opCtx.vcs = tsDesc.getVirtualCols(); + opCtx.vcValues = new Writable[opCtx.vcs.size()]; + opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs); + if (opCtx.isPartitioned()) { + opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3); + } else { + opCtx.rowWithPartAndVC = new Object[2]; } } } + if (!opCtx.hasVC() && !opCtx.isPartitioned()) { + opCtx.rowObjectInspector = opCtx.tblRawRowObjectInspector; + return opCtx; + } + List inspectors = new ArrayList(); + inspectors.add(opCtx.tblRawRowObjectInspector); + if (opCtx.isPartitioned()) { + inspectors.add(opCtx.partObjectInspector); + } + if (opCtx.hasVC()) { + inspectors.add(opCtx.vcsObjectInspector); + } + opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors); + return opCtx; } // Return the mapping for table descriptor to the expected table OI @@ -403,25 +276,18 @@ private void setInspectorInput(MapInputPath inp) { Map tableDescOI = new HashMap(); Set identityConverterTableDesc = new HashSet(); - try - { + try { for (String onefile : conf.getPathToAliases().keySet()) { PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile); TableDesc tableDesc = pd.getTableDesc(); Properties tblProps = tableDesc.getProperties(); // If the partition does not exist, use table properties - Properties partProps = - (pd.getPartSpec() == null || pd.getPartSpec().isEmpty()) ? - tblProps : pd.getOverlayedProperties(); + Properties partProps = isPartitioned(pd) ? pd.getOverlayedProperties() : tblProps; Class sdclass = pd.getDeserializerClass(); if (sdclass == null) { - String className = pd.getSerdeClassName(); - if ((className == null) || (className.isEmpty())) { - throw new HiveException( - "SerDe class or the SerDe class name is not set for table: " - + pd.getProperties().getProperty("name")); - } + String className = checkSerdeClassName(pd.getSerdeClassName(), + pd.getProperties().getProperty("name")); sdclass = hconf.getClassByName(className); } @@ -435,12 +301,8 @@ private void setInspectorInput(MapInputPath inp) { (identityConverterTableDesc.contains(tableDesc))) { sdclass = tableDesc.getDeserializerClass(); if (sdclass == null) { - String className = tableDesc.getSerdeClassName(); - if ((className == null) || (className.isEmpty())) { - throw new HiveException( - "SerDe class or the SerDe class name is not set for table: " - + tableDesc.getProperties().getProperty("name")); - } + String className = checkSerdeClassName(tableDesc.getSerdeClassName(), + tableDesc.getProperties().getProperty("name")); sdclass = hconf.getClassByName(className); } Deserializer tblDeserializer = (Deserializer) sdclass.newInstance(); @@ -448,7 +310,7 @@ private void setInspectorInput(MapInputPath inp) { tblRawRowObjectInspector = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI( partRawRowObjectInspector, - (StructObjectInspector) tblDeserializer.getObjectInspector()); + tblDeserializer.getObjectInspector()); if (identityConverterTableDesc.contains(tableDesc)) { if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) { @@ -468,35 +330,47 @@ else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) { return tableDescOI; } + private boolean isPartitioned(PartitionDesc pd) { + return pd.getPartSpec() != null && !pd.getPartSpec().isEmpty(); + } + + private String checkSerdeClassName(String className, String tableName) throws 
HiveException { + if (className == null || className.isEmpty()) { + throw new HiveException( + "SerDe class or the SerDe class name is not set for table: " + tableName); + } + return className; + } + public void setChildren(Configuration hconf) throws HiveException { Path fpath = new Path(HiveConf.getVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME)); - ArrayList> children = - new ArrayList>(); - opCtxMap = new HashMap(); - operatorToPaths = new HashMap, ArrayList>(); + List> children = + new ArrayList>(); - statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count); Map convertedOI = getConvertedOI(hconf); + try { - for (String onefile : conf.getPathToAliases().keySet()) { - MapOpCtx opCtx = initObjectInspector(conf, hconf, onefile, convertedOI); + for (Map.Entry> entry : conf.getPathToAliases().entrySet()) { + String onefile = entry.getKey(); + List aliases = entry.getValue(); + Path onepath = new Path(onefile); - List aliases = conf.getPathToAliases().get(onefile); + PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile); for (String onealias : aliases) { - Operator op = conf.getAliasToWork().get( - onealias); + Operator op = conf.getAliasToWork().get(onealias); LOG.info("Adding alias " + onealias + " to work list for file " + onefile); - MapInputPath inp = new MapInputPath(onefile, onealias, op); - opCtxMap.put(inp, opCtx); - if (operatorToPaths.get(op) == null) { - operatorToPaths.put(op, new ArrayList()); + MapInputPath inp = new MapInputPath(onefile, onealias, op, partDesc); + if (opCtxMap.containsKey(inp)) { + continue; } - operatorToPaths.get(op).add(onefile); + MapOpCtx opCtx = initObjectInspector(hconf, inp, convertedOI); + opCtxMap.put(inp, opCtx); + op.setParentOperators(new ArrayList>()); op.getParentOperators().add(this); // check for the operators who will process rows coming to this Map @@ -505,9 +379,9 @@ public void setChildren(Configuration hconf) throws HiveException { children.add(op); childrenOpToOpCtxMap.put(op, opCtx); LOG.info("dump " + op.getName() + " " - + opCtxMap.get(inp).getRowObjectInspector().getTypeName()); + + opCtxMap.get(inp).rowObjectInspector.getTypeName()); } - setInspectorInput(inp); + current = opCtx; // just need for TestOperators.testMapOperator } } @@ -530,6 +404,8 @@ public void setChildren(Configuration hconf) throws HiveException { public void initializeOp(Configuration hconf) throws HiveException { // set that parent initialization is done and call initialize on children state = State.INIT; + statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count); + List> children = getChildOperators(); for (Entry, MapOpCtx> entry : childrenOpToOpCtxMap @@ -537,33 +413,28 @@ public void initializeOp(Configuration hconf) throws HiveException { Operator child = entry.getKey(); MapOpCtx mapOpCtx = entry.getValue(); // Add alias, table name, and partitions to hadoop conf so that their - // children will - // inherit these - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, - mapOpCtx.tableName); - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, - mapOpCtx.partName); - child.initialize(hconf, new ObjectInspector[] {mapOpCtx.getRowObjectInspector()}); + // children will inherit these + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, mapOpCtx.tableName); + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, mapOpCtx.partName); + child.initialize(hconf, new ObjectInspector[] {mapOpCtx.rowObjectInspector}); } for (Entry entry : opCtxMap.entrySet()) { - // Add alias, table name, and partitions to hadoop 
conf so that their - // children will - // inherit these - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, - entry.getValue().tableName); - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, entry - .getValue().partName); MapInputPath input = entry.getKey(); + MapOpCtx mapOpCtx = entry.getValue(); + // Add alias, table name, and partitions to hadoop conf so that their + // children will inherit these + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, mapOpCtx.tableName); + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, mapOpCtx.partName); + Operator op = input.op; - // op is not in the children list, so need to remember it and close it - // afterwards if (children.indexOf(op) == -1) { + // op is not in the children list, so need to remember it and close it afterwards if (extraChildrenToClose == null) { extraChildrenToClose = new ArrayList>(); } extraChildrenToClose.add(op); - op.initialize(hconf, new ObjectInspector[] {entry.getValue().getRowObjectInspector()}); + op.initialize(hconf, new ObjectInspector[] {entry.getValue().rowObjectInspector}); } } } @@ -580,56 +451,61 @@ public void closeOp(boolean abort) throws HiveException { } } - // Change the serializer etc. since it is a new file, and split can span - // multiple files/partitions. + // Find context for current input file @Override public void cleanUpInputFileChangedOp() throws HiveException { - Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile())) - .toUri().getPath()); + Path fpath = normalizePath(getExecContext().getCurrentInputFile()); for (String onefile : conf.getPathToAliases().keySet()) { - Path onepath = new Path(new Path(onefile).toUri().getPath()); + Path onepath = normalizePath(onefile); // check for the operators who will process rows coming to this Map // Operator - if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { - String onealias = conf.getPathToAliases().get(onefile).get(0); - Operator op = - conf.getAliasToWork().get(onealias); - - LOG.info("Processing alias " + onealias + " for file " + onefile); - - MapInputPath inp = new MapInputPath(onefile, onealias, op); - setInspectorInput(inp); - break; + if (onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { + // not from this + continue; + } + PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile); + for (String onealias : conf.getPathToAliases().get(onefile)) { + Operator op = conf.getAliasToWork().get(onealias); + MapInputPath inp = new MapInputPath(onefile, onealias, op, partDesc); + MapOpCtx context = opCtxMap.get(inp); + if (context != null) { + current = context; + LOG.info("Processing alias " + onealias + " for file " + onefile); + return; + } } } + throw new IllegalStateException("Invalid path " + fpath); + } + + private Path normalizePath(String onefile) { + return new Path(new Path(onefile).toUri().getPath()); } public void process(Writable value) throws HiveException { // A mapper can span multiple files/partitions. 
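The rewritten cleanUpInputFileChangedOp() above resolves the MapOpCtx for a newly observed input file by normalizing both paths and testing containment with URI.relativize(). A minimal, self-contained sketch of that containment check, using only java.net.URI (class name and paths below are illustrative, not taken from the patch):

import java.net.URI;

public class PathContainmentSketch {

  // Mirrors the check in cleanUpInputFileChangedOp(): relativize() returns the child
  // unchanged when it is not under the base, so equality means "not from this path".
  static boolean isUnder(URI base, URI file) {
    return !base.relativize(file).equals(file);
  }

  public static void main(String[] args) {
    URI partitionDir = URI.create("/warehouse/srcpart/ds=2008-04-08/hr=11");
    URI sameHour = URI.create("/warehouse/srcpart/ds=2008-04-08/hr=11/000000_0");
    URI otherHour = URI.create("/warehouse/srcpart/ds=2008-04-08/hr=12/000000_0");

    System.out.println(isUnder(partitionDir, sameHour));   // true  -> use this path's MapOpCtx
    System.out.println(isUnder(partitionDir, otherHour));  // false -> keep scanning other paths
  }
}

If no registered path contains the current input file, the patched method now throws IllegalStateException instead of silently keeping the previous context.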
// The serializers need to be reset if the input file changed - if ((this.getExecContext() != null) && - this.getExecContext().inputFileChanged()) { + ExecMapperContext context = getExecContext(); + if (context != null && context.inputFileChanged()) { // The child operators cleanup if input file has changed cleanUpInputFileChanged(); } - ExecMapperContext context = getExecContext(); - Object row = null; + Object row; try { - if (null != this.rowWithPartAndVC) { - this.rowWithPartAndVC[0] = - partTblObjectInspectorConverter.convert(deserializer.deserialize(value)); - int vcPos = isPartitioned ? 2 : 1; + row = current.readRow(value); + if (current.hasVC()) { + current.rowWithPartAndVC[0] = row; if (context != null) { - populateVirtualColumnValues(context, vcs, vcValues, deserializer); + populateVirtualColumnValues(context, current.vcs, current.vcValues, current.deserializer); } - this.rowWithPartAndVC[vcPos] = this.vcValues; - } else if (!isPartitioned) { - row = partTblObjectInspectorConverter.convert(deserializer.deserialize((Writable) value)); - } else { - rowWithPart[0] = - partTblObjectInspectorConverter.convert(deserializer.deserialize((Writable) value)); + int vcPos = current.isPartitioned() ? 2 : 1; + current.rowWithPartAndVC[vcPos] = current.vcValues; + row = current.rowWithPartAndVC; + } else if (current.isPartitioned()) { + current.rowWithPart[0] = row; + row = current.rowWithPart; } } catch (Exception e) { // Serialize the row and output. @@ -649,24 +525,12 @@ public void process(Writable value) throws HiveException { // The row has been converted to comply with table schema, irrespective of partition schema. // So, use tblOI (and not partOI) for forwarding try { - if (null != this.rowWithPartAndVC) { - forward(this.rowWithPartAndVC, this.tblRowObjectInspector); - } else if (!isPartitioned) { - forward(row, tblRowObjectInspector); - } else { - forward(rowWithPart, tblRowObjectInspector); - } + forward(row, current.rowObjectInspector); } catch (Exception e) { // Serialize the row and output the error message. 
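MapOpCtx.readRow() above folds deserialization and the partition-to-table schema conversion into a single call. A hedged sketch of what that converter does, using the same ObjectInspectorConverters API on hand-built inspectors; the schemas and sample values here are invented for illustration and are not part of the patch:

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartitionToTableConversionSketch {
  public static void main(String[] args) {
    // Rows in an old partition were written as (key int, value string) ...
    StructObjectInspector partitionOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // ... while the table schema now declares (key string, value string).
    StructObjectInspector tableOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector));

    ObjectInspectorConverters.Converter converter =
        ObjectInspectorConverters.getConverter(partitionOI, tableOI);

    // Prints the converted row, now complying with the table schema, which is why
    // process() can always forward with the table-level rowObjectInspector.
    System.out.println(converter.convert(Arrays.asList(238, "val_238")));
  }
}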
String rowString; try { - if (null != rowWithPartAndVC) { - rowString = SerDeUtils.getJSONString(rowWithPartAndVC, tblRowObjectInspector); - } else if (!isPartitioned) { - rowString = SerDeUtils.getJSONString(row, tblRowObjectInspector); - } else { - rowString = SerDeUtils.getJSONString(rowWithPart, tblRowObjectInspector); - } + rowString = SerDeUtils.getJSONString(row, current.rowObjectInspector); } catch (Exception e2) { rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]"; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index a739641..18a4b02 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -24,6 +24,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -47,6 +51,8 @@ public static VirtualColumn GROUPINGID = new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo); + public static VirtualColumn[] VIRTUAL_COLUMNS = + new VirtualColumn[] {FILENAME, BLOCKOFFSET, ROWOFFSET, RAWDATASIZE, GROUPINGID}; private String name; private PrimitiveTypeInfo typeInfo; @@ -125,4 +131,15 @@ public boolean equals(Object o) { && this.typeInfo.getTypeName().equals(c.getTypeInfo().getTypeName()); } + + public static StructObjectInspector getVCSObjectInspector(List vcs) { + List names = new ArrayList(vcs.size()); + List inspectors = new ArrayList(vcs.size()); + for (VirtualColumn vc : vcs) { + names.add(vc.getName()); + inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + vc.getTypeInfo().getPrimitiveCategory())); + } + return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java index e5df744..0991847 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java @@ -45,7 +45,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; @@ -270,14 +269,7 @@ protected boolean checkConvertBucketMapJoin( if (tbl.isPartitioned()) { PrunedPartitionList prunedParts; try { - prunedParts = pGraphContext.getOpToPartList().get(tso); - if (prunedParts == null) { - prunedParts = - PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), - pGraphContext.getConf(), alias, - pGraphContext.getPrunedPartitions()); - pGraphContext.getOpToPartList().put(tso, prunedParts); - } + prunedParts = 
pGraphContext.getPrunedPartitions(alias, tso); } catch (HiveException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java index cc9de54..fda2f84 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; @@ -310,15 +309,9 @@ private boolean isEligibleForBucketSortMergeJoin( Table tbl = topToTable.get(tso); if (tbl.isPartitioned()) { - PrunedPartitionList prunedParts = null; + PrunedPartitionList prunedParts; try { - prunedParts = pGraphContext.getOpToPartList().get(tso); - if (prunedParts == null) { - prunedParts = PartitionPruner.prune(tbl, pGraphContext - .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias, - pGraphContext.getPrunedPartitions()); - pGraphContext.getOpToPartList().put(tso, prunedParts); - } + prunedParts = pGraphContext.getPrunedPartitions(alias, tso); } catch (HiveException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java index 5320143..1bed28f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java @@ -70,10 +70,7 @@ public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) } else { // For partitioned tables, get the size of all the partitions - PrunedPartitionList partsList = - PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), - null, parseCtx.getPrunedPartitions()); + PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null); numPartitions = partsList.getNotDeniedPartns().size(); long totalSize = 0; for (Partition part : partsList.getNotDeniedPartns()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java index e1f8718..6caabdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java @@ -457,7 +457,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // nothing to be done for filters - the output schema does not change. 
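The getVCSObjectInspector() helper added to VirtualColumn above centralizes the struct-inspector construction that MapOperator previously did inline, and PartExprEvalUtils further down reuses it. A small usage sketch, assuming the patched class is on the classpath:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class VcInspectorSketch {
  public static void main(String[] args) {
    List<VirtualColumn> vcs = Arrays.asList(VirtualColumn.FILENAME, VirtualColumn.BLOCKOFFSET);
    StructObjectInspector vcOI = VirtualColumn.getVCSObjectInspector(vcs);

    // Prints each virtual column's field name with its writable type,
    // e.g. a string for INPUT__FILE__NAME and a bigint for BLOCK__OFFSET__INSIDE__FILE.
    for (StructField field : vcOI.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + ": "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}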
if (op instanceof TableScanOperator) { assert !useBucketSortPositions; - Table srcTable = pGraphContext.getTopToTable().get(op); + TableScanOperator ts = (TableScanOperator) op; + Table srcTable = pGraphContext.getTopToTable().get(ts); // Find the positions of the bucketed columns in the table corresponding // to the select list. @@ -496,7 +497,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } if (srcTable.isPartitioned()) { - PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op); + PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(ts); List partitions = prunedParts.getNotDeniedPartns(); // Support for dynamic partitions can be added later diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index b5a9291..da25615 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -443,13 +443,8 @@ public static void setTaskPlan(String alias_id, if (partsList == null) { try { - partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp); - if (partsList == null) { - partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), - alias_id, parseCtx.getPrunedPartitions()); - parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList); - } + TableScanOperator tsOp = (TableScanOperator) topOp; + partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id); } catch (SemanticException e) { throw e; } catch (HiveException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java index c136ea8..ce03660 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java @@ -61,8 +61,6 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { Map> topOps = pctx.getTopOps(); GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx(); Map opToPartPruner = pctx.getOpToPartPruner(); - Map opToPartList = pctx.getOpToPartList(); - Map prunedPartitions = pctx.getPrunedPartitions(); Map nameToSplitSample = pctx.getNameToSplitSample(); Map topToTable = pctx.getTopToTable(); @@ -106,15 +104,10 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { if (PartitionPruner.onlyContainsPartnCols(tab, opToPartPruner.get(ts))) { - PrunedPartitionList partsList = null; + PrunedPartitionList partsList; try { - partsList = opToPartList.get(ts); - if (partsList == null) { - partsList = PartitionPruner.prune(tab, - opToPartPruner.get(ts), conf, (String) topOps.keySet() - .toArray()[0], prunedPartitions); - opToPartList.put(ts, partsList); - } + String alias = (String) topOps.keySet().toArray()[0]; + partsList = PartitionPruner.prune(ts, pctx, alias); } catch (HiveException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index f2d253e..670e39c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -49,7 +49,6 @@ import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -389,17 +388,9 @@ else if ((expr instanceof ExprNodeConstantDesc) || List bucketCols = table.getBucketCols(); return matchBucketSortCols(groupByCols, bucketCols, sortCols); } else { - PrunedPartitionList partsList = null; + PrunedPartitionList partsList; try { - partsList = pGraphContext.getOpToPartList().get(tableScanOp); - if (partsList == null) { - partsList = PartitionPruner.prune(table, - pGraphContext.getOpToPartPruner().get(tableScanOp), - pGraphContext.getConf(), - table.getTableName(), - pGraphContext.getPrunedPartitions()); - pGraphContext.getOpToPartList().put(tableScanOp, partsList); - } + partsList = pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp); } catch (HiveException e) { LOG.error(StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java index b882f87..d33ea91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java @@ -60,10 +60,7 @@ public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) } else { // For partitioned tables, get the size of all the partitions - PrunedPartitionList partsList = - PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), - null, parseCtx.getPrunedPartitions()); + PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null); for (Partition part : partsList.getNotDeniedPartns()) { currentSize += getSize(conf, part); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBPartitionProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBPartitionProcFactory.java index 0a0802c..41d27fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBPartitionProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBPartitionProcFactory.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; /** * Walk through top operators in tree to find all partitions. @@ -55,25 +54,14 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop, //Run partition pruner to get partitions ParseContext parseCtx = owc.getParseContext(); - PrunedPartitionList prunedPartList = parseCtx.getOpToPartList().get(top); - if (prunedPartList == null) { - // We never pruned the partition. Try to prune it. 
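The hunks above (AbstractBucketJoinProc, AbstractSMBJoinProc, GlobalLimitOptimizer, GroupByOptimizer, and the LBPartitionProcFactory change that continues below) all delete the same hand-rolled "check the opToPartList map, prune on miss, store the result" sequence and delegate to ParseContext.getPrunedPartitions(); the big-table selectors call the new PartitionPruner.prune(ts, parseCtx, alias) overload directly. The centralized pattern is just a compute-once cache keyed by the TableScanOperator; a generic, plain-Java rendering of it (names below are illustrative, not Hive's):

interface Loader<K, V> {
  /** Supplies the expensive value, e.g. PartitionPruner.prune(ts, parseCtx, alias). */
  V load(K key) throws Exception;
}

public class ComputeOnceCache<K, V> {

  private final java.util.Map<K, V> cache = new java.util.HashMap<K, V>();

  /** Returns the cached value for key, computing and storing it on first use. */
  public V get(K key, Loader<K, V> loader) throws Exception {
    V value = cache.get(key);
    if (value == null) {
      value = loader.load(key);
      cache.put(key, value);
    }
    return value;
  }
}

Keeping the cache inside ParseContext means every optimizer sees the same PrunedPartitionList for a given TableScanOperator and the pruning work is done at most once per operator.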
- ExprNodeDesc ppr_pred = parseCtx.getOpToPartPruner().get(top); - if (ppr_pred != null) { - try { - prunedPartList = PartitionPruner.prune(parseCtx.getTopToTable().get(top), - ppr_pred, parseCtx.getConf(), - (String) parseCtx.getTopOps().keySet() - .toArray()[0], parseCtx.getPrunedPartitions()); - if (prunedPartList != null) { - owc.getParseContext().getOpToPartList().put(top, prunedPartList); - } - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with - // org.apache.commons.lang.StringUtils - throw new SemanticException(e.getMessage(), e); - } - } + PrunedPartitionList prunedPartList; + try { + String alias = (String) parseCtx.getTopOps().keySet().toArray()[0]; + prunedPartList = PartitionPruner.prune(top, parseCtx, alias); + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + throw new SemanticException(e.getMessage(), e); } if (prunedPartList != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcCtx.java index 5991199..db99786 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcCtx.java @@ -22,21 +22,30 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; /** * The processor context for partition condition remover. This contains * partition pruned for the table scan and table alias. */ public class PcrExprProcCtx implements NodeProcessorCtx { + /** * The table alias that is being currently processed. */ private final String tabAlias; private final List partList; + private final List vcs; public PcrExprProcCtx(String tabAlias, List partList) { + this(tabAlias, partList, null); + } + + public PcrExprProcCtx(String tabAlias, List partList, List vcs) { + super(); this.tabAlias = tabAlias; this.partList = partList; + this.vcs = vcs; } public String getTabAlias() { @@ -46,4 +55,8 @@ public String getTabAlias() { public List getPartList() { return partList; } + + public List getVirtualColumns() { + return vcs; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 0539765..4ab8325 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.ppr.PartExprEvalUtils; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -57,7 +58,8 @@ * It also generates node by Modifying expr trees with partition conditions removed */ public final class PcrExprProcFactory { - static Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws SemanticException { + static Object evalExprWithPart(ExprNodeDesc expr, Partition p, List vcs) + throws SemanticException { StructObjectInspector rowObjectInspector; Table tbl = p.getTable(); LinkedHashMap partSpec = p.getSpec(); @@ -70,7 +72,7 @@ static Object 
evalExprWithPart(ExprNodeDesc expr, Partition p) throws SemanticEx } try { - return PartExprEvalUtils.evalExprWithPart(expr, partSpec, rowObjectInspector); + return PartExprEvalUtils.evalExprWithPart(expr, partSpec, vcs, rowObjectInspector); } catch (HiveException e) { throw new SemanticException(e); } @@ -323,7 +325,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // a result, we update the state of the node to be TRUE of FALSE Boolean[] results = new Boolean[ctx.getPartList().size()]; for (int i = 0; i < ctx.getPartList().size(); i++) { - results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i)); + results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i), + ctx.getVirtualColumns()); } return getResultWrapFromResults(results, fd, nodeOutputs); } @@ -333,7 +336,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // to be a CONSTANT node with value to be the agreed result. Object[] results = new Object[ctx.getPartList().size()]; for (int i = 0; i < ctx.getPartList().size(); i++) { - results[i] = evalExprWithPart(fd, ctx.getPartList().get(i)); + results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns()); } Object result = ifResultsAgree(results); if (result == null) { @@ -421,17 +424,19 @@ public static NodeProcessor getColumnProcessor() { * @param tabAlias * the table alias * @param parts - * the list of all pruned partitions for the + * the list of all pruned partitions for the table + * @param vcs + * virtual columns referenced * @param pred * expression tree of the target filter operator * @return the node information of the root expression * @throws SemanticException */ public static NodeInfoWrapper walkExprTree( - String tabAlias, ArrayList parts, ExprNodeDesc pred) + String tabAlias, ArrayList parts, List vcs, ExprNodeDesc pred) throws SemanticException { // Create the walker, the rules dispatcher and the context. - PcrExprProcCtx pprCtx = new PcrExprProcCtx(tabAlias, parts); + PcrExprProcCtx pprCtx = new PcrExprProcCtx(tabAlias, parts, vcs); Map exprRules = new LinkedHashMap(); exprRules.put( diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java index 660389c..bae748f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java @@ -31,7 +31,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -100,27 +100,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } - PrunedPartitionList prunedPartList = owc.getParseContext().getOpToPartList().get(top); - if (prunedPartList == null) { - // We never pruned the partition. Try to prune it. - ExprNodeDesc ppr_pred = owc.getParseContext().getOpToPartPruner().get(top); - if (ppr_pred == null) { - // no partition predicate found, skip. 
- return null; - } - try { - prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top), - ppr_pred, owc.getParseContext().getConf(), - (String) owc.getParseContext().getTopOps().keySet() - .toArray()[0], owc.getParseContext().getPrunedPartitions()); - if (prunedPartList != null) { - owc.getParseContext().getOpToPartList().put(top, prunedPartList); - } - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with - // org.apache.commons.lang.StringUtils - throw new SemanticException(e.getMessage(), e); - } + ParseContext pctx = owc.getParseContext(); + PrunedPartitionList prunedPartList; + try { + String alias = (String) owc.getParseContext().getTopOps().keySet().toArray()[0]; + prunedPartList = pctx.getPrunedPartitions(alias, top); + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + throw new SemanticException(e.getMessage(), e); } // Otherwise this is not a sampling predicate. We need to process it. @@ -147,7 +135,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, partitions.addAll(prunedPartList.getUnknownPartns()); PcrExprProcFactory.NodeInfoWrapper wrapper = PcrExprProcFactory.walkExprTree( - alias, partitions, predicate); + alias, partitions, top.getConf().getVirtualCols(), predicate); if (wrapper.state == PcrExprProcFactory.WalkState.TRUE) { owc.getOpToRemove().add(new PcrOpWalkerCtx.OpToDeleteInfo(pop, fop)); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index 4fc1a97..a756f4e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -44,9 +45,11 @@ * @return value returned by the expression * @throws HiveException */ - static synchronized public Object evalExprWithPart(ExprNodeDesc expr, LinkedHashMap partSpec, + static synchronized public Object evalExprWithPart(ExprNodeDesc expr, + LinkedHashMap partSpec, List vcs, StructObjectInspector rowObjectInspector) throws HiveException { - Object[] rowWithPart = new Object[2]; + boolean hasVC = vcs != null && !vcs.isEmpty(); + Object[] rowWithPart = new Object[hasVC ? 
3 : 2]; // Create the row object ArrayList partNames = new ArrayList(); ArrayList partValues = new ArrayList(); @@ -61,10 +64,12 @@ static synchronized public Object evalExprWithPart(ExprNodeDesc expr, LinkedHash .getStandardStructObjectInspector(partNames, partObjectInspectors); rowWithPart[1] = partValues; - ArrayList ois = new ArrayList( - 2); + ArrayList ois = new ArrayList(2); ois.add(rowObjectInspector); ois.add(partObjectInspector); + if (hasVC) { + ois.add(VirtualColumn.getVCSObjectInspector(vcs)); + } StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory .getUnionStructObjectInspector(ois); @@ -79,9 +84,9 @@ static synchronized public Object evalExprWithPart(ExprNodeDesc expr, LinkedHash } static synchronized public Map prepareExpr( - ExprNodeDesc expr, List partNames, + ExprNodeDesc expr, List partNames, List vcs, StructObjectInspector rowObjectInspector) throws HiveException { - + boolean hasVC = vcs != null && !vcs.isEmpty(); // Create the row object List partObjectInspectors = new ArrayList(); for (int i = 0; i < partNames.size(); i++) { @@ -90,9 +95,12 @@ static synchronized public Object evalExprWithPart(ExprNodeDesc expr, LinkedHash StructObjectInspector partObjectInspector = ObjectInspectorFactory .getStandardStructObjectInspector(partNames, partObjectInspectors); - List ois = new ArrayList(2); + List ois = new ArrayList(3); ois.add(rowObjectInspector); ois.add(partObjectInspector); + if (hasVC) { + ois.add(VirtualColumn.getVCSObjectInspector(vcs)); + } StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(ois); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 4fa5f2a..3727576 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -36,12 +36,14 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.PrunerUtils; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -130,6 +132,17 @@ public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) { } /** + * Get the partition list for the TS operator that satisfies the partition pruner + * condition. + */ + public static PrunedPartitionList prune(TableScanOperator ts, ParseContext parseCtx, + String alias) throws HiveException { + return prune(parseCtx.getTopToTable().get(ts), + parseCtx.getOpToPartPruner().get(ts), parseCtx.getConf(), alias, + ts.getConf().getVirtualCols(), parseCtx.getPrunedPartitions()); + } + + /** * Get the partition list for the table that satisfies the partition pruner * condition. * @@ -141,12 +154,16 @@ public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) { * for checking whether "strict" mode is on. * @param alias * for generating error message only. 
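PartExprEvalUtils above now sizes the pruner-side row as Object[hasVC ? 3 : 2] and appends the virtual-column struct inspector, and pruneBySequentialScan (continued below) builds its objectWithPart the same way. The virtual-column slot is never filled during pruning, so as far as the patch shows, a predicate that mentions a virtual column now resolves the column against the union inspector and simply reads null there, leaving the partition in the "unknown" set rather than failing to resolve the field. A hedged sketch of that lookup with hand-built inspectors; the table and partition schemas are invented, and only the VirtualColumn helper comes from the patch:

import java.util.Arrays;

import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PruneTimeVcSketch {
  public static void main(String[] args) {
    StructObjectInspector tblOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("ds", "hr"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    StructObjectInspector vcOI =
        VirtualColumn.getVCSObjectInspector(Arrays.asList(VirtualColumn.BLOCKOFFSET));

    StructObjectInspector rowOI = ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(tblOI, partOI, vcOI));

    // Only slot 1 carries data at prune time, mirroring objectWithPart in
    // pruneBySequentialScan: table columns and virtual columns stay null.
    Object[] objectWithPart = {null, Arrays.asList("2008-04-08", "11"), null};

    Object offset = rowOI.getStructFieldData(objectWithPart,
        rowOI.getStructFieldRef(VirtualColumn.BLOCKOFFSET.getName()));
    System.out.println(offset);   // null -> a comparison on the virtual column is "unknown"
  }
}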
+ * @param vcs + * virtual columns referenced + * @param prunedPartitionsMap + * cached result for the table * @return the partition list for the table that satisfies the partition * pruner condition. * @throws HiveException */ - public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, - HiveConf conf, String alias, + private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, + HiveConf conf, String alias, List vcs, Map prunedPartitionsMap) throws HiveException { LOG.trace("Started pruning partiton"); LOG.trace("dbname = " + tab.getDbName()); @@ -168,10 +185,6 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, LinkedHashSet denied_parts = new LinkedHashSet(); try { - StructObjectInspector rowObjectInspector = (StructObjectInspector) tab - .getDeserializer().getObjectInspector(); - Object[] rowWithPart = new Object[2]; - if (tab.isPartitioned()) { // If the "strict" mode is on, we have to provide partition pruner for // each table. @@ -216,8 +229,10 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, } else { LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '" + message + "'")); + StructObjectInspector rowObjectInspector = (StructObjectInspector) tab + .getDeserializer().getObjectInspector(); pruneBySequentialScan(tab, true_parts, unkn_parts, denied_parts, - prunerExpr, rowObjectInspector, conf); + prunerExpr, rowObjectInspector, vcs, conf); } } } @@ -253,7 +268,7 @@ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF(); if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr) { - List children = ((ExprNodeGenericFuncDesc)expr).getChildren(); + List children = expr.getChildren(); ExprNodeDesc left = children.get(0); children.set(0, compactExpr(left)); ExprNodeDesc right = children.get(1); @@ -300,11 +315,13 @@ static private void pruneByPushDown(Table tab, Set true_parts, String * @param denied_parts pruned out partitions. * @param prunerExpr the SQL predicate that involves partition columns. * @param rowObjectInspector object inspector used by the evaluator + * @param vcs virtual columns referenced * @param conf Hive Configuration object, can not be NULL. * @throws Exception */ - static private void pruneBySequentialScan(Table tab, Set true_parts, Set unkn_parts, - Set denied_parts, ExprNodeDesc prunerExpr, StructObjectInspector rowObjectInspector, HiveConf conf) + static private void pruneBySequentialScan(Table tab, Set true_parts, + Set unkn_parts, Set denied_parts, ExprNodeDesc prunerExpr, + StructObjectInspector rowObjectInspector, List vcs, HiveConf conf) throws Exception { List trueNames = null; @@ -320,15 +337,17 @@ static private void pruneBySequentialScan(Table tab, Set true_parts, List pCols = tab.getPartCols(); List partCols = new ArrayList(pCols.size()); List values = new ArrayList(pCols.size()); - Object[] objectWithPart = new Object[2]; String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); + + boolean hasVC = vcs != null && !vcs.isEmpty(); + Object[] objectWithPart = new Object[hasVC ? 
3 : 2]; for (FieldSchema pCol : pCols) { partCols.add(pCol.getName()); } Map handle = PartExprEvalUtils.prepareExpr( - prunerExpr, partCols, rowObjectInspector); + prunerExpr, partCols, vcs, rowObjectInspector); for (String partName : partNames) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index f05b5b8..c35552f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -601,8 +601,7 @@ public PrunedPartitionList getPrunedPartitions(String alias, TableScanOperator t throws HiveException { PrunedPartitionList partsList = opToPartList.get(ts); if (partsList == null) { - partsList = PartitionPruner.prune(topToTable.get(ts), - opToPartPruner.get(ts), conf, alias, prunedPartitions); + partsList = PartitionPruner.prune(ts, this, alias); opToPartList.put(ts, partsList); } return partsList; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index c5f96a5..ec2f8f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -137,6 +137,10 @@ public void addVirtualCols(List virtualCols) { this.virtualCols.addAll(virtualCols); } + public boolean hasVirtualCols() { + return virtualCols != null && !virtualCols.isEmpty(); + } + public void setStatsAggPrefix(String k) { statsAggKeyPrefix = k; } diff --git ql/src/test/queries/clientpositive/ppd_vc.q ql/src/test/queries/clientpositive/ppd_vc.q new file mode 100644 index 0000000..d029d28 --- /dev/null +++ ql/src/test/queries/clientpositive/ppd_vc.q @@ -0,0 +1,7 @@ +--HIVE-3926 PPD on virtual column of partitioned table is not working + +explain extended select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100; +select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100; + +explain extended select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50; +select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50; diff --git ql/src/test/results/clientpositive/ppd_vc.q.out ql/src/test/results/clientpositive/ppd_vc.q.out new file mode 100644 index 0000000..48b1239 --- /dev/null +++ ql/src/test/results/clientpositive/ppd_vc.q.out @@ -0,0 +1,723 @@ +PREHOOK: query: --HIVE-3926 PPD on virtual column of partitioned table is not working + +explain extended select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100 +PREHOOK: type: QUERY +POSTHOOK: query: --HIVE-3926 PPD on virtual column of partitioned table is not working + +explain extended select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE) 100)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (BLOCK__OFFSET__INSIDE__FILE < 100) + type: boolean + Select Operator + expressions: 
+ expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### 
+POSTHOOK: query: select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +86 val_86 2008-04-08 11 +311 val_311 2008-04-08 11 +27 val_27 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +98 val_98 2008-04-08 11 +238 val_238 2008-04-08 12 +86 val_86 2008-04-08 12 +311 val_311 2008-04-08 12 +27 val_27 2008-04-08 12 +165 val_165 2008-04-08 12 +409 val_409 2008-04-08 12 +255 val_255 2008-04-08 12 +278 val_278 2008-04-08 12 +98 val_98 2008-04-08 12 +238 val_238 2008-04-09 11 +86 val_86 2008-04-09 11 +311 val_311 2008-04-09 11 +27 val_27 2008-04-09 11 +165 val_165 2008-04-09 11 +409 val_409 2008-04-09 11 +255 val_255 2008-04-09 11 +278 val_278 2008-04-09 11 +98 val_98 2008-04-09 11 +238 val_238 2008-04-09 12 +86 val_86 2008-04-09 12 +311 val_311 2008-04-09 12 +27 val_27 2008-04-09 12 +165 val_165 2008-04-09 12 +409 val_409 2008-04-09 12 +255 val_255 2008-04-09 12 +278 val_278 2008-04-09 12 +98 val_98 2008-04-09 12 +PREHOOK: query: explain extended select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE) 100)))) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. 
(TOK_TABLE_OR_COL b) BLOCK__OFFSET__INSIDE__FILE) 50)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b:srcpart + TableScan + alias: srcpart + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((BLOCK__OFFSET__INSIDE__FILE < 100) and (BLOCK__OFFSET__INSIDE__FILE < 50)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: bigint + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base 
file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7, _col8 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col6 + type: string + expr: _col7 + type: string + expr: _col8 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Truncated Path -> Alias: + /src [a] + /srcpart/ds=2008-04-08/hr=11 [b:srcpart] + /srcpart/ds=2008-04-08/hr=12 [b:srcpart] + /srcpart/ds=2008-04-09/hr=11 [b:srcpart] + /srcpart/ds=2008-04-09/hr=12 [b:srcpart] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from src a join (select *,BLOCK__OFFSET__INSIDE__FILE from srcpart where BLOCK__OFFSET__INSIDE__FILE<100) b on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +165 val_165 165 val_165 2008-04-08 11 44 +165 val_165 165 val_165 2008-04-09 11 44 +165 val_165 165 val_165 2008-04-09 12 44 +165 val_165 165 val_165 2008-04-08 12 44 +165 val_165 165 val_165 2008-04-08 11 44 +165 val_165 165 val_165 2008-04-09 11 44 +165 val_165 165 val_165 2008-04-09 12 44 +165 val_165 165 val_165 2008-04-08 12 44 +238 val_238 238 val_238 2008-04-08 11 0 +238 val_238 238 val_238 2008-04-08 12 0 +238 val_238 238 val_238 2008-04-09 12 0 +238 val_238 238 val_238 2008-04-09 11 0 +238 val_238 238 val_238 2008-04-08 11 0 +238 val_238 238 val_238 2008-04-08 12 0 +238 val_238 238 val_238 2008-04-09 12 0 +238 val_238 238 val_238 2008-04-09 11 0 +27 val_27 27 val_27 2008-04-08 12 34 +27 val_27 27 val_27 2008-04-08 11 34 +27 val_27 27 val_27 2008-04-09 11 34 +27 val_27 27 val_27 2008-04-09 12 34 +311 val_311 311 val_311 2008-04-08 11 22 +311 val_311 311 val_311 2008-04-09 11 22 +311 val_311 311 val_311 2008-04-09 12 22 +311 val_311 311 val_311 2008-04-08 12 22 +311 val_311 311 val_311 2008-04-08 11 22 +311 val_311 311 val_311 2008-04-09 11 22 +311 val_311 311 val_311 2008-04-09 12 22 +311 val_311 311 val_311 2008-04-08 12 22 +311 val_311 311 val_311 2008-04-08 11 22 +311 val_311 311 val_311 2008-04-09 11 22 +311 val_311 311 val_311 2008-04-09 12 22 +311 val_311 311 val_311 2008-04-08 12 22 +86 val_86 86 val_86 2008-04-09 11 12 +86 val_86 86 val_86 2008-04-08 11 12 +86 val_86 86 val_86 2008-04-09 12 12 +86 val_86 86 val_86 2008-04-08 12 12
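
For reference, a minimal compile-level sketch (not part of the patch) of how a caller outside ParseContext might use the narrowed pruner entry point and the new TableScanDesc.hasVirtualCols() helper added above. The class name PpdVcExample and the method pruneFor are illustrative assumptions; only the PartitionPruner.prune(ts, parseCtx, alias) signature and hasVirtualCols() come from this change, the rest is existing Hive API.

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

// Illustrative caller only; not part of HIVE-3926.
public class PpdVcExample {

  static PrunedPartitionList pruneFor(TableScanOperator ts, ParseContext parseCtx,
      String alias) throws HiveException {
    TableScanDesc desc = ts.getConf();
    if (desc.hasVirtualCols()) {
      // The scan projects virtual columns such as BLOCK__OFFSET__INSIDE__FILE;
      // the pruner now receives the TableScanOperator itself, so predicates on
      // virtual columns can be handled during partition pruning.
    }
    // Replaces the old five-argument overload (table, pruner expression, conf,
    // alias, pruned-partition cache) used before this patch.
    return PartitionPruner.prune(ts, parseCtx, alias);
  }
}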