diff --git beeline/src/java/org/apache/hive/beeline/Commands.java beeline/src/java/org/apache/hive/beeline/Commands.java index def26b6..291adba 100644 --- beeline/src/java/org/apache/hive/beeline/Commands.java +++ beeline/src/java/org/apache/hive/beeline/Commands.java @@ -780,6 +780,7 @@ private boolean execute(String line, boolean call) { logThread.start(); hasResults = stmnt.execute(sql); logThread.interrupt(); + logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT); } } @@ -847,6 +848,7 @@ public void run() { return; } catch (InterruptedException e) { beeLine.debug("Getting log thread is interrupted, since query is done!"); + showRemainingLogsIfAny(hiveStatement); return; } } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e19e43f..cbdbdda 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2493,12 +2493,6 @@ private void initialize(Class cls) { // Overlay the values of any system properties whose names appear in the list of ConfVars applySystemProperties(); - if(this.get("hive.metastore.local", null) != null) { - l4j.warn("DEPRECATED: Configuration property hive.metastore.local no longer has any " + - "effect. Make sure to provide a valid value for hive.metastore.uris if you are " + - "connecting to a remote metastore."); - } - if ((this.get("hive.metastore.ds.retry.attempts") != null) || this.get("hive.metastore.ds.retry.interval") != null) { l4j.warn("DEPRECATED: hive.metastore.ds.retry.* no longer has any effect. " + diff --git contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out index 4123c5a..856dcfe 100644 --- contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out +++ contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out @@ -26,11 +26,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5, 1) (type: string), '(' (type: string), key (type: string), ':' (type: string), value (type: string), ')' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: substr(value, 5, 1) (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: example_group_concat(_col1, _col2, _col3, _col4, _col5) + aggregations: example_group_concat('(', _col2, ':', _col4, ')') keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git contrib/src/test/results/clientpositive/udaf_example_max_n.q.out contrib/src/test/results/clientpositive/udaf_example_max_n.q.out index 4e911ed..05e0028 100644 --- contrib/src/test/results/clientpositive/udaf_example_max_n.q.out +++ contrib/src/test/results/clientpositive/udaf_example_max_n.q.out @@ -26,11 +26,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string), 10 (type: int), if((UDFToDouble(substr(value, 5)) > 250.0), null, substr(value, 5)) (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: substr(value, 5) (type: string), if((UDFToDouble(substr(value, 5)) > 250.0), null, substr(value, 5)) (type: string) + outputColumnNames: _col0, 
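Editor's note on the Commands.java hunk above: the added `logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT)` gives the log-fetching thread a bounded window to finish after the query completes, and `showRemainingLogsIfAny` drains whatever the server still buffered when the interrupt arrives; without the join, the main thread can print the next prompt before the log thread finishes writing. A minimal sketch of that interrupt-then-bounded-join pattern follows — all names here (`LogDrainDemo`, `pollLogs`, `fetchPendingLogs`, `JOIN_TIMEOUT_MS`) are hypothetical stand-ins, not Beeline APIs.

```java
public class LogDrainDemo {
  static final long JOIN_TIMEOUT_MS = 10_000L; // stand-in for DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT

  public static void main(String[] args) throws InterruptedException {
    Thread logThread = new Thread(() -> {
      try {
        while (true) {
          pollLogs();        // periodic fetch while the query runs
          Thread.sleep(100L);
        }
      } catch (InterruptedException e) {
        // Interrupt means "query is done": drain anything still buffered
        // before exiting (the role of showRemainingLogsIfAny above).
        fetchPendingLogs();
      }
    });
    logThread.start();
    Thread.sleep(300L);               // placeholder for stmnt.execute(sql)
    logThread.interrupt();            // signal completion to the log thread
    logThread.join(JOIN_TIMEOUT_MS);  // bounded wait so output isn't interleaved with the prompt
  }

  static void pollLogs() { System.out.println("...log line..."); }
  static void fetchPendingLogs() { System.out.println("remaining logs flushed"); }
}
```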
_col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: example_max_n(_col0, _col1), example_max_n(_col2, _col1) + aggregations: example_max_n(_col0, 10), example_max_n(_col2, 10) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE diff --git contrib/src/test/results/clientpositive/udaf_example_min_n.q.out contrib/src/test/results/clientpositive/udaf_example_min_n.q.out index c732838..aba1569 100644 --- contrib/src/test/results/clientpositive/udaf_example_min_n.q.out +++ contrib/src/test/results/clientpositive/udaf_example_min_n.q.out @@ -26,11 +26,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string), 10 (type: int), if((UDFToDouble(substr(value, 5)) < 250.0), null, substr(value, 5)) (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: substr(value, 5) (type: string), if((UDFToDouble(substr(value, 5)) < 250.0), null, substr(value, 5)) (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: example_min_n(_col0, _col1), example_min_n(_col2, _col1) + aggregations: example_min_n(_col0, 10), example_min_n(_col2, 10) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE diff --git data/files/datatypes.txt data/files/datatypes.txt index 0228a27..458c5bd 100644 --- data/files/datatypes.txt +++ data/files/datatypes.txt @@ -1,3 +1,3 @@ -\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N --1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N\N +\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N +-1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N\N\N 1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456YWJjZA==2013-01-01abc123abc123X'01FF' diff --git doap_Hive.rdf doap_Hive.rdf index b773570..e69de29 100644 --- doap_Hive.rdf +++ doap_Hive.rdf @@ -1,58 +0,0 @@ - - - - - - 2011-11-09 - - Apache Hive - - - The Apache Hive (TM) data warehouse software facilitates querying and managing large datasets residing in distributed storage. - The Apache Hive (TM) data warehouse software facilitates querying and managing large datasets residing in distributed storage. Built on top of Apache Hadoop (TM), it provides - -* tools to enable easy data extract/transform/load (ETL) -* a mechanism to impose structure on a variety of data formats -* access to files stored either directly in Apache HDFS (TM) or in other data storage systems such as Apache HBase (TM) -* query execution via MapReduce - -Hive defines a simple SQL-like query language, called HiveQL, that enables users familiar with SQL to query the data. At the same time, this language also allows programmers who are familiar with the MapReduce framework to be able to plug in their custom mappers and reducers to perform more sophisticated analysis that may not be supported by the built-in capabilities of the language. HiveQL can also be extended with custom scalar functions (UDF's), aggregations (UDAF's), and table functions (UDTF's). 
- - - - - Java - - - - - - - - - - John Sichi - - - - - diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java index f6058e4..adbb531 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java @@ -20,7 +20,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.HashMap; +import java.util.LinkedHashMap; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; @@ -61,9 +61,8 @@ public void run(HookContext hookContext) { Map> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); - // We need a new map to ensure output is always produced in the same order. - // This makes tests that use this hook deterministic. - Map outputOrderedMap = new HashMap(); + // Must be deterministic order map for consistent test output across Java versions + Map outputOrderedMap = new LinkedHashMap(); for (Map.Entry> tableAccess : tableToColumnAccessMap.entrySet()) { StringBuilder perTableInfo = new StringBuilder(); diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java index 8e19fad..7d1fc3d 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java @@ -19,7 +19,7 @@ import java.util.List; import java.util.Map; -import java.util.HashMap; +import java.util.LinkedHashMap; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; @@ -61,9 +61,8 @@ public void run(HookContext hookContext) { Map, Map>> operatorToTableAccessMap = tableAccessInfo.getOperatorToTableAccessMap(); - // We need a new map to ensure output is always produced in the same order. - // This makes tests that use this hook deterministic. 
- Map outputOrderedMap = new HashMap(); + // Must be deterministic order map for consistent q-test output across Java versions + Map outputOrderedMap = new LinkedHashMap(); for (Map.Entry, Map>> tableAccess: operatorToTableAccessMap.entrySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index d12dfe5..fa40082 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -222,7 +222,7 @@ public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) { String tableName = "result"; List lst = null; try { - lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer()); + lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(conf)); } catch (Exception e) { LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e)); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 6338c3c..0ccab02 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -26,8 +26,10 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Properties; import org.apache.commons.lang3.StringEscapeUtils; +import com.google.common.collect.Iterators; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; @@ -49,14 +51,13 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -70,74 +71,81 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.AnnotationUtils; /** * FetchTask implementation. 
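The two hook changes above (and the matching LinkedHashMap swaps in CommonJoinTaskDispatcher, MapJoinResolver, and SortMergeJoinTaskDispatcher later in this patch) all address the same problem: HashMap iteration order is unspecified and changed between JDK releases, so golden-file q-test output built by iterating a HashMap differs across Java versions. A small self-contained demonstration of the difference:

```java
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class MapOrderDemo {
  public static void main(String[] args) {
    Map<String, String> hashed = new HashMap<>();
    Map<String, String> linked = new LinkedHashMap<>();
    for (String key : new String[] {"default@src", "default@srcpart", "default@src1"}) {
      hashed.put(key, "Columns:key,value");
      linked.put(key, "Columns:key,value");
    }
    // HashMap order depends on hashing internals that vary by JDK;
    // LinkedHashMap replays insertion order, keeping test output byte-identical.
    System.out.println("HashMap:       " + hashed.keySet());
    System.out.println("LinkedHashMap: " + linked.keySet());
  }
}
```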
**/ public class FetchOperator implements Serializable { - static Log LOG = LogFactory.getLog(FetchOperator.class.getName()); - static LogHelper console = new LogHelper(LOG); + static final Log LOG = LogFactory.getLog(FetchOperator.class.getName()); + static final LogHelper console = new LogHelper(LOG); public static final String FETCH_OPERATOR_DIRECTORY_LIST = "hive.complete.dir.list"; - private boolean isNativeTable; private FetchWork work; - protected Operator operator; // operator tree for processing row further (option) - private int splitNum; - private PartitionDesc currPart; - private TableDesc currTbl; - private boolean tblDataDone; - private FooterBuffer footerBuffer = null; - private int headerCount = 0; - private int footerCount = 0; - - private boolean hasVC; - private boolean isPartitioned; + private Operator operator; // operator tree for processing row further (optional) + + private final boolean hasVC; + private final boolean isStatReader; + private final boolean isPartitioned; + private final boolean isNonNativeTable; private StructObjectInspector vcsOI; private List vcCols; private ExecMapperContext context; + private transient Deserializer tableSerDe; + private transient StructObjectInspector tableOI; + private transient StructObjectInspector partKeyOI; + private transient StructObjectInspector convertedOI; + + private transient Iterator iterPath; + private transient Iterator iterPartDesc; + private transient Iterator iterSplits = Iterators.emptyIterator(); + + private transient Path currPath; + private transient PartitionDesc currDesc; + private transient Deserializer currSerDe; + private transient Converter ObjectConverter; private transient RecordReader currRecReader; - private transient FetchInputFormatSplit[] inputSplits; - private transient InputFormat inputFormat; + private transient JobConf job; private transient WritableComparable key; private transient Writable value; private transient Object[] vcValues; - private transient Deserializer serde; - private transient Deserializer tblSerde; - private transient Converter partTblObjectInspectorConverter; - private transient Iterator iterPath; - private transient Iterator iterPartDesc; - private transient Path currPath; - private transient StructObjectInspector objectInspector; - private transient StructObjectInspector rowObjectInspector; - private transient ObjectInspector partitionedTableOI; + private transient int headerCount; + private transient int footerCount; + private transient FooterBuffer footerBuffer; + + private transient StructObjectInspector outputOI; private transient Object[] row; - public FetchOperator() { - } - - public FetchOperator(FetchWork work, JobConf job) { - this.job = job; - this.work = work; - initialize(); + public FetchOperator(FetchWork work, JobConf job) throws HiveException { + this(work, job, null, null); } public FetchOperator(FetchWork work, JobConf job, Operator operator, - List vcCols) { + List vcCols) throws HiveException { this.job = job; this.work = work; this.operator = operator; this.vcCols = vcCols; + this.hasVC = vcCols != null && !vcCols.isEmpty(); + this.isStatReader = work.getTblDesc() == null; + this.isPartitioned = !isStatReader && work.isPartitioned(); + this.isNonNativeTable = !isStatReader && work.getTblDesc().isNonNative(); initialize(); } - private void initialize() { - if (hasVC = vcCols != null && !vcCols.isEmpty()) { + private void initialize() throws HiveException { + if (isStatReader) { + outputOI = work.getStatRowOI(); + return; + } + if (hasVC) { List names 
= new ArrayList(vcCols.size()); List inspectors = new ArrayList(vcCols.size()); for (VirtualColumn vc : vcCols) { @@ -147,8 +155,6 @@ private void initialize() { vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); vcValues = new Object[vcCols.size()]; } - isPartitioned = work.isPartitioned(); - tblDataDone = false; if (hasVC && isPartitioned) { row = new Object[3]; } else if (hasVC || isPartitioned) { @@ -156,21 +162,27 @@ private void initialize() { } else { row = new Object[1]; } - if (work.getTblDesc() != null) { - isNativeTable = !work.getTblDesc().isNonNative(); + if (isPartitioned) { + iterPath = work.getPartDir().iterator(); + iterPartDesc = work.getPartDesc().iterator(); } else { - isNativeTable = true; + iterPath = Arrays.asList(work.getTblDir()).iterator(); + iterPartDesc = Iterators.cycle(new PartitionDesc(work.getTblDesc(), null)); } - setupExecContext(); + outputOI = setupOutputObjectInspector(); + context = setupExecContext(operator, work.getPathLists()); } - private void setupExecContext() { + private ExecMapperContext setupExecContext(Operator operator, List paths) { + ExecMapperContext context = null; if (hasVC || work.getSplitSample() != null) { context = new ExecMapperContext(job); if (operator != null) { operator.setExecContext(context); } } + setFetchOperatorContext(job, paths); + return context; } public FetchWork getWork() { @@ -181,42 +193,6 @@ public void setWork(FetchWork work) { this.work = work; } - public int getSplitNum() { - return splitNum; - } - - public void setSplitNum(int splitNum) { - this.splitNum = splitNum; - } - - public PartitionDesc getCurrPart() { - return currPart; - } - - public void setCurrPart(PartitionDesc currPart) { - this.currPart = currPart; - } - - public TableDesc getCurrTbl() { - return currTbl; - } - - public void setCurrTbl(TableDesc currTbl) { - this.currTbl = currTbl; - } - - public boolean isTblDataDone() { - return tblDataDone; - } - - public void setTblDataDone(boolean tblDataDone) { - this.tblDataDone = tblDataDone; - } - - public boolean isEmptyTable() { - return work.getTblDir() == null && (work.getPartDir() == null || work.getPartDir().isEmpty()); - } - /** * A cache of InputFormat instances. 
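A note on the iterator plumbing introduced above: modeling the non-partitioned case as `Iterators.cycle` over a single `PartitionDesc` lets one loop advance `iterPath` and `iterPartDesc` in lockstep for both partitioned and non-partitioned tables, while `Iterators.emptyIterator()` gives `iterSplits` a safe initial state. A sketch of the cycle idea, assuming Guava on the classpath:

```java
import com.google.common.collect.Iterators;

import java.util.Arrays;
import java.util.Iterator;

public class CycleDemo {
  public static void main(String[] args) {
    Iterator<String> paths =
        Arrays.asList("/warehouse/t/ds=a", "/warehouse/t/ds=b").iterator();
    // For a non-partitioned table there is exactly one descriptor; cycling it
    // means descs.next() can always be paired with the next path.
    Iterator<String> descs = Iterators.cycle("table-desc");
    while (paths.hasNext()) {
      System.out.println(paths.next() + " -> " + descs.next());
    }
  }
}
```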
*/ @@ -243,146 +219,54 @@ static InputFormat getInputFormatFromCache(Class inputFor return format; } - private StructObjectInspector getRowInspectorFromTable(TableDesc table) throws Exception { - Deserializer serde = table.getDeserializerClass().newInstance(); - SerDeUtils.initializeSerDeWithoutErrorCheck(serde, job, table.getProperties(), null); - return createRowInspector(getStructOIFrom(serde.getObjectInspector())); - } - - private StructObjectInspector getRowInspectorFromPartition(PartitionDesc partition, - ObjectInspector partitionOI) throws Exception { - - String pcols = partition.getTableDesc().getProperties().getProperty( + private StructObjectInspector getPartitionKeyOI(TableDesc tableDesc) throws Exception { + String pcols = tableDesc.getProperties().getProperty( org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); - String[] partKeys = pcols.trim().split("/"); - String pcolTypes = partition.getTableDesc().getProperties().getProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); - String[] partKeyTypes = pcolTypes.trim().split(":"); - row[1] = createPartValue(partKeys, partition.getPartSpec(), partKeyTypes); + String pcolTypes = tableDesc.getProperties().getProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); - return createRowInspector(getStructOIFrom(partitionOI), partKeys, partKeyTypes); - } - - private StructObjectInspector getRowInspectorFromPartitionedTable(TableDesc table) - throws Exception { - Deserializer serde = table.getDeserializerClass().newInstance(); - SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null); - String pcols = table.getProperties().getProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); String[] partKeys = pcols.trim().split("/"); - String pcolTypes = table.getProperties().getProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); - String[] partKeyTypes = pcolTypes.trim().split(":"); - row[1] = null; - return createRowInspector(getStructOIFrom(serde.getObjectInspector()), partKeys, partKeyTypes); - } - - private StructObjectInspector getStructOIFrom(ObjectInspector current) throws SerDeException { - if (objectInspector != null) { - current = DelegatedObjectInspectorFactory.reset(objectInspector, current); - } else { - current = DelegatedObjectInspectorFactory.wrap(current); - } - return objectInspector = (StructObjectInspector) current; - } - - private StructObjectInspector createRowInspector(StructObjectInspector current) - throws SerDeException { - return hasVC ? 
ObjectInspectorFactory.getUnionStructObjectInspector( - Arrays.asList(current, vcsOI)) : current; - } - - private StructObjectInspector createRowInspector(StructObjectInspector current, String[] partKeys, String[] partKeyTypes) - throws SerDeException { - List partNames = new ArrayList(); - List partObjectInspectors = new ArrayList(); + String[] partKeyTypes = pcolTypes.trim().split(":"); + ObjectInspector[] inspectors = new ObjectInspector[partKeys.length]; for (int i = 0; i < partKeys.length; i++) { - String key = partKeys[i]; - partNames.add(key); - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( + inspectors[i] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i])); - partObjectInspectors.add(oi); } - StructObjectInspector partObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(partNames, partObjectInspectors); - - return ObjectInspectorFactory.getUnionStructObjectInspector( - hasVC ? Arrays.asList(current, partObjectInspector, vcsOI) : - Arrays.asList(current, partObjectInspector)); - } - - private Object[] createPartValue(String[] partKeys, Map partSpec, String[] partKeyTypes) { - Object[] partValues = new Object[partKeys.length]; - for (int i = 0; i < partKeys.length; i++) { - String key = partKeys[i]; - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( - TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i])); - partValues[i] = - ObjectInspectorConverters. - getConverter(PrimitiveObjectInspectorFactory. - javaStringObjectInspector, oi).convert(partSpec.get(key)); + return ObjectInspectorFactory.getStandardStructObjectInspector( + Arrays.asList(partKeys), Arrays.asList(inspectors)); + } + + private Object[] createPartValue(PartitionDesc partDesc, StructObjectInspector partOI) { + Map partSpec = partDesc.getPartSpec(); + List fields = partOI.getAllStructFieldRefs(); + Object[] partValues = new Object[fields.size()]; + for (int i = 0; i < partValues.length; i++) { + StructField field = fields.get(i); + String value = partSpec.get(field.getFieldName()); + ObjectInspector oi = field.getFieldObjectInspector(); + partValues[i] = ObjectInspectorConverters.getConverter( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(value); } return partValues; } - private void getNextPath() throws Exception { - // first time - if (iterPath == null) { - if (work.isNotPartitioned()) { - if (!tblDataDone) { - currPath = work.getTblDir(); - currTbl = work.getTblDesc(); - if (isNativeTable) { - FileSystem fs = currPath.getFileSystem(job); - if (fs.exists(currPath)) { - FileStatus[] fStats = listStatusUnderPath(fs, currPath); - for (FileStatus fStat : fStats) { - if (fStat.getLen() > 0) { - tblDataDone = true; - break; - } - } - } - } else { - tblDataDone = true; - } - - if (!tblDataDone) { - currPath = null; - } - return; - } else { - currTbl = null; - currPath = null; - } - return; - } else { - setFetchOperatorContext(job, work.getPartDir()); - iterPath = work.getPartDir().iterator(); - iterPartDesc = work.getPartDesc().iterator(); - } - } - + private boolean getNextPath() throws Exception { while (iterPath.hasNext()) { - Path nxt = iterPath.next(); - PartitionDesc prt = null; - if (iterPartDesc != null) { - prt = iterPartDesc.next(); + currPath = iterPath.next(); + currDesc = iterPartDesc.next(); + if (isNonNativeTable) { + return true; } - FileSystem fs = nxt.getFileSystem(job); - if (fs.exists(nxt)) { - FileStatus[] fStats = 
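The rewritten `createPartValue` above pairs each partition-spec string with the field's declared ObjectInspector and converts it to a typed value. The one-column sketch below uses only serde2 calls that appear in this patch itself (`getConverter`, `javaStringObjectInspector`, `TypeInfoUtils`/`TypeInfoFactory`) and assumes hive-serde is on the classpath:

```java
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartValueDemo {
  public static void main(String[] args) {
    // Partition values arrive as strings from the partition spec (e.g. hr=11)
    // and must be converted to the declared column type before use.
    ObjectInspector intOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
        TypeInfoFactory.getPrimitiveTypeInfo("int"));
    Object typed = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, intOI).convert("11");
    System.out.println(typed + " : " + typed.getClass().getSimpleName()); // 11 : Integer
  }
}
```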
listStatusUnderPath(fs, nxt); - for (FileStatus fStat : fStats) { + FileSystem fs = currPath.getFileSystem(job); + if (fs.exists(currPath)) { + for (FileStatus fStat : listStatusUnderPath(fs, currPath)) { if (fStat.getLen() > 0) { - currPath = nxt; - if (iterPartDesc != null) { - currPart = prt; - } - return; + return true; } } } } + return false; } /** @@ -390,119 +274,53 @@ private void getNextPath() throws Exception { * This helps InputFormats make decisions based on the scope of the complete * operation. * @param conf the configuration to modify - * @param partDirs the list of partition directories + * @param paths the list of input directories */ - static void setFetchOperatorContext(JobConf conf, - ArrayList partDirs) { - if (partDirs != null) { + static void setFetchOperatorContext(JobConf conf, List paths) { + if (paths != null) { StringBuilder buff = new StringBuilder(); - boolean first = true; - for(Path p: partDirs) { - if (first) { - first = false; - } else { + for (Path path : paths) { + if (buff.length() > 0) { buff.append('\t'); } - buff.append(StringEscapeUtils.escapeJava(p.toString())); + buff.append(StringEscapeUtils.escapeJava(path.toString())); } conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString()); } } - /** - * A cache of Object Inspector Settable Properties. - */ - private static Map oiSettableProperties = new HashMap(); - private RecordReader getRecordReader() throws Exception { - if (currPath == null) { - getNextPath(); - if (currPath == null) { + if (!iterSplits.hasNext()) { + FetchInputFormatSplit[] splits = getNextSplits(); + if (splits == null) { return null; } - - // not using FileInputFormat.setInputPaths() here because it forces a - // connection - // to the default file system - which may or may not be online during pure - // metadata - // operations - job.set("mapred.input.dir", org.apache.hadoop.util.StringUtils.escapeString(currPath - .toString())); - - // Fetch operator is not vectorized and as such turn vectorization flag off so that - // non-vectorized record reader is created below. - if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { - HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); - } - - PartitionDesc partDesc; - if (currTbl == null) { - partDesc = currPart; + if (!isPartitioned || convertedOI == null) { + currSerDe = tableSerDe; + ObjectConverter = null; } else { - partDesc = new PartitionDesc(currTbl, null); - } - - Class formatter = partDesc.getInputFileFormatClass(); - inputFormat = getInputFormatFromCache(formatter, job); - Utilities.copyTableJobPropertiesToConf(partDesc.getTableDesc(), job); - InputSplit[] splits = inputFormat.getSplits(job, 1); - FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length]; - for (int i = 0; i < splits.length; i++) { - inputSplits[i] = new FetchInputFormatSplit(splits[i], formatter.getName()); - } - if (work.getSplitSample() != null) { - inputSplits = splitSampling(work.getSplitSample(), inputSplits); - } - this.inputSplits = inputSplits; - - splitNum = 0; - serde = partDesc.getDeserializer(job); - SerDeUtils.initializeSerDe(serde, job, partDesc.getTableDesc().getProperties(), - partDesc.getProperties()); - - if (currTbl != null) { - tblSerde = serde; + currSerDe = needConversion(currDesc) ? 
currDesc.getDeserializer(job) : tableSerDe; + ObjectInspector inputOI = currSerDe.getObjectInspector(); + ObjectConverter = ObjectInspectorConverters.getConverter(inputOI, convertedOI); } - else { - tblSerde = currPart.getTableDesc().getDeserializerClass().newInstance(); - SerDeUtils.initializeSerDe(tblSerde, job, currPart.getTableDesc().getProperties(), null); + if (isPartitioned) { + row[1] = createPartValue(currDesc, partKeyOI); } - - ObjectInspector outputOI = ObjectInspectorConverters.getConvertedOI( - serde.getObjectInspector(), - partitionedTableOI == null ? tblSerde.getObjectInspector() : partitionedTableOI, - oiSettableProperties); - - partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter( - serde.getObjectInspector(), outputOI); + iterSplits = Arrays.asList(splits).iterator(); if (LOG.isDebugEnabled()) { LOG.debug("Creating fetchTask with deserializer typeinfo: " - + serde.getObjectInspector().getTypeName()); + + currSerDe.getObjectInspector().getTypeName()); LOG.debug("deserializer properties:\ntable properties: " + - partDesc.getTableDesc().getProperties() + "\npartition properties: " + - partDesc.getProperties()); - } - - if (currPart != null) { - getRowInspectorFromPartition(currPart, outputOI); - } - } - - if (splitNum >= inputSplits.length) { - if (currRecReader != null) { - currRecReader.close(); - currRecReader = null; + currDesc.getTableDesc().getProperties() + "\npartition properties: " + + currDesc.getProperties()); } - currPath = null; - return getRecordReader(); } - final FetchInputFormatSplit target = inputSplits[splitNum]; + final FetchInputFormatSplit target = iterSplits.next(); @SuppressWarnings("unchecked") - final RecordReader reader = - inputFormat.getRecordReader(target.getInputSplit(), job, Reporter.NULL); + final RecordReader reader = target.getRecordReader(job); if (hasVC || work.getSplitSample() != null) { currRecReader = new HiveRecordReader(reader, job) { @Override @@ -517,23 +335,52 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException } }; ((HiveContextAwareRecordReader)currRecReader). - initIOContext(target, job, inputFormat.getClass(), reader); + initIOContext(target, job, target.inputFormat.getClass(), reader); } else { currRecReader = reader; } - splitNum++; key = currRecReader.createKey(); value = currRecReader.createValue(); + headerCount = footerCount = 0; return currRecReader; } + protected FetchInputFormatSplit[] getNextSplits() throws Exception { + while (getNextPath()) { + // not using FileInputFormat.setInputPaths() here because it forces a connection to the + // default file system - which may or may not be online during pure metadata operations + job.set("mapred.input.dir", StringUtils.escapeString(currPath.toString())); + + // Fetch operator is not vectorized and as such turn vectorization flag off so that + // non-vectorized record reader is created below. 
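The `getRecordReader`/`getNextSplits` rework above replaces the old `splitNum` index and recursive re-entry with an iterator that refills itself: when the current split batch is exhausted, it advances to the next path, skipping paths that yield no splits. A generic model of that refill pattern, with hypothetical names:

```java
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

public class RefillIteratorDemo {
  // Stand-in for getNextSplits(): each element is the split batch of one path.
  private static final Iterator<List<String>> BATCHES = Arrays.asList(
      Arrays.asList("split-0", "split-1"),
      Collections.<String>emptyList(),   // path with no data: skipped transparently
      Arrays.asList("split-2")).iterator();

  private static Iterator<String> iterSplits = Collections.emptyIterator();

  static String nextSplit() {
    while (!iterSplits.hasNext()) {      // current batch exhausted: refill
      if (!BATCHES.hasNext()) {
        return null;                     // no more paths: end of fetch
      }
      iterSplits = BATCHES.next().iterator();
    }
    return iterSplits.next();
  }

  public static void main(String[] args) {
    for (String s; (s = nextSplit()) != null; ) {
      System.out.println(s);             // split-0, split-1, split-2
    }
  }
}
```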
+ HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + + Class formatter = currDesc.getInputFileFormatClass(); + Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job); + InputFormat inputFormat = getInputFormatFromCache(formatter, job); + + InputSplit[] splits = inputFormat.getSplits(job, 1); + FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length]; + for (int i = 0; i < splits.length; i++) { + inputSplits[i] = new FetchInputFormatSplit(splits[i], inputFormat); + } + if (work.getSplitSample() != null) { + inputSplits = splitSampling(work.getSplitSample(), inputSplits); + } + if (inputSplits.length > 0) { + return inputSplits; + } + } + return null; + } + private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, FetchInputFormatSplit[] splits) { long totalSize = 0; for (FetchInputFormatSplit split: splits) { totalSize += split.getLength(); } - List result = new ArrayList(); + List result = new ArrayList(splits.length); long targetSize = splitSample.getTargetSize(totalSize); int startIndex = splitSample.getSeedNum() % splits.length; long size = 0; @@ -557,18 +404,18 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException * Currently only used by FetchTask. **/ public boolean pushRow() throws IOException, HiveException { - if(work.getRowsComputedUsingStats() != null) { + if (work.getRowsComputedUsingStats() != null) { for (List row : work.getRowsComputedUsingStats()) { operator.processOp(row, 0); } - operator.flush(); + flushRow(); return true; } InspectableObject row = getNextRow(); if (row != null) { pushRow(row); } else { - operator.flush(); + flushRow(); } return row != null; } @@ -577,6 +424,10 @@ protected void pushRow(InspectableObject row) throws HiveException { operator.processOp(row.o, 0); } + protected void flushRow() throws HiveException { + operator.flush(); + } + private transient final InspectableObject inspectable = new InspectableObject(); /** @@ -602,28 +453,16 @@ public InspectableObject getNextRow() throws IOException { * If file contains footer, used FooterBuffer to cache and remove footer * records at the end of the file. */ - headerCount = 0; - footerCount = 0; - TableDesc table = null; - if (currTbl != null) { - table = currTbl; - } else if (currPart != null) { - table = currPart.getTableDesc(); - } - if (table != null) { - headerCount = Utilities.getHeaderCount(table); - footerCount = Utilities.getFooterCount(table, job); - } + headerCount = Utilities.getHeaderCount(currDesc.getTableDesc()); + footerCount = Utilities.getFooterCount(currDesc.getTableDesc(), job); // Skip header lines. opNotEOF = Utilities.skipHeader(currRecReader, headerCount, key, value); // Initialize footer buffer. 
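For reference, the `splitSampling` method retained above accumulates whole splits starting from a seeded offset (wrapping around) until a byte target is met, logically truncating the last one. A hedged sketch of that loop with made-up sizes:

```java
import java.util.ArrayList;
import java.util.List;

public class SplitSamplingDemo {
  public static void main(String[] args) {
    long[] splitLengths = {400L, 300L, 300L};
    long targetSize = 500L;                       // e.g. 50% of the 1000-byte total
    int startIndex = 7 % splitLengths.length;     // seedNum % splits.length

    List<String> picked = new ArrayList<>();
    long size = 0;
    for (int i = 0; i < splitLengths.length && size < targetSize; i++) {
      int index = (startIndex + i) % splitLengths.length;  // wrap around
      long take = Math.min(splitLengths[index], targetSize - size);
      // A partially taken split is what the real code records as shrinkedLength.
      picked.add("split-" + index + (take < splitLengths[index] ? " (shrunk to " + take + ")" : ""));
      size += take;
    }
    System.out.println(picked);  // [split-1, split-2 (shrunk to 200)]
  }
}
```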
- if (opNotEOF) { - if (footerCount > 0) { - footerBuffer = new FooterBuffer(); - opNotEOF = footerBuffer.initializeBuffer(job, currRecReader, footerCount, key, value); - } + if (opNotEOF && footerCount > 0) { + footerBuffer = new FooterBuffer(); + opNotEOF = footerBuffer.initializeBuffer(job, currRecReader, footerCount, key, value); } } @@ -640,25 +479,24 @@ public InspectableObject getNextRow() throws IOException { if (opNotEOF) { if (operator != null && context != null && context.inputFileChanged()) { // The child operators cleanup if input file has changed - try { - operator.cleanUpInputFileChanged(); - } catch (HiveException e) { - throw new IOException(e); - } + operator.cleanUpInputFileChanged(); } if (hasVC) { - vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde); - row[isPartitioned ? 2 : 1] = vcValues; + row[isPartitioned ? 2 : 1] = + MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, currSerDe); + } + Object deserialized = currSerDe.deserialize(value); + if (ObjectConverter != null) { + deserialized = ObjectConverter.convert(deserialized); } - row[0] = partTblObjectInspectorConverter.convert(serde.deserialize(value)); if (hasVC || isPartitioned) { + row[0] = deserialized; inspectable.o = row; - inspectable.oi = rowObjectInspector; - return inspectable; + } else { + inspectable.o = deserialized; } - inspectable.o = row[0]; - inspectable.oi = tblSerde.getObjectInspector(); + inspectable.oi = currSerDe.getObjectInspector(); return inspectable; } else { currRecReader.close(); @@ -688,13 +526,13 @@ public void clearFetchContext() throws HiveException { context.clear(); context = null; } - this.currTbl = null; this.currPath = null; this.iterPath = null; this.iterPartDesc = null; + this.iterSplits = Iterators.emptyIterator(); } catch (Exception e) { throw new HiveException("Failed with exception " + e.getMessage() - + org.apache.hadoop.util.StringUtils.stringifyException(e)); + + StringUtils.stringifyException(e)); } } @@ -703,25 +541,33 @@ public void clearFetchContext() throws HiveException { */ public void setupContext(List paths) { this.iterPath = paths.iterator(); - if (work.isNotPartitioned()) { - this.currTbl = work.getTblDesc(); + List partitionDescs; + if (!isPartitioned) { + this.iterPartDesc = Iterators.cycle(new PartitionDesc(work.getTblDesc(), null)); } else { this.iterPartDesc = work.getPartDescs(paths).iterator(); } - setupExecContext(); + this.context = setupExecContext(operator, paths); } /** * returns output ObjectInspector, never null */ - public ObjectInspector getOutputObjectInspector() throws HiveException { - if(null != work.getStatRowOI()) { - return work.getStatRowOI(); - } + public ObjectInspector getOutputObjectInspector() { + return outputOI; + } + + private StructObjectInspector setupOutputObjectInspector() throws HiveException { + TableDesc tableDesc = work.getTblDesc(); try { - if (work.isNotPartitioned()) { - return getRowInspectorFromTable(work.getTblDesc()); + tableSerDe = tableDesc.getDeserializer(job, true); + tableOI = (StructObjectInspector) tableSerDe.getObjectInspector(); + if (!isPartitioned) { + return getTableRowOI(tableOI); } + partKeyOI = getPartitionKeyOI(tableDesc); + + PartitionDesc partDesc = new PartitionDesc(tableDesc, null); List listParts = work.getPartDesc(); // Chose the table descriptor if none of the partitions is present. 
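The header/footer handling consolidated above skips the first `headerCount` records outright and withholds the trailing `footerCount` records by buffering. The sketch below (hypothetical, not Hive's FooterBuffer class) shows the buffering trick: a record is emitted only once `footerCount` newer records stand behind it, so footer rows are never surfaced.

```java
import java.util.ArrayDeque;
import java.util.Deque;

public class HeaderFooterDemo {
  public static void main(String[] args) {
    String[] records = {"header", "row-1", "row-2", "row-3", "footer"};
    int headerCount = 1, footerCount = 1;

    Deque<String> buffer = new ArrayDeque<>();           // holds the last footerCount records
    for (int i = headerCount; i < records.length; i++) { // skip header rows outright
      buffer.addLast(records[i]);
      if (buffer.size() > footerCount) {
        // Safe to emit: at least footerCount newer records exist behind it.
        System.out.println(buffer.removeFirst());
      }
    }
    // What remains in the buffer is exactly the footer rows, which are dropped.
  }
}
```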
// For eg: consider the query: @@ -729,39 +575,50 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { // Both T1 and T2 and partitioned tables, but T1 does not have any partitions // FetchOperator is invoked for T1, and listParts is empty. In that case, // use T1's schema to get the ObjectInspector. - if (listParts == null || listParts.isEmpty()) { - return getRowInspectorFromPartitionedTable(work.getTblDesc()); + if (listParts == null || listParts.isEmpty() || !needConversion(tableDesc, listParts)) { + return getPartitionedRowOI(tableOI); } + convertedOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI( + tableOI, tableOI, null, false); + return getPartitionedRowOI(convertedOI); + } catch (Exception e) { + throw new HiveException("Failed with exception " + e.getMessage() + + StringUtils.stringifyException(e)); + } + } - // Choose any partition. It's OI needs to be converted to the table OI - // Whenever a new partition is being read, a new converter is being created - PartitionDesc partition = listParts.get(0); - Deserializer tblSerde = partition.getTableDesc().getDeserializerClass().newInstance(); - SerDeUtils.initializeSerDe(tblSerde, job, partition.getTableDesc().getProperties(), null); - - partitionedTableOI = null; - ObjectInspector tableOI = tblSerde.getObjectInspector(); - - // Get the OI corresponding to all the partitions - for (PartitionDesc listPart : listParts) { - partition = listPart; - Deserializer partSerde = listPart.getDeserializer(job); - SerDeUtils.initializeSerDe(partSerde, job, partition.getTableDesc().getProperties(), - listPart.getProperties()); - - partitionedTableOI = ObjectInspectorConverters.getConvertedOI( - partSerde.getObjectInspector(), tableOI, oiSettableProperties); - if (!partitionedTableOI.equals(tableOI)) { - break; + private StructObjectInspector getTableRowOI(StructObjectInspector valueOI) { + return hasVC ? ObjectInspectorFactory.getUnionStructObjectInspector( + Arrays.asList(valueOI, vcsOI)) : valueOI; + } + + private StructObjectInspector getPartitionedRowOI(StructObjectInspector valueOI) { + return ObjectInspectorFactory.getUnionStructObjectInspector( + hasVC ? 
Arrays.asList(valueOI, partKeyOI, vcsOI) : Arrays.asList(valueOI, partKeyOI)); + } + + private boolean needConversion(PartitionDesc partitionDesc) { + return needConversion(partitionDesc.getTableDesc(), Arrays.asList(partitionDesc)); + } + + // if table and all partitions have the same schema and serde, no need to convert + private boolean needConversion(TableDesc tableDesc, List partDescs) { + Class tableSerDe = tableDesc.getDeserializerClass(); + String[] schemaProps = AnnotationUtils.getAnnotation(tableSerDe, SerDeSpec.class).schemaProps(); + Properties tableProps = tableDesc.getProperties(); + for (PartitionDesc partitionDesc : partDescs) { + if (!tableSerDe.getName().equals(partitionDesc.getDeserializerClassName())) { + return true; + } + Properties partProps = partitionDesc.getProperties(); + for (String schemaProp : schemaProps) { + if (!org.apache.commons.lang3.StringUtils.equals( + tableProps.getProperty(schemaProp), partProps.getProperty(schemaProp))) { + return true; } } - return getRowInspectorFromPartition(partition, partitionedTableOI); - } catch (Exception e) { - throw new HiveException("Failed with exception " + e.getMessage() - + org.apache.hadoop.util.StringUtils.stringifyException(e)); - } finally { - currPart = null; } + return false; } /** @@ -797,11 +654,17 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { // shrinked size for this split. counter part of this in normal mode is // InputSplitShim.shrinkedLength. // what's different is that this is evaluated by unit of row using RecordReader.getPos() - // and that is evaluated by unit of split using InputSplt.getLength(). + // and that is evaluated by unit of split using InputSplit.getLength(). private long shrinkedLength = -1; + private InputFormat inputFormat; + + public FetchInputFormatSplit(InputSplit split, InputFormat inputFormat) { + super(split, inputFormat.getClass().getName()); + this.inputFormat = inputFormat; + } - public FetchInputFormatSplit(InputSplit split, String name) { - super(split, name); + public RecordReader getRecordReader(JobConf job) throws IOException { + return inputFormat.getRecordReader(getInputSplit(), job, Reporter.NULL); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 4814fc3..ded0849 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -121,6 +122,8 @@ OrcFileMergeOperator.class)); opvec.add(new OpTuple(CommonMergeJoinDesc.class, CommonMergeJoinOperator.class)); + opvec.add(new OpTuple(ListSinkDesc.class, + ListSinkOperator.class)); } static { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java index 5d126a5..96f4530 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java @@ -133,20 +133,20 @@ public void writePartitionKeys(Path path, HiveConf conf, JobConf job) throws IOE } // random sampling - 
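`needConversion` above is the key optimization in this refactor: per-partition ObjectInspector conversion is skipped when the partition uses the table's deserializer class and matches it on every schema-bearing property, a list the real code reads from the SerDe's `@SerDeSpec` annotation via `AnnotationUtils`. A generic sketch of the comparison, with the property list passed in directly:

```java
import java.util.Objects;
import java.util.Properties;

public class NeedConversionDemo {
  static boolean needConversion(String tableSerDe, Properties tableProps,
                                String partSerDe, Properties partProps,
                                String[] schemaProps) {
    if (!tableSerDe.equals(partSerDe)) {
      return true;                       // different deserializer: must convert
    }
    for (String prop : schemaProps) {
      if (!Objects.equals(tableProps.getProperty(prop), partProps.getProperty(prop))) {
        return true;                     // schema-bearing property differs
      }
    }
    return false;                        // identical schema: skip conversion
  }

  public static void main(String[] args) {
    Properties t = new Properties(), p = new Properties();
    t.setProperty("columns", "key,value");
    p.setProperty("columns", "key,value,extra");  // partition grew a column
    System.out.println(needConversion("LazySimpleSerDe", t, "LazySimpleSerDe", p,
        new String[] {"columns", "columns.types"}));  // true
  }
}
```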
public static FetchSampler createSampler(FetchWork work, HiveConf conf, JobConf job, - Operator operator) { + public static FetchOperator createSampler(FetchWork work, HiveConf conf, JobConf job, + Operator operator) throws HiveException { int sampleNum = conf.getIntVar(HiveConf.ConfVars.HIVESAMPLINGNUMBERFORORDERBY); float samplePercent = conf.getFloatVar(HiveConf.ConfVars.HIVESAMPLINGPERCENTFORORDERBY); if (samplePercent < 0.0 || samplePercent > 1.0) { throw new IllegalArgumentException("Percentile value must be within the range of 0 to 1."); } - FetchSampler sampler = new FetchSampler(work, job, operator); + RandomSampler sampler = new RandomSampler(work, job, operator); sampler.setSampleNum(sampleNum); sampler.setSamplePercent(samplePercent); return sampler; } - private static class FetchSampler extends FetchOperator { + private static class RandomSampler extends FetchOperator { private int sampleNum = 1000; private float samplePercent = 0.1f; @@ -154,7 +154,8 @@ public static FetchSampler createSampler(FetchWork work, HiveConf conf, JobConf private int sampled; - public FetchSampler(FetchWork work, JobConf job, Operator operator) { + public RandomSampler(FetchWork work, JobConf job, Operator operator) + throws HiveException { super(work, job, operator, null); } @@ -174,7 +175,7 @@ public boolean pushRow() throws IOException, HiveException { if (sampled < sampleNum) { return true; } - operator.flush(); + flushRow(); return false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java index ca65a8e..94ae932 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hive.ql.exec.mr; import java.io.IOException; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryMXBean; import java.net.URLClassLoader; import java.util.Arrays; import java.util.List; @@ -30,8 +28,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.exec.FetchOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.ObjectCache; @@ -54,7 +50,7 @@ /** * ExecMapper is the generic Map class for Hive. Together with ExecReducer it is * the bridge between the map-reduce framework and the Hive operator pipeline at - * execution time. It's main responsabilities are: + * execution time. 
It's main responsibilities are: * * - Load and setup the operator pipeline from XML * - Run the pipeline by transforming key value pairs to records and forwarding them to the operators @@ -66,7 +62,6 @@ private static final String PLAN_KEY = "__MAP_PLAN__"; private MapOperator mo; - private Map fetchOperators; private OutputCollector oc; private JobConf jc; private boolean abort = false; @@ -74,7 +69,6 @@ public static final Log l4j = LogFactory.getLog(ExecMapper.class); private static boolean done; - // used to log memory usage periodically private MapredLocalWork localWork = null; private boolean isLogInfoEnabled = false; @@ -213,15 +207,6 @@ public void close() { } } - if (fetchOperators != null) { - MapredLocalWork localWork = mo.getConf().getMapRedLocalWork(); - for (Map.Entry entry : fetchOperators.entrySet()) { - Operator forwardOp = localWork - .getAliasToWork().get(entry.getKey()); - forwardOp.close(abort); - } - } - ReportStats rps = new ReportStats(rp, jc); mo.preorderMap(rps); return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 033f463..d06bdb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -770,9 +770,14 @@ public Path getDefaultDestDir(Configuration conf) throws LoginException, IOExcep String hdfsDirPathStr = jarPathStr; Path hdfsDirPath = new Path(hdfsDirPathStr); - FileStatus fstatus = fs.getFileStatus(hdfsDirPath); - if (!fstatus.isDir()) { - throw new IOException(ErrorMsg.INVALID_DIR.format(hdfsDirPath.toString())); + try { + FileStatus fstatus = fs.getFileStatus(hdfsDirPath); + if (!fstatus.isDir()) { + throw new IOException(ErrorMsg.INVALID_DIR.format(hdfsDirPath.toString())); + } + } catch (FileNotFoundException e) { + // directory does not exist, create it + fs.mkdirs(hdfsDirPath); } Path retPath = new Path(hdfsDirPath.toString() + "/.hiveJars"); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 24300d1..8207599 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.LinkedHashSet; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,12 +33,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -158,7 +163,7 @@ private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) thr // all we can handle is LimitOperator, FilterOperator SelectOperator and 
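On the DagUtils hunk above: `FileSystem.getFileStatus` throws `FileNotFoundException` for a missing path, so the patch turns "stat then fail" into "stat, and create on miss". A standalone sketch of that get-or-create pattern, assuming hadoop-common on the classpath (`isDirectory()` is used here in place of the deprecated `isDir()`):

```java
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EnsureDirDemo {
  static void ensureDir(FileSystem fs, Path dir) throws IOException {
    try {
      FileStatus status = fs.getFileStatus(dir);
      if (!status.isDirectory()) {
        throw new IOException(dir + " exists but is not a directory");
      }
    } catch (FileNotFoundException e) {
      fs.mkdirs(dir);   // directory does not exist yet: create it
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    ensureDir(fs, new Path("/tmp/hive-jars-demo/.hiveJars"));
  }
}
```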
final FS // // for non-aggressive mode (minimal) - // 1. samping is not allowed + // 1. sampling is not allowed // 2. for partitioned table, all filters should be targeted to partition column // 3. SelectOperator should use only simple cast/column access private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, @@ -171,53 +176,52 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, if (!aggressive && qb.hasTableSample(alias)) { return null; } - Table table = pctx.getTopToTable().get(ts); if (table == null) { return null; } ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput()); if (!table.isPartitioned()) { - return checkOperators(new FetchData(parent, table, splitSample), ts, aggressive, false); + FetchData fetch = new FetchData(ts, parent, table, splitSample); + return checkOperators(fetch, aggressive, false); } boolean bypassFilter = false; if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) { ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts); - bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner); - } - if (aggressive || bypassFilter) { - PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); - if (aggressive || !pruned.hasUnknownPartitions()) { - bypassFilter &= !pruned.hasUnknownPartitions(); - return checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts, - aggressive, bypassFilter); + if (PartitionPruner.onlyContainsPartnCols(table, pruner)) { + bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - return null; + if (!aggressive && !bypassFilter) { + return null; + } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + return checkOperators(fetch, aggressive, bypassFilter); } - private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive, - boolean bypassFilter) { + private FetchData checkOperators(FetchData fetch, boolean aggressive, boolean bypassFilter) { + if (aggressive) { + return isConvertible(fetch) ? 
fetch : null; + } + return checkOperators(fetch, fetch.scanOp, bypassFilter); + } + + private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean bypassFilter) { if (ts.getChildOperators().size() != 1) { return null; } Operator op = ts.getChildOperators().get(0); for (; ; op = op.getChildOperators().get(0)) { if (op instanceof SelectOperator) { - if (!aggressive) { - if (!checkExpressions((SelectOperator) op)) { - break; - } + if (!checkExpressions((SelectOperator) op)) { + return null; } continue; } - if (aggressive) { - if (!(op instanceof LimitOperator || op instanceof FilterOperator)) { - break; - } - } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { + if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { break; } @@ -227,7 +231,6 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean } if (op instanceof FileSinkOperator) { - fetch.scanOp = ts; fetch.fileSink = op; return fetch; } @@ -237,6 +240,9 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean private boolean checkExpressions(SelectOperator op) { SelectDesc desc = op.getConf(); + if (desc.isSelectStar() || desc.isSelStarNoCompute()) { + return true; + } for (ExprNodeDesc expr : desc.getColList()) { if (!checkExpression(expr)) { return false; @@ -264,22 +270,53 @@ private boolean checkExpression(ExprNodeDesc expr) { return false; } + private boolean isConvertible(FetchData fetch) { + return isConvertible(fetch, fetch.scanOp, new HashSet>()); + } + + private boolean isConvertible(FetchData fetch, Operator operator, Set> traversed) { + if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator + || operator instanceof ScriptOperator) { + return false; + } + if (!traversed.add(operator)) { + return true; + } + if (operator.getNumChild() == 0) { + if (operator instanceof FileSinkOperator) { + fetch.fileSink = operator; + return true; + } + return false; + } + for (Operator child : operator.getChildOperators()) { + if (!traversed.containsAll(child.getParentOperators())){ + continue; + } + if (!isConvertible(fetch, child, traversed)) { + return false; + } + } + return true; + } + private class FetchData { + // source table scan + private final TableScanOperator scanOp; private final ReadEntity parent; + private final Table table; private final SplitSample splitSample; private final PrunedPartitionList partsList; - private final LinkedHashSet inputs = new LinkedHashSet(); + private final Set inputs = new LinkedHashSet(); private final boolean onlyPruningFilter; - // source table scan - private TableScanOperator scanOp; - // this is always non-null when conversion is completed private Operator fileSink; - private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = null; @@ -287,8 +324,9 @@ private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { this.onlyPruningFilter = false; } - private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, PrunedPartitionList partsList, SplitSample splitSample, boolean bypassFilter) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = partsList; @@ -306,7 +344,7 @@ public 
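The `isConvertible` traversal above decides whether an aggressive fetch conversion is safe: it refuses operators that require a real execution job (ReduceSink, joins, scripts), requires the chain to end in a FileSink, and defers a multi-parent child until every parent has been visited so each operator is checked exactly once. A simplified model with a hypothetical `Node` type:

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DagWalkDemo {
  static class Node {
    final String kind;
    final List<Node> parents = new ArrayList<>();
    final List<Node> children = new ArrayList<>();
    Node(String kind) { this.kind = kind; }
    Node connect(Node child) { children.add(child); child.parents.add(this); return child; }
  }

  static boolean convertible(Node node, Set<Node> traversed) {
    if (node.kind.equals("ReduceSink") || node.kind.equals("Join")
        || node.kind.equals("Script")) {
      return false;                          // needs a real execution job
    }
    if (!traversed.add(node)) {
      return true;                           // already vetted via another parent
    }
    if (node.children.isEmpty()) {
      return node.kind.equals("FileSink");   // chain must end in a sink
    }
    for (Node child : node.children) {
      if (!traversed.containsAll(child.parents)) {
        continue;                            // revisit once the last parent arrives
      }
      if (!convertible(child, traversed)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Node scan = new Node("TableScan");
    scan.connect(new Node("Select")).connect(new Node("FileSink"));
    System.out.println(convertible(scan, new HashSet<Node>()));  // true
  }
}
```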
boolean hasOnlyPruningFilter() { private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { - inputs.add(new ReadEntity(table, parent, parent == null)); + inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); @@ -399,8 +437,8 @@ private long getFileLength(JobConf conf, Path path, Class } public static ListSinkOperator replaceFSwithLS(Operator fileSink, String nullFormat) { - ListSinkOperator sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(nullFormat)); + ListSinkDesc desc = new ListSinkDesc(nullFormat); + ListSinkOperator sink = (ListSinkOperator) OperatorFactory.get(desc); sink.setParentOperators(new ArrayList>()); Operator parent = fileSink.getParentOperators().get(0); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 99b2950..58f89c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -389,8 +389,11 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx } calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED); break; - case BINARY: case VOID: + calciteLiteral = cluster.getRexBuilder().makeLiteral(null, + cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true); + break; + case BINARY: case UNKNOWN: default: throw new RuntimeException("UnSupported Literal"); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index 3df1c26..9c26907 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -391,8 +392,9 @@ public static boolean cannotConvert(long aliasKnownSize, List> listTasks = new ArrayList>(); // create task to aliases mapping and alias to input file mapping for resolver + // Must be deterministic order map for consistent q-test output across Java versions HashMap, Set> taskToAliases = - new HashMap, Set>(); + new LinkedHashMap, Set>(); HashMap> pathToAliases = currWork.getPathToAliases(); Map> aliasToWork = currWork.getAliasToWork(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java index c37b6fd..c0a72b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java @@ -200,8 +200,9 @@ private void processCurrentTask(Task currTask, .getResolverCtx(); HashMap, Set> taskToAliases = context.getTaskToAliases(); // to avoid concurrent modify the hashmap + // Must be deterministic order map for consistent q-test output across Java versions HashMap, Set> newTaskToAliases = - new 
HashMap, Set>(); + new LinkedHashMap, Set>(); // reset the resolver for (Map.Entry, Set> entry : taskToAliases.entrySet()) { Task task = entry.getKey(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java index 3fcccb0..6f92b13 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java @@ -21,6 +21,7 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -265,8 +266,9 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) List> listTasks = new ArrayList>(); // create task to aliases mapping and alias to input file mapping for resolver + // Must be deterministic order map for consistent q-test output across Java versions HashMap, Set> taskToAliases = - new HashMap, Set>(); + new LinkedHashMap, Set>(); // Note that pathToAlias will behave as if the original plan was a join plan HashMap> pathToAliases = currJoinWork.getMapWork().getPathToAliases(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java new file mode 100644 index 0000000..5d72e15 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -0,0 +1,2663 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse; + +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.UndeclaredThrowableException; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import org.antlr.runtime.tree.TreeVisitor; +import org.antlr.runtime.tree.TreeVisitorAction; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptQuery; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.hep.HepMatchOrder; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; +import org.apache.calcite.rel.rules.JoinToMultiJoinRule; +import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; +import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; +import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexWindowBound; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql2rel.RelFieldTrimmer; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.util.CompositeList; +import org.apache.calcite.util.ImmutableBitSet; +import 
org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryProperties; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; +import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; 
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.ImmutableList.Builder; + +public class CalcitePlanner extends SemanticAnalyzer { + private AtomicInteger noColsMissingStats = new AtomicInteger(0); + private List topLevelFieldSchema; + private SemanticException semanticException; + private boolean runCBO = true; + + public CalcitePlanner(HiveConf conf) throws SemanticException { + super(conf); + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) { + runCBO = false; + } + } + + @Override + @SuppressWarnings("nls") + public void analyzeInternal(ASTNode ast) throws SemanticException { + if (runCBO) { + PreCboCtx cboCtx = new PreCboCtx(); + super.analyzeInternal(ast, cboCtx); + } else { + super.analyzeInternal(ast); + } + } + + @SuppressWarnings("rawtypes") + Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { + Operator sinkOp = null; + boolean skipCalcitePlan = false; + + if (!runCBO) { + skipCalcitePlan = true; + } else { + PreCboCtx cboCtx = (PreCboCtx) plannerCtx; + + // Note: for now, we don't actually pass the queryForCbo to CBO, because + // it accepts qb, not AST, and can also access all the private stuff in + // SA. We rely on the fact that CBO ignores the unknown tokens (create + // table, destination), so if the query is otherwise ok, it is as if we + // did remove those and gave CBO the proper AST. That is kinda hacky. + ASTNode queryForCbo = ast; + if (cboCtx.type == PreCboCtx.Type.CTAS) { + queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query + } + runCBO = canHandleAstForCbo(queryForCbo, getQB(), cboCtx); + + if (runCBO) { + disableJoinMerge = true; + boolean reAnalyzeAST = false; + + try { + // 1. Gen Optimized AST + ASTNode newAST = getOptimizedAST(); + + // 1.1. Fix up the query for insert/ctas + newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); + + // 2. Regen OP plan from optimized AST + init(false); + if (cboCtx.type == PreCboCtx.Type.CTAS) { + // Redo create-table analysis, because it's not part of doPhase1. + setAST(newAST); + newAST = reAnalyzeCtasAfterCbo(newAST); + } + Phase1Ctx ctx_1 = initPhase1Ctx(); + if (!doPhase1(newAST, getQB(), ctx_1, null)) { + throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan"); + } + // unfortunately making prunedPartitions immutable is not possible + // here with SemiJoins not all tables are costed in CBO, so their + // PartitionList is not evaluated until the run phase. 
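+ // Resolve table and partition metadata for the QB that doPhase1 just + // rebuilt from the optimized AST.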
+ getMetaData(getQB()); + + disableJoinMerge = false; + sinkOp = genPlan(getQB()); + LOG.info("CBO Succeeded; optimized logical plan."); + LOG.debug(newAST.dump()); + } catch (Exception e) { + boolean isMissingStats = noColsMissingStats.get() > 0; + if (isMissingStats) { + LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO"); + } else { + LOG.error("CBO failed, skipping CBO. ", e); + } + if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || isMissingStats + || e instanceof CalciteSemanticException) { + reAnalyzeAST = true; + } else if (e instanceof SemanticException) { + throw (SemanticException) e; + } else if (e instanceof RuntimeException) { + throw (RuntimeException) e; + } else { + throw new SemanticException(e); + } + } finally { + runCBO = false; + disableJoinMerge = false; + if (reAnalyzeAST) { + init(true); + prunedPartitions.clear(); + // Assumption: At this point Parse Tree gen & resolution will always + // be true (since we started out that way). + super.genResolvedParseTree(ast, new PlannerContext()); + skipCalcitePlan = true; + } + } + } else { + skipCalcitePlan = true; + } + } + + if (skipCalcitePlan) { + sinkOp = super.genOPTree(ast, plannerCtx); + } + + return sinkOp; + } + + /** + * Can CBO handle the given AST? + * + * @param ast + * Top level AST + * @param qb + * top level QB corresponding to the AST + * @param cboCtx + * @return boolean + * + * Assumption:
+ * If top level QB is query then everything below it must also be + * Query. + */ + boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { + int root = ast.getToken().getType(); + boolean needToLogMessage = STATIC_LOG.isInfoEnabled(); + boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN + || qb.isCTAS(); + boolean isSupportedType = qb.getIsQuery() || qb.isCTAS() + || cboCtx.type == PreCboCtx.Type.INSERT; + boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); + boolean result = isSupportedRoot && isSupportedType && getCreateViewDesc() == null + && noBadTokens; + + if (!result) { + if (needToLogMessage) { + String msg = ""; + if (!isSupportedRoot) { + msg += "doesn't have QUERY or EXPLAIN as root and not a CTAS; "; + } + if (!isSupportedType) { + msg += "is not a query, CTAS, or insert; "; + } + if (getCreateViewDesc() != null) { + msg += "has create view; "; + } + if (!noBadTokens) { + msg += "has unsupported tokens; "; + } + + if (msg.isEmpty()) { + msg += "has some unspecified limitations; "; + } + STATIC_LOG.info("Not invoking CBO because the statement " + + msg.substring(0, msg.length() - 2)); + } + return false; + } + // Now check QB in more detail. canHandleQbForCbo returns null if query can + // be handled. + String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage); + if (msg == null) { + return true; + } + if (needToLogMessage) { + STATIC_LOG.info("Not invoking CBO because the statement " + + msg.substring(0, msg.length() - 2)); + } + return false; + } + + /** + * Checks whether Calcite can handle the query. + * + * @param queryProperties + * @param conf + * @param topLevelQB + * Does the QB correspond to the top-most query block? + * @param verbose + * Whether return value should be verbose in case of failure. + * @return null if the query can be handled; non-null reason string if it + * cannot be. + * + * Assumption:
+ * 1. If top level QB is query then everything below it must also be + * Query
+ * 2. Nested Subquery will return false for qbToChk.getIsQuery() + */ + static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, + boolean topLevelQB, boolean verbose) { + boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); + boolean isStrictTest = isInTest + && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); + boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest; + + if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() + && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() + && !queryProperties.hasPTF() && !queryProperties.usesScript() + && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { + // Ok to run CBO. + return null; + } + + // Not ok to run CBO, build error message. + String msg = ""; + if (verbose) { + if (isStrictTest) + msg += "is in test running in mode other than nonstrict; "; + if (!hasEnoughJoins) + msg += "has too few joins; "; + if (queryProperties.hasClusterBy()) + msg += "has cluster by; "; + if (queryProperties.hasDistributeBy()) + msg += "has distribute by; "; + if (queryProperties.hasSortBy()) + msg += "has sort by; "; + if (queryProperties.hasPTF()) + msg += "has PTF; "; + if (queryProperties.usesScript()) + msg += "uses scripts; "; + if (queryProperties.hasMultiDestQuery()) + msg += "is a multi-destination query; "; + if (queryProperties.hasLateralViews()) + msg += "has lateral views; "; + + if (msg.isEmpty()) + msg += "has some unspecified limitations; "; + } + return msg; + } + + @Override + boolean continueJoinMerge() { + return !runCBO; + } + + @Override + String fixCtasColumnName(String colName) { + if (runCBO) { + int lastDot = colName.lastIndexOf('.'); + if (lastDot < 0) + return colName; // alias is not fully qualified + String nqColumnName = colName.substring(lastDot + 1); + STATIC_LOG.debug("Replacing " + colName + " (produced by CBO) by " + nqColumnName); + return nqColumnName; + } + + return super.fixCtasColumnName(colName); + } + + /** + * The context that doPhase1 uses to populate information pertaining to CBO + * (currently, this is used for CTAS and insert-as-select). + */ + static class PreCboCtx extends PlannerContext { + enum Type { + NONE, INSERT, CTAS, UNEXPECTED + } + + private ASTNode nodeOfInterest; + private Type type = Type.NONE; + + private void set(Type type, ASTNode ast) { + if (this.type != Type.NONE) { + STATIC_LOG.warn("Setting " + type + " when already " + this.type + "; node " + ast.dump() + + " vs old node " + nodeOfInterest.dump()); + this.type = Type.UNEXPECTED; + return; + } + this.type = type; + this.nodeOfInterest = ast; + } + + @Override + void setCTASToken(ASTNode child) { + set(PreCboCtx.Type.CTAS, child); + } + + @Override + void setInsertToken(ASTNode ast, boolean isTmpFileDest) { + if (!isTmpFileDest) { + set(PreCboCtx.Type.INSERT, ast); + } + } + } + + ASTNode fixUpCtasAndInsertAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) + throws SemanticException { + switch (cboCtx.type) { + + case NONE: + // nothing to do + return newAst; + + case CTAS: { + // Patch the optimized query back into original CTAS AST, replacing the + // original query. + replaceASTChild(cboCtx.nodeOfInterest, newAst); + return originalAst; + } + + case INSERT: { + // We need to patch the dest back to original into new query. + // This makes assumptions about the structure of the AST. 
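+ // Expected shape (ASTSearcher matches the first breadth-first hit per + // token, with no backtracking): TOK_QUERY -> TOK_INSERT -> TOK_DESTINATION.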
+ ASTNode newDest = new ASTSearcher().simpleBreadthFirstSearch(newAst, HiveParser.TOK_QUERY, + HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION); + if (newDest == null) { + LOG.error("Cannot find destination after CBO; new ast is " + newAst.dump()); + throw new SemanticException("Cannot find destination after CBO"); + } + replaceASTChild(newDest, cboCtx.nodeOfInterest); + return newAst; + } + + default: + throw new AssertionError("Unexpected type " + cboCtx.type); + } + } + + ASTNode reAnalyzeCtasAfterCbo(ASTNode newAst) throws SemanticException { + // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it + // here. + newAst = analyzeCreateTable(newAst, getQB(), null); + if (newAst == null) { + LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + " new ast is " + + getAST().dump()); + throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO"); + } + return newAst; + } + + /** + * Performs breadth-first search of the AST for a nested set of tokens. Tokens + * don't have to be each others' direct children; they can be separated by + * layers of other tokens. For each token in the list, the first one found is + * matched and there's no backtracking; thus, if AST has multiple instances of + * some token, of which only one matches, it is not guaranteed to be found. We + * use this for simple things. Not thread-safe - reuses searchQueue. + */ + static class ASTSearcher { + private final LinkedList searchQueue = new LinkedList(); + + public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) { + searchQueue.clear(); + searchQueue.add(ast); + for (int i = 0; i < tokens.length; ++i) { + boolean found = false; + int token = tokens[i]; + while (!searchQueue.isEmpty() && !found) { + ASTNode next = searchQueue.poll(); + found = next.getType() == token; + if (found) { + if (i == tokens.length - 1) + return next; + searchQueue.clear(); + } + for (int j = 0; j < next.getChildCount(); ++j) { + searchQueue.add((ASTNode) next.getChild(j)); + } + } + if (!found) + return null; + } + return null; + } + } + + private static void replaceASTChild(ASTNode child, ASTNode newChild) { + ASTNode parent = (ASTNode) child.parent; + int childIndex = child.childIndex; + parent.deleteChild(childIndex); + parent.insertChild(childIndex, newChild); + } + + /** + * Get Optimized AST for the given QB tree in the semAnalyzer. + * + * @return Optimized operator tree translated into a Hive AST + * @throws SemanticException + */ + ASTNode getOptimizedAST() throws SemanticException { + ASTNode optiqOptimizedAST = null; + RelNode optimizedOptiqPlan = null; + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions); + + try { + optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks + .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); + } catch (Exception e) { + rethrowCalciteException(e); + throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); + } + optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema); + + return optiqOptimizedAST; + } + + /** + * Unwraps Calcite invocation exceptions coming from the metadata provider + * chain and obtains the real cause. + * + * @param e + */ + private void rethrowCalciteException(Exception e) throws SemanticException { + Throwable first = (semanticException != null) ?
semanticException : e, current = first, cause = current + .getCause(); + while (cause != null) { + Throwable causeOfCause = cause.getCause(); + if (current == first && causeOfCause == null && isUselessCause(first)) { + // "cause" is a root cause, and "e"/"first" is a useless + // exception it's wrapped in. + first = cause; + break; + } else if (causeOfCause != null && isUselessCause(cause) + && ExceptionHelper.resetCause(current, causeOfCause)) { + // "cause" was a useless intermediate cause and we replaced it + // with its own cause. + cause = causeOfCause; + continue; // do loop once again with the new cause of "current" + } + current = cause; + cause = current.getCause(); + } + + if (first instanceof RuntimeException) { + throw (RuntimeException) first; + } else if (first instanceof SemanticException) { + throw (SemanticException) first; + } + throw new RuntimeException(first); + } + + private static class ExceptionHelper { + private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"), + TARGET_FIELD = getField(InvocationTargetException.class, "target"), + MESSAGE_FIELD = getField(Throwable.class, "detailMessage"); + + private static Field getField(Class clazz, String name) { + try { + Field f = clazz.getDeclaredField(name); + f.setAccessible(true); + return f; + } catch (Throwable t) { + return null; + } + } + + public static boolean resetCause(Throwable target, Throwable newCause) { + try { + if (MESSAGE_FIELD == null) + return false; + Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD; + if (field == null) + return false; + + Throwable oldCause = target.getCause(); + String oldMsg = target.getMessage(); + field.set(target, newCause); + if (oldMsg != null && oldMsg.equals(oldCause.toString())) { + MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString()); + } + } catch (Throwable se) { + return false; + } + return true; + } + } + + private boolean isUselessCause(Throwable t) { + return t instanceof RuntimeException || t instanceof InvocationTargetException + || t instanceof UndeclaredThrowableException; + } + + /** + * Code responsible for Calcite plan generation and optimization. + */ + private class CalcitePlannerAction implements Frameworks.PlannerAction { + private RelOptCluster cluster; + private RelOptSchema relOptSchema; + private Map partitionCache; + + // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or + // just the last one? + LinkedHashMap relToHiveRR = new LinkedHashMap(); + LinkedHashMap> relToHiveColNameCalcitePosMap = new LinkedHashMap>(); + + CalcitePlannerAction(Map partitionCache) { + this.partitionCache = partitionCache; + } + + @Override + public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { + RelNode calciteGenPlan = null; + RelNode calcitePreCboPlan = null; + RelNode calciteOptimizedPlan = null; + + /* + * recreate cluster, so that it picks up the additional traitDef + */ + RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); + final RelOptQuery query = new RelOptQuery(planner); + final RexBuilder rexBuilder = cluster.getRexBuilder(); + cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); + + this.cluster = cluster; + this.relOptSchema = relOptSchema; + + // 1.
Gen Calcite Plan + try { + calciteGenPlan = genLogicalPlan(getQB(), true); + topLevelFieldSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema( + relToHiveRR.get(calciteGenPlan), + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); + } catch (SemanticException e) { + semanticException = e; + throw new RuntimeException(e); + } + + // 2. Apply Pre Join Order optimizations + calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, + HiveDefaultRelMetadataProvider.INSTANCE); + + // 3. Apply Join Order Optimizations using Hep Planner (MST Algorithm) + List list = Lists.newArrayList(); + list.add(HiveDefaultRelMetadataProvider.INSTANCE); + RelTraitSet desiredTraits = cluster + .traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); + + HepProgram hepPgm = null; + HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); + hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); + + hepPgm = hepPgmBldr.build(); + HepPlanner hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + + RelNode rootRel = calcitePreCboPlan; + hepPlanner.setRoot(rootRel); + if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { + rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); + } + hepPlanner.setRoot(rootRel); + + calciteOptimizedPlan = hepPlanner.findBestExp(); + + if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { + LOG.debug("CBO Planning details:\n"); + LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); + LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n" + + RelOptUtil.toString(calcitePreCboPlan)); + LOG.debug("Plan After Join Reordering:\n" + + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + } + + return calciteOptimizedPlan; + } + + /** + * Perform all optimizations before Join Ordering. + * + * @param basePlan + * original plan + * @param mdProvider + * meta data provider + * @return + */ + private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { + + // TODO: Decorrelation of subqueries should be done before attempting + // Partition Pruning; otherwise Expression evaluation may try to execute + // a correlated subquery. + + // 1. Push Down Semi Joins + basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, + SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); + + // 2. PPD + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), + new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); + + // 3.
Transitive inference & Partition Pruning + basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( + Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + // TODO: Enable it after CALCITE-407 is fixed + // RemoveTrivialProjectRule.INSTANCE, + new HivePartitionPruneRule(conf)); + + // 4. Projection Pruning + RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, + HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, + RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, + HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); + basePlan = fieldTrimmer.trim(basePlan); + + // 5. Rerun PPD through Project as column pruning would have introduced DT + // above scans + basePlan = hepPlan(basePlan, true, mdProvider, + new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, + HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY)); + + return basePlan; + } + + /** + * Run the HEP Planner with the given rule set. + * + * @param basePlan + * @param followPlanChanges + * @param mdProvider + * @param rules + * @return optimized RelNode + */ + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, RelOptRule... rules) { + + RelNode optimizedRelNode = basePlan; + HepProgramBuilder programBuilder = new HepProgramBuilder(); + if (followPlanChanges) { + programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN); + programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); + } else { + // TODO: Should this be also TOP_DOWN? + for (RelOptRule r : rules) + programBuilder.addRuleInstance(r); + } + + HepPlanner planner = new HepPlanner(programBuilder.build()); + List list = Lists.newArrayList(); + list.add(mdProvider); + planner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + basePlan.getCluster().setMetadataProvider( + new CachingRelMetadataProvider(chainedProvider, planner)); + + planner.setRoot(basePlan); + optimizedRelNode = planner.findBestExp(); + + return optimizedRelNode; + } + + @SuppressWarnings("nls") + private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, + String rightalias, RelNode rightRel) throws SemanticException { + HiveUnion unionRel = null; + + // 1. Get Row Resolvers, Column map for original left and right input of + // Union Rel + RowResolver leftRR = this.relToHiveRR.get(leftRel); + RowResolver rightRR = this.relToHiveRR.get(rightRel); + HashMap leftmap = leftRR.getFieldMap(leftalias); + HashMap rightmap = rightRR.getFieldMap(rightalias); + + // 2. Validate that Union is feasible according to Hive (by using type + // info from RR) + if (leftmap.size() != rightmap.size()) { + throw new SemanticException("Schema of both sides of union should match."); + } + + ASTNode tabref = getQB().getAliases().isEmpty() ? null : getQB().getParseInfo() + .getSrcForAlias(getQB().getAliases().get(0)); + for (Map.Entry lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + if (rInfo == null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(tabref, + "Schema of both sides of union should match. " + rightalias + + " does not have the field " + field)); + } + if (lInfo == null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(tabref, + "Schema of both sides of union should match. 
" + leftalias + + " does not have the field " + field)); + } + if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { + throw new CalciteSemanticException(SemanticAnalyzer.generateErrorMessage( + tabref, + "Schema of both sides of union should match: field " + field + ":" + + " appears on the left side of the UNION at column position: " + + SemanticAnalyzer.getPositionFromInternalName(lInfo.getInternalName()) + + ", and on the right side of the UNION at column position: " + + SemanticAnalyzer.getPositionFromInternalName(rInfo.getInternalName()) + + ". Column positions should match for a UNION")); + } + // try widening coversion, otherwise fail union + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType()); + if (commonTypeInfo == null) { + throw new CalciteSemanticException(SemanticAnalyzer.generateErrorMessage(tabref, + "Schema of both sides of union should match: Column " + field + " is of type " + + lInfo.getType().getTypeName() + " on first table and type " + + rInfo.getType().getTypeName() + " on second table")); + } + } + + // 3. construct Union Output RR using original left & right Input + RowResolver unionoutRR = new RowResolver(); + for (Map.Entry lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + ColumnInfo unionColInfo = new ColumnInfo(lInfo); + unionColInfo.setTabAlias(unionalias); + unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType())); + unionoutRR.put(unionalias, field, unionColInfo); + } + + // 4. Determine which columns requires cast on left/right input (Calcite + // requires exact types on both sides of union) + boolean leftNeedsTypeCast = false; + boolean rightNeedsTypeCast = false; + List leftProjs = new ArrayList(); + List rightProjs = new ArrayList(); + List leftRowDT = leftRel.getRowType().getFieldList(); + List rightRowDT = rightRel.getRowType().getFieldList(); + + RelDataType leftFieldDT; + RelDataType rightFieldDT; + RelDataType unionFieldDT; + for (int i = 0; i < leftRowDT.size(); i++) { + leftFieldDT = leftRowDT.get(i).getType(); + rightFieldDT = rightRowDT.get(i).getType(); + if (!leftFieldDT.equals(rightFieldDT)) { + unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(), + cluster.getTypeFactory()); + if (!unionFieldDT.equals(leftFieldDT)) { + leftNeedsTypeCast = true; + } + leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + + if (!unionFieldDT.equals(rightFieldDT)) { + rightNeedsTypeCast = true; + } + rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } else { + leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } + } + + // 5. Introduce Project Rel above original left/right inputs if cast is + // needed for type parity + RelNode unionLeftInput = leftRel; + RelNode unionRightInput = rightRel; + if (leftNeedsTypeCast) { + unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType() + .getFieldNames()); + } + if (rightNeedsTypeCast) { + unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType() + .getFieldNames()); + } + + // 6. 
Construct Union Rel + Builder bldr = new ImmutableList.Builder(); + bldr.add(unionLeftInput); + bldr.add(unionRightInput); + unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build()); + + relToHiveRR.put(unionRel, unionoutRR); + relToHiveColNameCalcitePosMap.put(unionRel, + this.buildHiveToCalciteColumnMap(unionoutRR, unionRel)); + + return unionRel; + } + + private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType, + ASTNode joinCond) throws SemanticException { + RelNode joinRel = null; + + // 1. construct the RowResolver for the new Join Node by combining row + // resolvers from left, right + RowResolver leftRR = this.relToHiveRR.get(leftRel); + RowResolver rightRR = this.relToHiveRR.get(rightRel); + RowResolver joinRR = null; + + if (hiveJoinType != JoinType.LEFTSEMI) { + joinRR = RowResolver.getCombinedRR(leftRR, rightRR); + } else { + joinRR = new RowResolver(); + if (!RowResolver.add(joinRR, leftRR)) { + LOG.warn("Duplicates detected when adding columns to RR: see previous message"); + } + } + + // 2. Construct ExpressionNodeDesc representing Join Condition + RexNode calciteJoinCond = null; + if (joinCond != null) { + JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); + Map exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, + jCtx); + if (jCtx.getError() != null) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), + jCtx.getError())); + + ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); + + List inputRels = new ArrayList(); + inputRels.add(leftRel); + inputRels.add(rightRel); + calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, + relToHiveRR, relToHiveColNameCalcitePosMap, false); + } else { + calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); + } + + // 3. Validate that join condition is legal (i.e. no function referring to + // both sides of join, only equi join) + // TODO: Join filter handling (only supported for OJ by runtime or is it + // supported for IJ as well) + + // 4.
Construct Join Rel Node + boolean leftSemiJoin = false; + JoinRelType calciteJoinType; + switch (hiveJoinType) { + case LEFTOUTER: + calciteJoinType = JoinRelType.LEFT; + break; + case RIGHTOUTER: + calciteJoinType = JoinRelType.RIGHT; + break; + case FULLOUTER: + calciteJoinType = JoinRelType.FULL; + break; + case LEFTSEMI: + calciteJoinType = JoinRelType.INNER; + leftSemiJoin = true; + break; + case INNER: + default: + calciteJoinType = JoinRelType.INNER; + break; + } + + if (leftSemiJoin) { + List sysFieldList = new ArrayList(); + List leftJoinKeys = new ArrayList(); + List rightJoinKeys = new ArrayList(); + + RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, + calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null); + + if (!nonEquiConds.isAlwaysTrue()) { + throw new SemanticException("Non equality condition not supported in Semi-Join" + + nonEquiConds); + } + + RelNode[] inputRels = new RelNode[] { leftRel, rightRel }; + final List leftKeys = new ArrayList(); + final List rightKeys = new ArrayList(); + calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions( + HiveProject.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0, + leftKeys, rightKeys); + + joinRel = new SemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), inputRels[0], + inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), + ImmutableIntList.copyOf(rightKeys)); + } else { + joinRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType, + leftSemiJoin); + } + // 5. Add new JoinRel & its RR to the maps + relToHiveColNameCalcitePosMap.put(joinRel, this.buildHiveToCalciteColumnMap(joinRR, joinRel)); + relToHiveRR.put(joinRel, joinRR); + + return joinRel; + } + + /** + * Generate Join Logical Plan Relnode by walking through the join AST. + * + * @param qb + * @param aliasToRel + * Alias(Table/Relation alias) to RelNode; only read and not + * written in to by this method + * @return + * @throws SemanticException + */ + private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map aliasToRel) + throws SemanticException { + RelNode leftRel = null; + RelNode rightRel = null; + JoinType hiveJoinType = null; + + if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { + String msg = String.format("UNIQUE JOIN is currently not supported in CBO," + + " turn off cbo to use UNIQUE JOIN."); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 1. Determine Join Type + // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN + switch (joinParseTree.getToken().getType()) { + case HiveParser.TOK_LEFTOUTERJOIN: + hiveJoinType = JoinType.LEFTOUTER; + break; + case HiveParser.TOK_RIGHTOUTERJOIN: + hiveJoinType = JoinType.RIGHTOUTER; + break; + case HiveParser.TOK_FULLOUTERJOIN: + hiveJoinType = JoinType.FULLOUTER; + break; + case HiveParser.TOK_LEFTSEMIJOIN: + hiveJoinType = JoinType.LEFTSEMI; + break; + default: + hiveJoinType = JoinType.INNER; + break; + } + + // 2. Get Left Table Alias + ASTNode left = (ASTNode) joinParseTree.getChild(0); + if ((left.getToken().getType() == HiveParser.TOK_TABREF) + || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( + (ASTNode) left.getChild(0)).toLowerCase(); + String leftTableAlias = left.getChildCount() == 1 ? 
tableName : SemanticAnalyzer + .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guaranteed to have an alias here: check done in processJoin + leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer + .unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias; + leftRel = aliasToRel.get(leftTableAlias); + } else if (SemanticAnalyzer.isJoinToken(left)) { + leftRel = genJoinLogicalPlan(left, aliasToRel); + } else { + assert (false); + } + + // 3. Get Right Table Alias + ASTNode right = (ASTNode) joinParseTree.getChild(1); + if ((right.getToken().getType() == HiveParser.TOK_TABREF) + || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( + (ASTNode) right.getChild(0)).toLowerCase(); + String rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer + .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guaranteed to have an alias here: check done in processJoin + rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer + .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias; + rightRel = aliasToRel.get(rightTableAlias); + } else { + assert (false); + } + + // 4. Get Join Condn + ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); + + // 5. Create Join rel + return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond); + } + + private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException { + RowResolver rr = new RowResolver(); + HiveTableScan tableRel = null; + + try { + + // 1. If the table has a Sample specified, bail from Calcite path. + if (qb.getParseInfo().getTabSample(tableAlias) != null + || getNameToSplitSampleMap().containsKey(tableAlias)) { + String msg = String.format("Table Sample specified for %s." + + " Currently we don't support Table Sample clauses in CBO," + + " turn off cbo for queries on tableSamples.", tableAlias); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 2. Get Table Metadata + Table tab = qb.getMetaData().getSrcForAlias(tableAlias); + + // 3. Get Table Logical Schema (Row Type) + // NOTE: Table logical schema = Non Partition Cols + Partition Cols + + // Virtual Cols + + // 3.1 Add Column info for non-partition cols (Object Inspector fields) + @SuppressWarnings("deprecation") + StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() + .getObjectInspector(); + List fields = rowObjectInspector.getAllStructFieldRefs(); + ColumnInfo colInfo; + String colName; + ArrayList cInfoLst = new ArrayList(); + for (int i = 0; i < fields.size(); i++) { + colName = fields.get(i).getFieldName(); + colInfo = new ColumnInfo( + fields.get(i).getFieldName(), + TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), + tableAlias, false); + colInfo.setSkewedCol((SemanticAnalyzer.isSkewedCol(tableAlias, qb, colName)) ?
true + : false); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + } + // TODO: Fix this + ArrayList nonPartitionColumns = new ArrayList(cInfoLst); + ArrayList partitionColumns = new ArrayList(); + + // 3.2 Add column info corresponding to partition columns + for (FieldSchema part_col : tab.getPartCols()) { + colName = part_col.getName(); + colInfo = new ColumnInfo(colName, + TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + partitionColumns.add(colInfo); + } + + // 3.3 Add column info corresponding to virtual columns + Iterator vcs = VirtualColumn.getRegistry(conf).iterator(); + while (vcs.hasNext()) { + VirtualColumn vc = vcs.next(); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, + vc.getIsHidden()); + rr.put(tableAlias, vc.getName(), colInfo); + cInfoLst.add(colInfo); + } + + // 3.4 Build row type from fields + RelDataType rowType = TypeConverter.getType(cluster, rr, null); + + // 4. Build RelOptAbstractTable + String fullyQualifiedTabName = tab.getDbName(); + if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) + fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); + else + fullyQualifiedTabName = tab.getTableName(); + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, + tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache, + noColsMissingStats); + + // 5. Build Hive Table Scan Rel + tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, + rowType); + + // 6. Add Schema(RR) to RelNode-Schema map + ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, + tableRel); + relToHiveRR.put(tableRel, rr); + relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); + } catch (Exception e) { + if (e instanceof SemanticException) { + throw (SemanticException) e; + } else { + throw (new RuntimeException(e)); + } + } + + return tableRel; + } + + private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException { + ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel)); + if (filterCondn instanceof ExprNodeConstantDesc + && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { + // queries like select * from t1 where 'foo'; + // Calcite's rule PushFilterThroughProject chokes on it. Arguably, we + // can insert a cast to + // boolean in such cases, but since Postgres, Oracle and MS SQL server + // fail at compile time + // for such queries, it's an arcane corner case, not worth adding that + // complexity.
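+ // Any constant predicate whose type is not boolean ends up here, e.g. + // "where 1" just like "where 'foo'".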
+ throw new CalciteSemanticException("Filter expression with non-boolean return type."); + } + ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap + .get(srcRel); + RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), + hiveColNameCalcitePosMap, 0, true).convert(filterCondn); + RexNode factoredFilterExpr = RexUtil + .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + srcRel, factoredFilterExpr); + this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); + relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); + relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); + + return filterRel; + } + + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map aliasToRel, boolean forHavingClause) throws SemanticException { + /* + * Handle Subquery predicates. + * + * Notes (8/22/14 hb): Why is this a copy of the code from {@link + * #genFilterPlan} - for now we will support the same behavior as non CBO + * route. - but plan to allow nested SubQueries(Restriction.9.m) and + * multiple SubQuery expressions(Restriction.8.m). This requires us to + * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush + * out Null semantics(CALCITE-373) - besides only the driving code has + * been copied. Most of the code, which is SubQueryUtils and QBSubQuery, is + * reused. + */ + int numSrcColumns = srcRel.getRowType().getFieldCount(); + List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { + + /* + * Restriction.9.m :: disallow nested SubQuery expressions. + */ + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); + } + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1) { + + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone.
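+ * The original condition tree is left untouched so that error messages + * can point at the user's original SubQuery ASTs (subQueriesInOriginalTree).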
+ */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outerQBRR = inputRR; + ImmutableMap outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, + originalSubQueryAST, ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, + aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); + getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. + */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = initPhase1Ctx(); + doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); + getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), + subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + srcRel = genFilterRelNode(searchCond, srcRel); + + /* + * For Not Exists and Not In, add a projection on top of the Left + * Outer Join. 
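+ * (projectLeftOuterSide narrows the row resolver back to the outer + * query's original numSrcColumns columns.)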
+ */ + if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + || subQuery.getOperator().getType() != SubQueryType.NOT_IN) { + srcRel = projectLeftOuterSide(srcRel, numSrcColumns); + } + } + relToHiveRR.put(srcRel, outerQBRR); + relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap); + return srcRel; + } + + return genFilterRelNode(searchCond, srcRel); + } + + private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { + RowResolver iRR = relToHiveRR.get(srcRel); + RowResolver oRR = new RowResolver(); + RowResolver.add(oRR, iRR, numColumns); + + List calciteColLst = new ArrayList(); + List oFieldNames = new ArrayList(); + RelDataType iType = srcRel.getRowType(); + + for (int i = 0; i < iType.getFieldCount(); i++) { + RelDataTypeField fType = iType.getFieldList().get(i); + String fName = iType.getFieldNames().get(i); + calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i)); + oFieldNames.add(fName); + } + + HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames); + + this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel)); + this.relToHiveRR.put(selRel, oRR); + return selRel; + } + + private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel, + boolean forHavingClause) throws SemanticException { + RelNode filterRel = null; + + Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() + .iterator(); + if (whereClauseIterator.hasNext()) { + filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, + aliasToRel, forHavingClause); + } + + return filterRel; + } + + /** + * Class to store GenericUDAF related information. + */ + private class AggInfo { + private final List m_aggParams; + private final TypeInfo m_returnType; + private final String m_udfName; + private final boolean m_distinct; + + private AggInfo(List aggParams, TypeInfo returnType, String udfName, + boolean isDistinct) { + m_aggParams = aggParams; + m_returnType = returnType; + m_udfName = udfName; + m_distinct = isDistinct; + } + } + + private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbChildProjLst, + RexNodeConverter converter, HashMap rexNodeToPosMap, + Integer childProjLstIndx) throws SemanticException { + + // 1. Get agg fn ret type in Calcite + RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, + this.cluster.getTypeFactory()); + + // 2. Convert Agg Fn args and type of args to Calcite + // TODO: Does HQL allows expressions as aggregate args or can it only be + // projections from child? + Integer inputIndx; + List argList = new ArrayList(); + RexNode rexNd = null; + RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); + ImmutableList.Builder aggArgRelDTBldr = new ImmutableList.Builder(); + for (ExprNodeDesc expr : agg.m_aggParams) { + rexNd = converter.convert(expr); + inputIndx = rexNodeToPosMap.get(rexNd.toString()); + if (inputIndx == null) { + gbChildProjLst.add(rexNd); + rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); + inputIndx = childProjLstIndx; + childProjLstIndx++; + } + argList.add(inputIndx); + + // TODO: does arg need type cast? + aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); + } + + // 3. 
Get Aggregation FN from Calcite given name, ret type and input arg + // type + final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, + aggArgRelDTBldr.build(), aggFnRetType); + + return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); + } + + private RelNode genGBRelNode(List gbExprs, List aggInfoLst, + RelNode srcRel) throws SemanticException { + ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), posMap, + 0, false); + + final List gbChildProjLst = Lists.newArrayList(); + final HashMap rexNodeToPosMap = new HashMap(); + final List groupSetPositions = Lists.newArrayList(); + Integer gbIndx = 0; + RexNode rnd; + for (ExprNodeDesc key : gbExprs) { + rnd = converter.convert(key); + gbChildProjLst.add(rnd); + groupSetPositions.add(gbIndx); + rexNodeToPosMap.put(rnd.toString(), gbIndx); + gbIndx++; + } + final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions); + + List aggregateCalls = Lists.newArrayList(); + for (AggInfo agg : aggInfoLst) { + aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap, + gbChildProjLst.size())); + } + + if (gbChildProjLst.isEmpty()) { + // This will happen for count(*), in such cases we arbitrarily pick + // the first element from srcRel + gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); + } + RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); + + HiveRelNode aggregateRel = null; + try { + aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + gbInputRel, false, groupSet, null, aggregateCalls); + } catch (InvalidRelException e) { + throw new SemanticException(e); + } + + return aggregateRel; + } + + private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, + RowResolver gByInputRR, RowResolver gByRR) { + if (gByExpr.getType() == HiveParser.DOT + && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0) + .getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an + * entry based on that tab_alias. E.g. the query: select b.x, + * count(*) from t1 b group by x needs (tab_alias=b, col_alias=x) in the + * GBy RR. tab_alias=b comes from looking at the RowResolver that is the + * ancestor before any GBy/ReduceSinks added for the GBY operation. + */ + try { + ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); + } catch (SemanticException se) { + } + gByRR.put(tab_alias, col_alias, colInfo); + } + } + + private void addToGBExpr(RowResolver groupByOutputRowResolver, + RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, + List gbExprNDescLst, List outputColumnNames) { + // TODO: Should we use grpbyExprNDesc.getTypeInfo()?
what if expr is + // UDF + int i = gbExprNDescLst.size(); + String field = SemanticAnalyzer.getColumnInternalName(i); + outputColumnNames.add(field); + gbExprNDescLst.add(grpbyExprNDesc); + + ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); + groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); + + addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, + groupByOutputRowResolver); + } + + private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) + throws SemanticException { + AggInfo aInfo = null; + + // 1 Convert UDAF Params to ExprNodeDesc + ArrayList aggParameters = new ArrayList(); + for (int i = 1; i <= aggFnLstArgIndx; i++) { + ASTNode paraExpr = (ASTNode) aggAst.getChild(i); + ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); + aggParameters.add(paraExprNode); + } + + // 2. Is this distinct UDAF + boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; + + // 3. Determine type of UDAF + TypeInfo udafRetType = null; + + // 3.1 Obtain UDAF name + String aggName = SemanticAnalyzer.unescapeIdentifier(aggAst.getChild(0).getText()); + + // 3.2 Rank functions type is 'int'/'double' + if (FunctionRegistry.isRankingFunction(aggName)) { + if (aggName.equalsIgnoreCase("percent_rank")) + udafRetType = TypeInfoFactory.doubleTypeInfo; + else + udafRetType = TypeInfoFactory.intTypeInfo; + } else { + // 3.3 Try obtaining UDAF evaluators to determine the ret type + try { + boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 3.3.1 Get UDAF Evaluator + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, + isDistinct); + + GenericUDAFEvaluator genericUDAFEvaluator = null; + if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) + || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { + ArrayList originalParameterTypeInfos = SemanticAnalyzer + .getWritableObjectInspector(aggParameters); + genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggName, + originalParameterTypeInfos, isDistinct, isAllColumns); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggName, aggParameters, + aggAst, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + + // 3.3.2 Get UDAF Info using UDAF Evaluator + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + udafRetType = udaf.returnType; + } + } catch (Exception e) { + LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName + + ", trying to translate to GenericUDF"); + } + + // 3.4 Try GenericUDF translation + if (udafRetType == null) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + tcCtx.setAllowDistinctFunctions(false); + ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); + udafRetType = exp.getTypeInfo(); + } + } + + // 4. Construct AggInfo + aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); + + return aInfo; + } + + /** + * Generate GB plan. + * + * @param qb + * @param srcRel + * @return TODO: 1. Grouping Sets (roll up..) 
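+ * (e.g. "select key, count(value) from src group by key" becomes a + * HiveProject over src feeding a HiveAggregate with groupSet {0} and a + * single AggregateCall for count)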
+ * @throws SemanticException + */ + private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + RelNode gbRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + + // 0. for GSets, Cube, Rollup, bail from Calcite path. + if (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() + || !qbp.getDestCubes().isEmpty()) { + String gbyClause = null; + HashMap gbysMap = qbp.getDestToGroupBy(); + if (gbysMap.size() == 1) { + ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue(); + gbyClause = ctx.getTokenRewriteStream().toString(gbyAST.getTokenStartIndex(), + gbyAST.getTokenStopIndex()); + gbyClause = "in '" + gbyClause + "'."; + } else { + gbyClause = "."; + } + String msg = String.format("Encountered Grouping Set/Cube/Rollup%s" + + " Currently we don't support Grouping Set/Cube/Rollup" + " clauses in CBO," + + " turn off cbo for these queries.", gbyClause); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 1. Gather GB Expressions (AST) (GB + Aggregations) + // NOTE: Multi Insert is not supported + String detsClauseName = qbp.getClauseNames().iterator().next(); + List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); + HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); + boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; + boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true + : false; + + if (hasGrpByAstExprs || hasAggregationTrees) { + ArrayList gbExprNDescLst = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + + // 2. Input, Output Row Resolvers + RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + + if (hasGrpByAstExprs) { + // 3. Construct GB Keys (ExprNode) + for (int i = 0; i < grpByAstExprs.size(); ++i) { + ASTNode grpbyExpr = grpByAstExprs.get(i); + Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( + grpbyExpr, new TypeCheckCtx(groupByInputRowResolver)); + ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); + if (grpbyExprNDesc == null) + throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump()); + + addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, + grpbyExprNDesc, gbExprNDescLst, outputColumnNames); + } + } + + // 4. 
Construct aggregation function Info + ArrayList aggregations = new ArrayList(); + if (hasAggregationTrees) { + assert (aggregationTrees != null); + for (ASTNode value : aggregationTrees.values()) { + // 4.1 Determine type of UDAF + // This is the GenericUDAF name + String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 4.2 Convert UDAF Params to ExprNodeDesc + ArrayList aggParameters = new ArrayList(); + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode) value.getChild(i); + ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); + aggParameters.add(paraExprNode); + } + + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, + isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator( + aggName, aggParameters, value, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); + aggregations.add(aInfo); + String field = SemanticAnalyzer.getColumnInternalName(gbExprNDescLst.size() + + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, + "", false)); + } + } + + gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); + relToHiveColNameCalcitePosMap.put(gbRel, + buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); + this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + } + + return gbRel; + } + + /** + * Generate OB RelNode and input Select RelNode that should be used to + * introduce top constraining Project. If Input select RelNode is not + * present then don't introduce top constraining select. + * + * @param qb + * @param srcRel + * @param outermostOB + * @return Pair Key- OB RelNode, Value - Input Select for + * top constraining Select + * @throws SemanticException + */ + private Pair genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB) + throws SemanticException { + RelNode sortRel = null; + RelNode originalOBChild = null; + + QBParseInfo qbp = getQBParseInfo(qb); + String dest = qbp.getClauseNames().iterator().next(); + ASTNode obAST = qbp.getOrderByForClause(dest); + + if (obAST != null) { + // 1. OB Expr sanity test + // in strict mode, in the presence of order by, limit must be specified + Integer limit = qb.getParseInfo().getDestLimit(dest); + if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") + && limit == null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, + ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); + } + + // 2. 
Walk through OB exprs and extract field collations and additional + // virtual columns needed + final List newVCLst = new ArrayList(); + final List fieldCollations = Lists.newArrayList(); + int fieldIndex = 0; + + List obASTExprLst = obAST.getChildren(); + ASTNode obASTExpr; + List> vcASTTypePairs = new ArrayList>(); + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outputRR = new RowResolver(); + + RexNode rnd; + RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), + relToHiveColNameCalcitePosMap.get(srcRel), 0, false); + int srcRelRecordSz = srcRel.getRowType().getFieldCount(); + + for (int i = 0; i < obASTExprLst.size(); i++) { + // 2.1 Convert AST Expr to ExprNode + obASTExpr = (ASTNode) obASTExprLst.get(i); + Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( + obASTExpr, new TypeCheckCtx(inputRR)); + ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); + if (obExprNDesc == null) + throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + + // 2.2 Convert ExprNode to RexNode + rnd = converter.convert(obExprNDesc); + + // 2.3 Determine the index of ob expr in child schema + // NOTE: Calcite can not take compound exprs in OB without it being + // present in the child (& hence we add a child Project Rel) + if (rnd instanceof RexInputRef) { + fieldIndex = ((RexInputRef) rnd).getIndex(); + } else { + fieldIndex = srcRelRecordSz + newVCLst.size(); + newVCLst.add(rnd); + vcASTTypePairs.add(new Pair((ASTNode) obASTExpr.getChild(0), + obExprNDesc.getTypeInfo())); + } + + // 2.4 Determine the Direction of order by + org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + order = RelFieldCollation.Direction.ASCENDING; + } + + // 2.5 Add to field collations + fieldCollations.add(new RelFieldCollation(fieldIndex, order)); + } + + // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel + // for top constraining Sel + RelNode obInputRel = srcRel; + if (!newVCLst.isEmpty()) { + List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), + new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver obSyntheticProjectRR = new RowResolver(); + if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + int vcolPos = inputRR.getRowSchema().getSignature().size(); + for (Pair astTypePair : vcASTTypePairs) { + obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, + false)); + vcolPos++; + } + obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), + obSyntheticProjectRR, srcRel); + + if (outermostOB) { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + + } else { + if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + originalOBChild = srcRel; + } + } else { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + } + + // 4. Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); + sortRel = new HiveSort(cluster, traitSet, obInputRel, canonizedCollation, null, null); + + // 5. Update the maps + // NOTE: Output RR for SortRel is considered same as its input; we may + // end up not using VC that is present in sort rel. Also note that + // rowtype of sortrel is the type of it child; if child happens to be + // synthetic project that we introduced then that projectrel would + // contain the vc. 
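+ // e.g. for "select key from src order by upper(key)", upper(key) is + // added as a VC to the synthetic child Project and the Sort references + // it by field index only; the RR kept for the Sort omits the VC.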
+ ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( + outputRR, sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + } + + return (new Pair(sortRel, originalOBChild)); + } + + private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + HiveRelNode sortRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next()); + + if (limit != null) { + RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit)); + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY); + sortRel = new HiveSort(cluster, traitSet, srcRel, canonizedCollation, null, fetch); + + RowResolver outputRR = new RowResolver(); + if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( + outputRR, sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + } + + return sortRel; + } + + private List getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, + RowResolver inputRR) throws SemanticException { + List pKeys = new ArrayList(); + if (ps != null) { + List pExprs = ps.getExpressions(); + for (PartitionExpression pExpr : pExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); + pKeys.add(converter.convert(exp)); + } + } + + return pKeys; + } + + private List getOrderKeys(OrderSpec os, RexNodeConverter converter, + RowResolver inputRR) throws SemanticException { + List oKeys = new ArrayList(); + if (os != null) { + List oExprs = os.getExpressions(); + for (OrderExpression oExpr : oExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); + RexNode ordExp = converter.convert(exp); + Set flags = new HashSet(); + if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) + flags.add(SqlKind.DESCENDING); + oKeys.add(new RexFieldCollation(ordExp, flags)); + } + } + + return oKeys; + } + + private RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) { + RexWindowBound rwb = null; + + if (bs != null) { + SqlParserPos pos = new SqlParserPos(1, 1); + SqlNode amt = bs.getAmt() == 0 ? 
null : SqlLiteral.createExactNumeric( + String.valueOf(bs.getAmt()), new SqlParserPos(2, 2)); + RexNode amtLiteral = null; + SqlCall sc = null; + + if (amt != null) + amtLiteral = cluster.getRexBuilder().makeLiteral(new Integer(bs.getAmt()), + cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true); + + switch (bs.getDirection()) { + case PRECEDING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null); + } else { + sc = (SqlCall) SqlWindow.createPreceding(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + + case CURRENT: + rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null); + break; + + case FOLLOWING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)), + null); + } else { + sc = (SqlCall) SqlWindow.createFollowing(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + } + } + + return rwb; + } + + private int getWindowSpecIndx(ASTNode wndAST) { + int wi = wndAST.getChildCount() - 1; + if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) { + wi = -1; + } + + return wi; + } + + private Pair genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, + RelNode srcRel) throws SemanticException { + RexNode w = null; + TypeInfo wHiveRetType = null; + + if (wExpSpec instanceof WindowFunctionSpec) { + WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec; + ASTNode windowProjAst = wFnSpec.getExpression(); + // TODO: do we need to get to child? + int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); + // 2. Get Hive Aggregate Info + AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, + this.relToHiveRR.get(srcRel)); + + // 3. Get Calcite Return type for Agg Fn + wHiveRetType = hiveAggInfo.m_returnType; + RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, + this.cluster.getTypeFactory()); + + // 4. Convert Agg Fn args to Calcite + ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), + posMap, 0, false); + Builder calciteAggFnArgsBldr = ImmutableList. builder(); + Builder calciteAggFnArgsTypeBldr = ImmutableList. builder(); + for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { + calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); + calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) + .getTypeInfo(), this.cluster.getTypeFactory())); + } + ImmutableList calciteAggFnArgs = calciteAggFnArgsBldr.build(); + ImmutableList calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build(); + + // 5. Get Calcite Agg Fn + final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn( + hiveAggInfo.m_udfName, calciteAggFnArgsType, calciteAggFnRetType); + + // 6. 
Translate Window spec + RowResolver inputRR = relToHiveRR.get(srcRel); + WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); + List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); + List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); + RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter); + RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter); + boolean isRows = ((wndSpec.windowFrame.start instanceof RangeBoundarySpec) || (wndSpec.windowFrame.end instanceof RangeBoundarySpec)) ? true + : false; + + w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs, + partitionKeys, ImmutableList. copyOf(orderKeys), lowerBound, + upperBound, isRows, true, false); + } else { + // TODO: Convert to Semantic Exception + throw new RuntimeException("Unsupported window Spec"); + } + + return new Pair(w, wHiveRetType); + } + + private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet newColumns) + throws SemanticException { + getQBParseInfo(qb); + WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs() + .values().iterator().next() : null; + if (wSpec == null) + return null; + // 1. Get valid Window Function Spec + wSpec.validateAndMakeEffective(); + List windowExpressions = wSpec.getWindowExpressions(); + if (windowExpressions == null || windowExpressions.isEmpty()) + return null; + + RowResolver inputRR = this.relToHiveRR.get(srcRel); + // 2. Get RexNodes for original Projections from below + List projsForWindowSelOp = new ArrayList( + HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel)); + + // 3. Construct new Row Resolver with everything from below. + RowResolver out_rwsch = new RowResolver(); + if (!RowResolver.add(out_rwsch, inputRR)) { + LOG.warn("Duplicates detected when adding columns to RR: see previous message"); + } + + // 4. Walk through Window Expressions & Construct RexNodes for those, + // Update out_rwsch + for (WindowExpressionSpec wExprSpec : windowExpressions) { + if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) { + Pair wtp = genWindowingProj(qb, wExprSpec, srcRel); + projsForWindowSelOp.add(wtp.getKey()); + + // 6.2.2 Update Output Row Schema + ColumnInfo oColInfo = new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), + null, false); + out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo); + newColumns.add(oColInfo); + } + } + + return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel); + } + + private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, + RelNode srcRel) throws CalciteSemanticException { + // 1. Build Column Names + Set colNamesSet = new HashSet(); + List cInfoLst = out_rwsch.getRowSchema().getSignature(); + ArrayList columnNames = new ArrayList(); + String[] qualifiedColNames; + String tmpColAlias; + for (int i = 0; i < calciteColLst.size(); i++) { + ColumnInfo cInfo = cInfoLst.get(i); + qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName()); + /* + * if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty()) + * tmpColAlias = qualifiedColNames[0] + "." + qualifiedColNames[1]; else + */ + tmpColAlias = qualifiedColNames[1]; + + // Prepend column names with '_o_' if it starts with '_c' + /* + * Hive treats names that start with '_c' as internalNames; so change + * the names so we don't run into this issue when converting back to + * Hive AST. 
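+ * e.g. an auto-generated alias "_c1" is emitted as "_o__c1" here; the + * suffix loop below then resolves any remaining name collisions.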
+ */ + if (tmpColAlias.startsWith("_c")) + tmpColAlias = "_o_" + tmpColAlias; + int suffix = 1; + while (colNamesSet.contains(tmpColAlias)) { + tmpColAlias = qualifiedColNames[1] + suffix; + suffix++; + } + + colNamesSet.add(tmpColAlias); + columnNames.add(tmpColAlias); + } + + // 3. Build Calcite Rel Node for project using converted projections & col + // names + HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames); + + // 4. Keep track of colname-to-posmap && RR for new select + this.relToHiveColNameCalcitePosMap + .put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel)); + this.relToHiveRR.put(selRel, out_rwsch); + + return selRel; + } + + /** + * NOTE: there can only be one select clause since we don't handle multi + * destination insert. + * + * @throws SemanticException + */ + private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) + throws SemanticException { + // 0. Generate a Select Node for Windowing + // Exclude the newly-generated select columns from */etc. resolution. + HashSet excludedColumns = new HashSet(); + RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns); + srcRel = (selForWindow == null) ? srcRel : selForWindow; + + ArrayList col_list = new ArrayList(); + + // 1. Get Select Expression List + QBParseInfo qbp = getQBParseInfo(qb); + String selClauseName = qbp.getClauseNames().iterator().next(); + ASTNode selExprList = qbp.getSelForClause(selClauseName); + + // 2. Row resolvers for input, output + RowResolver out_rwsch = new RowResolver(); + Integer pos = Integer.valueOf(0); + // TODO: will this also fix windowing? try + RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR; + if (starSrcRel != null) { + starRR = this.relToHiveRR.get(starSrcRel); + } + + // 3. Query Hints + // TODO: Handle Query Hints; currently we ignore them + boolean selectStar = false; + int posn = 0; + boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST); + if (hintPresent) { + String hint = ctx.getTokenRewriteStream().toString( + selExprList.getChild(0).getTokenStartIndex(), + selExprList.getChild(0).getTokenStopIndex()); + String msg = String.format("Hint specified for %s." + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 4. Bailout if select involves Transform + boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); + if (isInTransform) { + String msg = String.format("SELECT TRANSFORM is currently not supported in CBO," + " turn off cbo to use TRANSFORM."); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 5. Bailout if select involves UDTF + ASTNode expr = (ASTNode) selExprList.getChild(posn).getChild(0); + int exprType = expr.getType(); + if (exprType == HiveParser.TOK_FUNCTION || exprType == HiveParser.TOK_FUNCTIONSTAR) { + String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(expr, true); + FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); + if (fi != null && fi.getGenericUDTF() != null) { + String msg = String.format("UDTF " + funcName + " is currently not supported in CBO," + " turn off cbo to use UDTF " + funcName); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + } + + // 6.
Iterate over all expressions (after SELECT) + ASTNode exprList = selExprList; + int startPosn = posn; + List tabAliasesForAllProjs = getTabAliases(starRR); + for (int i = startPosn; i < exprList.getChildCount(); ++i) { + + // 6.1 child can be EXPR AS ALIAS, or EXPR. + ASTNode child = (ASTNode) exprList.getChild(i); + boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); + + // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs + // This check is not needed and invalid when there is a transform because + // the + // ASTs are slightly different. + if (child.getChildCount() > 2) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage( + (ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg())); + } + + String tabAlias; + String colAlias; + + // 6.3 Get rid of TOK_SELEXPR + expr = (ASTNode) child.getChild(0); + String[] colRef = SemanticAnalyzer.getColAlias(child, getAutogenColAliasPrfxLbl(), inputRR, + autogenColAliasPrfxIncludeFuncName(), i); + tabAlias = colRef[0]; + colAlias = colRef[1]; + + // 6.4 Build ExprNode corresponding to columns + if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer + .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, + excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs, true); + selectStar = true; + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) { + // In case the expression is a regex COL. + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), + null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, + tabAliasesForAllProjs, true); + } else if (expr.getType() == HiveParser.DOT + && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL + && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0) + .getChild(0).getText().toLowerCase())) + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) { + // In case the expression is TABLE.COL (col can be regex).
+ // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText() + .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos, + out_rwsch, tabAliasesForAllProjs, true); + } else if (expr.toStringTree().contains("TOK_FUNCTIONDI") + && !(srcRel instanceof HiveAggregate)) { + // Likely a malformed query, e.g. select hash(distinct c1) from t1; + throw new CalciteSemanticException("Distinct without an aggregation."); + } else { + // Case when this is an expression + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); + String recommended = recommendName(exp, colAlias); + if (recommended != null && out_rwsch.get(null, recommended) == null) { + colAlias = recommended; + } + col_list.add(exp); + + ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), + exp.getWritableObjectInspector(), tabAlias, false); + colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp) + .isSkewedCol() : false); + if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + + colAlias + " => " + colInfo + " due to duplication, see previous warnings"); + } + + if (exp instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp; + String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn()); + if (altMapping != null) { + out_rwsch.put(altMapping[0], altMapping[1], colInfo); + } + } + + pos = Integer.valueOf(pos.intValue() + 1); + } + } + selectStar = selectStar && exprList.getChildCount() == posn + 1; + + // 7. Convert Hive projections to Calcite + List calciteColLst = new ArrayList(); + RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), + buildHiveColNameToInputPosMap(col_list, inputRR), 0, false); + for (ExprNodeDesc colExpr : col_list) { + calciteColLst.add(rexNodeConv.convert(colExpr)); + } + + // 8. Build Calcite Rel + RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + + return selRel; + } + + private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { + if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + return genLogicalPlan(qbexpr.getQB(), false); + } + if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1()); + RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2()); + + return genUnionLogicalPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops, + qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + } + return null; + } + + private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { + RelNode srcRel = null; + RelNode filterRel = null; + RelNode gbRel = null; + RelNode gbHavingRel = null; + RelNode selectRel = null; + RelNode obRel = null; + RelNode limitRel = null; + + // First generate all the opInfos for the elements in the from clause + Map aliasToRel = new HashMap(); + + // 0. Check if we can handle the SubQuery; + // canHandleQbForCbo returns null if the query can be handled.
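+ // (a non-null reason, e.g. for an unsupported construct such as + // multi insert, triggers the CalciteSemanticException below)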
+ String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled()); + if (reason != null) { + String msg = "CBO can not handle Sub Query"; + if (LOG.isDebugEnabled()) { + LOG.debug(msg + " because it: " + reason); + } + throw new CalciteSemanticException(msg); + } + + // 1. Build Rel For Src (SubQuery, TS, Join) + // 1.1. Recurse over the subqueries to fill the subquery part of the plan + for (String subqAlias : qb.getSubqAliases()) { + QBExpr qbexpr = qb.getSubqForAlias(subqAlias); + aliasToRel.put(subqAlias, genLogicalPlan(qbexpr)); + } + + // 1.2 Recurse over all the source tables + for (String tableAlias : qb.getTabAliases()) { + RelNode op = genTableLogicalPlan(tableAlias, qb); + aliasToRel.put(tableAlias, op); + } + + if (aliasToRel.isEmpty()) { + // // This may happen for queries like select 1; (no source table) + // We can do following which is same, as what Hive does. + // With this, we will be able to generate Calcite plan. + // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); + // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb); + // qb.addAlias(DUMMY_TABLE); + // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE); + // aliasToRel.put(DUMMY_TABLE, op); + // However, Hive trips later while trying to get Metadata for this dummy + // table + // So, for now lets just disable this. Anyway there is nothing much to + // optimize in such cases. + throw new CalciteSemanticException("Unsupported"); + + } + // 1.3 process join + if (qb.getParseInfo().getJoinExpr() != null) { + srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel); + } else { + // If no join then there should only be either 1 TS or 1 SubQuery + srcRel = aliasToRel.values().iterator().next(); + } + + // 2. Build Rel for where Clause + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); + srcRel = (filterRel == null) ? srcRel : filterRel; + RelNode starSrcRel = srcRel; + + // 3. Build Rel for GB Clause + gbRel = genGBLogicalPlan(qb, srcRel); + srcRel = (gbRel == null) ? srcRel : gbRel; + + // 4. Build Rel for GB Having Clause + gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); + srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; + + // 5. Build Rel for Select Clause + selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel); + srcRel = (selectRel == null) ? srcRel : selectRel; + + // 6. Build Rel for OB Clause + Pair obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB); + obRel = obTopProjPair.getKey(); + RelNode topConstrainingProjArgsRel = obTopProjPair.getValue(); + srcRel = (obRel == null) ? srcRel : obRel; + + // 7. Build Rel for Limit Clause + limitRel = genLimitLogicalPlan(qb, srcRel); + srcRel = (limitRel == null) ? srcRel : limitRel; + + // 8. Introduce top constraining select if needed. + // NOTES: + // 1. Calcite can not take an expr in OB; hence it needs to be added as VC + // in the input select; In such cases we need to introduce a select on top + // to ensure VC is not visible beyond Limit, OB. + // 2. Hive can not preserve order across select. In subqueries OB is used + // to get a deterministic set of tuples from following limit. Hence we + // introduce the constraining select above Limit (if present) instead of + // OB. + // 3. The top level OB will not introduce constraining select due to Hive + // limitation(#2) stated above. The RR for OB will not include VC. Thus + // Result Schema will not include exprs used by top OB. 
During AST Conv, + in the PlanModifierForASTConv we would modify the top level OB to + migrate exprs from input sel to SortRel (Note that Calcite doesn't + support this; but since we are done with Calcite at this point it's OK). + if (topConstrainingProjArgsRel != null) { + List originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType() + .getFieldList(), new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver topConstrainingProjRR = new RowResolver(); + if (!RowResolver.add(topConstrainingProjRR, + this.relToHiveRR.get(topConstrainingProjArgsRel))) { + LOG.warn("Duplicates detected when adding columns to RR: see previous message"); + } + srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel); + } + + // 9. In case this QB corresponds to a subquery then modify its RR to point + // to the subquery alias + // TODO: cleanup this + if (qb.getParseInfo().getAlias() != null) { + RowResolver rr = this.relToHiveRR.get(srcRel); + RowResolver newRR = new RowResolver(); + String alias = qb.getParseInfo().getAlias(); + for (ColumnInfo colInfo : rr.getColumnInfos()) { + String name = colInfo.getInternalName(); + String[] tmp = rr.reverseLookup(name); + if ("".equals(tmp[0]) || tmp[1] == null) { + // ast expression is not a valid column name for table + tmp[1] = colInfo.getInternalName(); + } + ColumnInfo newCi = new ColumnInfo(colInfo); + newCi.setTabAlias(alias); + newRR.put(alias, tmp[1], newCi); + } + relToHiveRR.put(srcRel, newRR); + relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Created Plan for Query Block " + qb.getId()); + } + + return srcRel; + } + + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel) + throws SemanticException { + RelNode gbFilter = null; + QBParseInfo qbp = getQBParseInfo(qb); + ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next()); + + if (havingClause != null) { + if (!(srcRel instanceof HiveAggregate)) { + // ill-formed query like select * from t1 having c1 > 0; + throw new CalciteSemanticException("Having clause without any group-by."); + } + validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0)); + gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, + true); + } + + return gbFilter; + } + + /* + * Bail if having clause uses Select Expression aliases for Aggregation + * expressions. We could do what Hive does. But this is non-standard + * behavior. Making sure this doesn't cause issues when translating through + * Calcite is not worth it. + */ + private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr) + throws CalciteSemanticException { + + QBParseInfo qbPI = qb.getParseInfo(); + Map exprToAlias = qbPI.getAllExprToColumnAlias(); + /* + * a mouthful, but safe: - a QB is guaranteed to have at least 1 + * destination - we don't support multi insert, so picking the first dest.
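+ * e.g. in "select count(value) cnt from src group by key having cnt > 1" + * the alias 'cnt' names an aggregation expression, so the visitor below + * flags the having-clause reference and we bail out of the CBO path.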
+ */ + Set aggExprs = qbPI.getDestToAggregationExprs().values().iterator().next().keySet(); + + for (Map.Entry selExpr : exprToAlias.entrySet()) { + ASTNode selAST = selExpr.getKey(); + if (!aggExprs.contains(selAST.toStringTree().toLowerCase())) { + continue; + } + final String aliasToCheck = selExpr.getValue(); + final Set aliasReferences = new HashSet(); + TreeVisitorAction action = new TreeVisitorAction() { + + @Override + public Object pre(Object t) { + if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) { + Object c = ParseDriver.adaptor.getChild(t, 0); + if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.Identifier + && ParseDriver.adaptor.getText(c).equals(aliasToCheck)) { + aliasReferences.add(t); + } + } + return t; + } + + @Override + public Object post(Object t) { + return t; + } + }; + new TreeVisitor(ParseDriver.adaptor).visit(havingExpr, action); + + if (aliasReferences.size() > 0) { + String havingClause = ctx.getTokenRewriteStream().toString( + havingExpr.getTokenStartIndex(), havingExpr.getTokenStopIndex()); + String msg = String.format("Encountered Select alias '%s' in having clause '%s'" + + " This non standard behavior is not supported with cbo on." + + " Turn off cbo for these queries.", aliasToCheck, havingClause); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + } + + } + + private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) { + ImmutableMap.Builder b = new ImmutableMap.Builder(); + for (ColumnInfo ci : rr.getRowSchema().getSignature()) { + b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName())); + } + return b.build(); + } + + private ImmutableMap buildHiveColNameToInputPosMap( + List col_list, RowResolver inputRR) { + // Build a map of Hive column Names (ExprNodeColumnDesc Name) + // to the positions of those projections in the input + Map hashCodeTocolumnDescMap = new HashMap(); + ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); + ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); + String exprNodecolName; + for (ExprNodeDesc exprDesc : hashCodeTocolumnDescMap.values()) { + exprNodecolName = ((ExprNodeColumnDesc) exprDesc).getColumn(); + hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); + } + + return hiveColNameToInputPosMapBuilder.build(); + } + + private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { + QBParseInfo qbp = qb.getParseInfo(); + if (qbp.getClauseNames().size() > 1) { + String msg = String.format("Multi Insert is currently not supported in CBO," + + " turn off cbo to use Multi Insert."); + LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + return qbp; + } + + private List getTabAliases(RowResolver inputRR) { + List tabAliases = new ArrayList(); // TODO: this should be + // unique + for (ColumnInfo ci : inputRR.getColumnInfos()) { + tabAliases.add(ci.getTabAlias()); + } + + return tabAliases; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java index 8416cff..3d7206b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java @@ -22,8 +22,8 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import 
java.util.Map; import java.util.Set; @@ -35,20 +35,23 @@ private final Map> tableToColumnAccessMap; public ColumnAccessInfo() { - tableToColumnAccessMap = new HashMap>(); + // Must be deterministic order map for consistent q-test output across Java versions + tableToColumnAccessMap = new LinkedHashMap>(); } public void add(String table, String col) { Set tableColumns = tableToColumnAccessMap.get(table); if (tableColumns == null) { - tableColumns = new HashSet(); + // Must be deterministic order set for consistent q-test output across Java versions + tableColumns = new LinkedHashSet(); tableToColumnAccessMap.put(table, tableColumns); } tableColumns.add(col); } public Map> getTableToColumnAccessMap() { - Map> mapping = new HashMap>(); + // Must be deterministic order map for consistent q-test output across Java versions + Map> mapping = new LinkedHashMap>(); for (Map.Entry> entry : tableToColumnAccessMap.entrySet()) { List sortedCols = new ArrayList(entry.getValue()); Collections.sort(sortedCols); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 73348d8..8979b4c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -59,7 +59,7 @@ private Table tbl; public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException { - super(conf, false); + super(conf); } private boolean shouldRewrite(ASTNode tree) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 5377082..2b23559 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1692,7 +1692,8 @@ private void analyzeAlterTableCompact(ASTNode ast, String tableName, } static HashMap getProps(ASTNode prop) { - HashMap mapProp = new HashMap(); + // Must be deterministic order map for consistent q-test output across Java versions + HashMap mapProp = new LinkedHashMap(); readProps(prop, mapProp); return mapProp; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/MacroSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/MacroSemanticAnalyzer.java index 809c0f9..e3ba201 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/MacroSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/MacroSemanticAnalyzer.java @@ -132,7 +132,9 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throw new SemanticException("At least one parameter name was used more than once " + macroColNames); } - SemanticAnalyzer sa = new SemanticAnalyzer(conf); + SemanticAnalyzer sa = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) ? 
new CalcitePlanner( + conf) : new SemanticAnalyzer(conf); + ; ExprNodeDesc body; if(isNoArgumentMacro) { body = sa.genExprNodeDesc((ASTNode)ast.getChild(1), rowResolver); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index ec69bac..1b6de64 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -282,10 +282,10 @@ protected void generateTaskTree(List> rootTasks, Pa // generate map reduce plans ParseContext tempParseContext = getParseContext(pCtx, rootTasks); - GenMRProcContext procCtx = new GenMRProcContext( conf, - new HashMap, Task>(), + // Must be deterministic order map for consistent q-test output across Java versions + new LinkedHashMap, Task>(), tempParseContext, mvTask, rootTasks, new LinkedHashMap, GenMapRedCtx>(), inputs, outputs); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 0497e5a..cf6941c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -262,8 +262,15 @@ public boolean isTopLevelSelectStarQuery() { // to find target for fetch task conversion optimizer (not allows subqueries) public boolean isSimpleSelectQuery() { - return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() && - !qbp.isAnalyzeCommand(); + if (!qbp.isSimpleSelectQuery() || isCTAS() || qbp.isAnalyzeCommand()) { + return false; + } + for (QBExpr qbexpr : aliasToSubq.values()) { + if (!qbexpr.isSimpleSelectQuery()) { + return false; + } + } + return true; } public boolean hasTableSample(String alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java index e923bca..36e65da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java @@ -114,4 +114,10 @@ public void print(String msg) { } } + public boolean isSimpleSelectQuery() { + if (qb != null) { + return qb.isSimpleSelectQuery(); + } + return qbexpr1.isSimpleSelectQuery() && qbexpr2.isSimpleSelectQuery(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 02c4be9..3e51188 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -466,10 +466,12 @@ public boolean isTopLevelSimpleSelectStarQuery() { return true; } + // for fast check of possible existence of RS (will be checked again in SimpleFetchOptimizer) public boolean isSimpleSelectQuery() { - if (isSubQ || joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() + if (joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() || !destToGroupby.isEmpty() || !destToClusterby.isEmpty() || !destToDistributeby.isEmpty() - || !aliasToLateralViews.isEmpty() || !destToLateralView.isEmpty()) { + || !destRollups.isEmpty() || !destCubes.isEmpty() || !destGroupingSets.isEmpty() + || !destToHaving.isEmpty()) { return false; } @@ -491,6 +493,7 @@ public boolean isSimpleSelectQuery() { } } + // exclude insert queries for (ASTNode v : nameToDest.values()) { if (!(v.getChild(0).getType() == HiveParser.TOK_TMP_FILE)) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g 
ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g index 1855d7f..eba3689 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g @@ -125,9 +125,10 @@ selectItem @init { gParent.pushMsg("selection target", state); } @after { gParent.popMsg(state); } : - ( selectExpression + ( expression ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))? - ) -> ^(TOK_SELEXPR selectExpression identifier*) + ) -> ^(TOK_SELEXPR expression identifier*) + | tableAllColumns -> ^(TOK_SELEXPR tableAllColumns) ; trfmClause diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d1de485..c2d5c8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -22,95 +22,26 @@ import java.io.IOException; import java.io.Serializable; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.UndeclaredThrowableException; -import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; -import org.antlr.runtime.tree.TreeVisitor; -import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptQuery; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptSchema; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.plan.hep.HepMatchOrder; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgram; -import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.rel.InvalidRelException; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollationImpl; -import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.AggregateCall; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.RelFactories; -import org.apache.calcite.rel.core.SemiJoin; -import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; -import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; -import org.apache.calcite.rel.rules.FilterMergeRule; -import org.apache.calcite.rel.rules.FilterProjectTransposeRule; -import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; -import 
org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; -import org.apache.calcite.rel.rules.JoinToMultiJoinRule; -import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; -import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; -import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; -import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexFieldCollation; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.rex.RexWindowBound; -import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.SqlAggFunction; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlExplainLevel; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlLiteral; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlWindow; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql2rel.RelFieldTrimmer; -import org.apache.calcite.tools.Frameworks; -import org.apache.calcite.util.CompositeList; -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.ImmutableIntList; -import org.apache.calcite.util.Pair; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -179,32 +110,9 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; -import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; -import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec; @@ -288,7 +196,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -297,12 +204,6 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; - /** * Implementation of the semantic analyzer. It generates the query plan. * There are other specific semantic analyzers for some hive operations such as @@ -369,8 +270,7 @@ //flag for partial scan during analyze ... compute statistics protected boolean partialscan; - private volatile boolean runCBO = true; // TODO: why is this volatile? - private volatile boolean disableJoinMerge = false; + protected volatile boolean disableJoinMerge = false; /* * Capture the CTE definitions in a Query. @@ -382,18 +282,13 @@ private ArrayList ctesExpanded; /** Not thread-safe. */ - private final ASTSearcher astSearcher = new ASTSearcher(); + final ASTSearcher astSearcher = new ASTSearcher(); - private static class Phase1Ctx { + static class Phase1Ctx { String dest; int nextNum; } - protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException { - this(conf); - this.runCBO = runCBO; - } - public SemanticAnalyzer(HiveConf conf) throws SemanticException { super(conf); opToPartPruner = new HashMap(); @@ -408,7 +303,8 @@ public SemanticAnalyzer(HiveConf conf) throws SemanticException { opParseCtx = new LinkedHashMap, OpParseContext>(); joinContext = new HashMap(); smbMapJoinContext = new HashMap(); - topToTable = new HashMap(); + // Must be deterministic order map for consistent q-test output across Java versions + topToTable = new LinkedHashMap(); fsopToTable = new HashMap(); reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList(); topToTableProps = new HashMap>(); @@ -674,6 +570,14 @@ public static String generateErrorMessage(ASTNode ast, String message) { return sb.toString(); } + ASTNode getAST() { + return this.ast; + } + + protected void setAST(ASTNode newAST) { + this.ast = newAST; + } + /** * Goes though the tabref tree and finds the alias for the table. Once found, * it records the table name-> alias association in aliasToTabs. 
It also makes @@ -805,6 +709,10 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { return alias; } + Map getNameToSplitSampleMap() { + return this.nameToSplitSample; + } + // Generate a temp table out of a value clause private ASTNode genValuesTempTable(ASTNode originalFrom) throws SemanticException { // Pick a name for the table @@ -1074,7 +982,7 @@ private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias) throws Sem qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr); } - private boolean isJoinToken(ASTNode node) { + static boolean isJoinToken(ASTNode node) { if ((node.getToken().getType() == HiveParser.TOK_JOIN) || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN) || isOuterJoinToken(node) @@ -1086,7 +994,7 @@ private boolean isJoinToken(ASTNode node) { return false; } - private boolean isOuterJoinToken(ASTNode node) { + static private boolean isOuterJoinToken(ASTNode node) { return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN); @@ -1182,30 +1090,6 @@ private String processLateralView(QB qb, ASTNode lateralView) return alias; } - /** The context that doPhase1 uses to populate information pertaining - * to CBO (currently, this is used for CTAS and insert-as-select). */ - private static class PreCboCtx { - enum Type { - NONE, - INSERT, - CTAS, - - UNEXPECTED - } - public ASTNode nodeOfInterest; - public Type type = Type.NONE; - public void set(Type type, ASTNode ast) { - if (this.type != Type.NONE) { - STATIC_LOG.warn("Setting " + type + " when already " + this.type - + "; node " + ast.dump() + " vs old node " + nodeOfInterest.dump()); - this.type = Type.UNEXPECTED; - return; - } - this.type = type; - this.nodeOfInterest = ast; - } - } - /** * Phase 1: (including, but not limited to): * @@ -1223,7 +1107,7 @@ public void set(Type type, ASTNode ast) { * @throws SemanticException */ @SuppressWarnings({"fallthrough", "nls"}) - public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PreCboCtx cboCtx) + public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) throws SemanticException { boolean phase1Result = true; @@ -1285,9 +1169,11 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PreCboCtx cboCtx) if (qbp.getIsSubQ() && !isTmpFileDest) { throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); } - if (cboCtx != null && !isTmpFileDest) { - cboCtx.set(PreCboCtx.Type.INSERT, ast); + + if (plannerCtx != null) { + plannerCtx.setInsertToken(ast, isTmpFileDest); } + qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); if (qbp.getClauseNamesForDest().size() > 1) { @@ -1516,17 +1402,12 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PreCboCtx cboCtx) for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) { // Recurse phase1Result = phase1Result && doPhase1( - (ASTNode)ast.getChild(child_pos), qb, ctx_1, cboCtx); + (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx); } } return phase1Result; } - private void traceLogAst(ASTNode ast, String what) { - if (!LOG.isTraceEnabled()) return; - LOG.trace(what + ast.dump()); - } - private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { @@ -2761,7 +2642,7 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, @SuppressWarnings("nls") // TODO: make aliases 
unique, otherwise needless rewriting takes place - private Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, + Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ArrayList col_list, HashSet excludeCols, RowResolver input, RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, boolean ensureUniqueCols) throws SemanticException { @@ -2907,7 +2788,7 @@ private String getScriptArgs(String cmd) { return (end == -1) ? "" : cmd.substring(end, cmd.length()); } - private static int getPositionFromInternalName(String internalName) { + static int getPositionFromInternalName(String internalName) { return HiveConf.getPositionFromInternalName(internalName); } @@ -3365,7 +3246,7 @@ private int setBit(int bitmap, int bitIdx) { } } - private static String[] getColAlias(ASTNode selExpr, String defaultName, + static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR, boolean includeFuncName, int colNum) { String colAlias = null; String tabAlias = null; @@ -3441,7 +3322,7 @@ private int setBit(int bitmap, int bitIdx) { * Returns whether the pattern is a regex expression (instead of a normal * string). Normal string is a string with all alphabets/digits and "_". */ - private static boolean isRegex(String pattern, HiveConf conf) { + static boolean isRegex(String pattern, HiveConf conf) { String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); if ( "column".equals(qIdSupport)) { return false; @@ -3734,7 +3615,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { return output; } - private String recommendName(ExprNodeDesc exp, String colAlias) { + String recommendName(ExprNodeDesc exp, String colAlias) { if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { return null; } @@ -3745,6 +3626,14 @@ private String recommendName(ExprNodeDesc exp, String colAlias) { return null; } + String getAutogenColAliasPrfxLbl() { + return this.autogenColAliasPrfxLbl; + } + + boolean autogenColAliasPrfxIncludeFuncName() { + return this.autogenColAliasPrfxIncludeFuncName; + } + /** * Class to store GenericUDAF related information. 
*/ @@ -3851,7 +3740,7 @@ static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, return r; } - private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( GroupByDesc.Mode mode, boolean isDistinct) { switch (mode) { case COMPLETE: @@ -6292,9 +6181,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) if (!("".equals(nm[0])) && nm[1] != null) { colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove `` } - if (runCBO) { - colName = fixCtasColumnName(colName); - } + colName = fixCtasColumnName(colName); col.setName(colName); col.setType(colInfo.getType().getTypeName()); field_schemas.add(col); @@ -6376,7 +6263,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) } else { try { StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc - .getDeserializer().getObjectInspector(); + .getDeserializer(conf).getObjectInspector(); List fields = rowObjectInspector .getAllStructFieldRefs(); for (int i = 0; i < fields.size(); i++) { @@ -6473,12 +6360,8 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) return output; } - private static String fixCtasColumnName(String colName) { - int lastDot = colName.lastIndexOf('.'); - if (lastDot < 0) return colName; // alias is not fully qualified - String nqColumnName = colName.substring(lastDot + 1); - STATIC_LOG.debug("Replacing " + colName + " (produced by CBO) by " + nqColumnName); - return nqColumnName; + String fixCtasColumnName(String colName) { + return colName; } // Check constraints on acid tables. This includes @@ -8334,6 +8217,10 @@ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[ return new ObjectPair(res, tgtToNodeExprMap); } + boolean continueJoinMerge() { + return true; + } + // try merge join tree from inner most source // (it was merged from outer most to inner, which could be invalid) // @@ -8374,7 +8261,7 @@ private void mergeJoinTree(QB qb) { if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) { if (node.getRightAliases().length + target.getRightAliases().length + 1 > 16) { LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_16); - continueScanning = !runCBO; + continueScanning = continueJoinMerge(); continue; } } @@ -8386,7 +8273,7 @@ private void mergeJoinTree(QB qb) { * for CBO provided orderings, don't attempt to reorder joins. * only convert consecutive joins into n-way joins. 
*/ - continueScanning = !runCBO; + continueScanning = continueJoinMerge(); if (prevType == null) { prevType = currType; } @@ -9550,7 +9437,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { return output; } - private boolean isSkewedCol(String alias, QB qb, String colName) { + static boolean isSkewedCol(String alias, QB qb, String colName) { boolean isSkewedCol = false; List skewedCols = qb.getSkewedColumnNames(alias); for (String skewedCol : skewedCols) { @@ -9759,7 +9646,7 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck) rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck); } - this.qb = qb; + setQB(qb); return bodyOpInfo; } @@ -9990,174 +9877,134 @@ public void init(boolean clearPartsCache) { this.qb = qb; } + boolean analyzeCreateTable(ASTNode child) throws SemanticException { + if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { + // if it is not CTAS, we don't need to go further and just return + if ((child = analyzeCreateTable(ast, qb, null)) == null) { + return true; + } + } else { + SessionState.get().setCommandType(HiveOperation.QUERY); + } + + return false; + } + @Override @SuppressWarnings("nls") public void analyzeInternal(ASTNode ast) throws SemanticException { + analyzeInternal(ast, new PlannerContext()); + } + + /** + * Planner specific stuff goes in here. + */ + static class PlannerContext { + protected ASTNode child; + protected Phase1Ctx ctx_1; + + void setParseTreeAttr(ASTNode child, Phase1Ctx ctx_1) { + this.child = child; + this.ctx_1 = ctx_1; + } + + void setCTASToken(ASTNode child) { + } + + void setInsertToken(ASTNode ast, boolean isTmpFileDest) { + } + } + + boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { ASTNode child = ast; this.ast = ast; viewsExpanded = new ArrayList(); ctesExpanded = new ArrayList(); - LOG.info("Starting Semantic Analysis"); - - // analyze and process the position alias + // 1. analyze and process the position alias processPositionAlias(ast); - // Check configuration for CBO first. - runCBO = runCBO && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED); - // analyze create table command - PreCboCtx cboCtx = runCBO ? new PreCboCtx() : null; + // 2. analyze create table command if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { // if it is not CTAS, we don't need to go further and just return - if ((child = analyzeCreateTable(ast, qb, cboCtx)) == null) { - return; + if ((child = analyzeCreateTable(ast, qb, plannerCtx)) == null) { + return false; } } else { SessionState.get().setCommandType(HiveOperation.QUERY); } - // analyze create view command - if (ast.getToken().getType() == HiveParser.TOK_CREATEVIEW || - (ast.getToken().getType() == HiveParser.TOK_ALTERVIEW - && ast.getChild(1).getType() == HiveParser.TOK_QUERY)) { + // 3. analyze create view command + if (ast.getToken().getType() == HiveParser.TOK_CREATEVIEW + || (ast.getToken().getType() == HiveParser.TOK_ALTERVIEW && ast.getChild(1).getType() == HiveParser.TOK_QUERY)) { child = analyzeCreateView(ast, qb); SessionState.get().setCommandType(HiveOperation.CREATEVIEW); if (child == null) { - return; + return false; } viewSelect = child; // prevent view from referencing itself viewsExpanded.add(createVwDesc.getViewName()); } - // continue analyzing from the child ASTNode. + // 4. continue analyzing from the child ASTNode. 
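/*
 * Aside on the PlannerContext introduced above: the empty setCTASToken and
 * setInsertToken bodies are deliberate no-op hooks. SemanticAnalyzer itself
 * records nothing, while a CBO-aware planner subclass can override them to
 * remember the AST nodes it must patch back after optimization. A minimal
 * sketch of such an override (the class and field names below are
 * illustrative, not part of this patch):
 *
 *   static class CboPlannerContext extends PlannerContext {
 *     ASTNode nodeOfInterest; // CTAS query or non-temp-file insert target
 *
 *     @Override
 *     void setCTASToken(ASTNode child) {
 *       this.nodeOfInterest = child;
 *     }
 *
 *     @Override
 *     void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
 *       if (!isTmpFileDest) {
 *         this.nodeOfInterest = ast;
 *       }
 *     }
 *   }
 */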
Phase1Ctx ctx_1 = initPhase1Ctx(); - if (!doPhase1(child, qb, ctx_1, cboCtx)) { + if (!doPhase1(child, qb, ctx_1, plannerCtx)) { // if phase1Result false return - return; + return false; } - LOG.info("Completed phase 1 of Semantic Analysis"); + // 5. Resolve Parse Tree getMetaData(qb); LOG.info("Completed getting MetaData in Semantic Analysis"); - // Note: for now, we don't actually pass the queryForCbo to CBO, because it accepts qb, not - // AST, and can also access all the private stuff in SA. We rely on the fact that CBO - // ignores the unknown tokens (create table, destination), so if the query is otherwise ok, - // it is as if we did remove those and gave CBO the proper AST. That is kinda hacky. - if (runCBO) { - ASTNode queryForCbo = ast; - if (cboCtx.type == PreCboCtx.Type.CTAS) { - queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query - } - runCBO = canHandleAstForCbo(queryForCbo, qb, cboCtx); - } - - // Save the result schema derived from the sink operator produced - // by genPlan. This has the correct column names, which clients - // such as JDBC would prefer instead of the c0, c1 we'll end - // up with later. - Operator sinkOp = null; + plannerCtx.setParseTreeAttr(child, ctx_1); - if (runCBO) { - disableJoinMerge = true; - CalciteBasedPlanner calcitePlanner = new CalciteBasedPlanner(); - boolean reAnalyzeAST = false; + return true; + } - try { - // 1. Gen Optimized AST - ASTNode newAST = calcitePlanner.getOptimizedAST(prunedPartitions); - - // 1.1. Fix up the query for insert/ctas - newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); - - // 2. Regen OP plan from optimized AST - init(false); - if (cboCtx.type == PreCboCtx.Type.CTAS) { - // Redo create-table analysis, because it's not part of doPhase1. - newAST = reAnalyzeCtasAfterCbo(newAST); - } - ctx_1 = initPhase1Ctx(); - if (!doPhase1(newAST, qb, ctx_1, null)) { - throw new RuntimeException( - "Couldn't do phase1 on CBO optimized query plan"); - } - // unfortunately making prunedPartitions immutable is not possible here - // with SemiJoins not all tables are costed in CBO, - // so their PartitionList is not evaluated until the run phase. - //prunedPartitions = ImmutableMap.copyOf(prunedPartitions); - getMetaData(qb); - - disableJoinMerge = false; - sinkOp = genPlan(qb); - LOG.info("CBO Succeeded; optimized logical plan."); - LOG.debug(newAST.dump()); + Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { + return genPlan(qb); + } - /* - * Use non CBO Result Set Schema so as to preserve user specified names. - * Hive seems to have bugs with OB/LIMIT in sub queries. // 3. Reset - * result set schema resultSchema = - * convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp) - * .getRowResolver(), true); - */ - } catch (Exception e) { - boolean isMissingStats = calcitePlanner.noColsMissingStats.get() > 0; - if (isMissingStats) { - LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO"); - } else { - LOG.error("CBO failed, skipping CBO. 
", e); - } - if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || isMissingStats - || e instanceof CalciteSemanticException) { - reAnalyzeAST = true; - } else if (e instanceof SemanticException) { - throw (SemanticException)e; - } else if (e instanceof RuntimeException) { - throw (RuntimeException)e; - } else { - throw new SemanticException(e); - } - } finally { - runCBO = false; - disableJoinMerge = false; - if (reAnalyzeAST) { - init(true); - prunedPartitions.clear(); - analyzeInternal(ast); - return; - } - } - } else { - sinkOp = genPlan(qb); + void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { + // 1. Generate Resolved Parse tree from syntax tree + LOG.info("Starting Semantic Analysis"); + if (!genResolvedParseTree(ast, plannerCtx)) { + return; } - if (createVwDesc != null) + // 2. Gen OP Tree from resolved Parse Tree + Operator sinkOp = genOPTree(ast, plannerCtx); + + // 3. Deduce Resultset Schema + if (createVwDesc != null) { resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver()); - else + } else { resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); + } - ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, - opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, - topToTable, topToTableProps, fsopToTable, - loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, - opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner, viewAliasToInput, - reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); + // 4. Generate Parse Context for Optimizer & Physical compiler + ParseContext pCtx = new ParseContext(conf, qb, plannerCtx.child, opToPartPruner, opToPartList, + topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, topToTableProps, + fsopToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, + globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, + viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); + // 5. Take care of view creation if (createVwDesc != null) { saveViewDefinition(); - // validate the create view statement - // at this point, the createVwDesc gets all the information for semantic check + // validate the create view statement at this point, the createVwDesc gets + // all the information for semanticcheck validateCreateView(createVwDesc); - // Since we're only creating a view (not executing it), we - // don't need to optimize or translate the plan (and in fact, those - // procedures can interfere with the view creation). So - // skip the rest of this method. + // Since we're only creating a view (not executing it), we don't need to + // optimize or translate the plan (and in fact, those procedures can + // interfere with the view creation). So skip the rest of this method. ctx.setResDir(null); ctx.setResFile(null); @@ -10169,176 +10016,58 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { return; } - // Generate table access stats if required + // 6. 
Generate table access stats if required if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) { TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx); setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess()); } + // 7. Perform Logical optimization if (LOG.isDebugEnabled()) { LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); } - Optimizer optm = new Optimizer(); optm.setPctx(pCtx); optm.initialize(conf); pCtx = optm.optimize(); - FetchTask origFetchTask = pCtx.getFetchTask(); - if (LOG.isDebugEnabled()) { LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); } - // Generate column access stats if required - wait until column pruning takes place - // during optimization + // 8. Generate column access stats if required - wait until column pruning + // takes place during optimization boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED); - if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == true) { ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx); setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess()); } + // 9. Optimize Physical op tree & Translate to target execution engine (MR, + // TEZ..) if (!ctx.getExplainLogical()) { - // At this point we have the complete operator tree - // from which we want to create the map-reduce plan TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx); compiler.init(conf, console, db); compiler.compile(pCtx, rootTasks, inputs, outputs); fetchTask = pCtx.getFetchTask(); } - LOG.info("Completed plan generation"); - // put accessed columns to readEntity + // 10. put accessed columns to readEntity if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { putAccessedColumnsToReadEntity(inputs, columnAccessInfo); } + // 11. if desired check we're not going over partition scan limits if (!ctx.getExplain()) { - // if desired check we're not going over partition scan limits enforceScanLimits(pCtx, origFetchTask); } return; } - private ASTNode fixUpCtasAndInsertAfterCbo( - ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) throws SemanticException { - switch (cboCtx.type) { - case NONE: return newAst; // nothing to do - case CTAS: { - // Patch the optimized query back into original CTAS AST, replacing the original query. - replaceASTChild(cboCtx.nodeOfInterest, newAst); - return originalAst; - } - case INSERT: { - // We need to patch the dest back to original into new query. - // This makes assumptions about the structure of the AST. - ASTNode newDest = astSearcher.simpleBreadthFirstSearch( - newAst, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION); - if (newDest == null) { - LOG.error("Cannot find destination after CBO; new ast is "+ newAst.dump()); - throw new SemanticException("Cannot find destination after CBO"); - } - replaceASTChild(newDest, cboCtx.nodeOfInterest); - return newAst; - } - default: throw new AssertionError("Unexpected type " + cboCtx.type); - } - } - - private ASTNode reAnalyzeCtasAfterCbo(ASTNode newAst) throws SemanticException { - // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it here. 
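/*
 * This side channel is why the patch adds the getAST()/setAST() accessors
 * earlier in this file: once this logic moves out of SemanticAnalyzer, a
 * planner subclass has to reset the analyzer's AST before re-running the
 * create-table analysis, and must treat a null return as failure. A minimal
 * sketch of that call pattern (the method name reAnalyzeCtas is
 * illustrative, not part of this patch):
 *
 *   ASTNode reAnalyzeCtas(ASTNode newAst) throws SemanticException {
 *     setAST(newAst);                  // analyzeCreateTable reads this.ast
 *     ASTNode selectStmt = analyzeCreateTable(newAst, qb, null);
 *     if (selectStmt == null) {
 *       throw new SemanticException("Failed to initialize CTAS after CBO");
 *     }
 *     return selectStmt;
 *   }
 */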
- this.ast = newAst; - newAst = analyzeCreateTable(newAst, qb, null); - if (newAst == null) { - LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" - + " new ast is " + this.ast.dump()); - throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO"); - } - return newAst; - } - - private boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { - int root = ast.getToken().getType(); - boolean needToLogMessage = LOG.isInfoEnabled(); - boolean isSupportedRoot = - root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN || qb.isCTAS(); - // Check AST. - // Assumption: If top level QB is query then everything below it must also be Query - // Can there be an insert or CTAS that wouldn't - // be supported and would require additional checks similar to IsQuery? - boolean isSupportedType = - qb.getIsQuery() || qb.isCTAS() || cboCtx.type == PreCboCtx.Type.INSERT; - boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); - boolean result = isSupportedRoot && isSupportedType && createVwDesc == null && noBadTokens; - if (!result) { - if (needToLogMessage) { - String msg = ""; - if (!isSupportedRoot) msg += "doesn't have QUERY or EXPLAIN as root and not a CTAS; "; - if (!isSupportedType) msg += "is not a query, CTAS, or insert; "; - if (createVwDesc != null) msg += "has create view; "; - if (!noBadTokens) msg += "has unsupported tokens; "; - - if (msg.isEmpty()) msg += "has some unspecified limitations; "; - LOG.info("Not invoking CBO because the statement " + msg.substring(0, msg.length() - 2)); - } - return false; - } - // Now check QB in more detail. canHandleQbForCbo returns null if query can be handled. - String msg = canHandleQbForCbo(qb, true, needToLogMessage); - if (msg == null) { - return true; - } - if (needToLogMessage) { - LOG.info("Not invoking CBO because the statement " + msg.substring(0, msg.length() - 2)); - } - return false; - } - - private class ASTSearcher { - private final LinkedList searchQueue = new LinkedList(); - /** - * Performs breadth-first search of the AST for a nested set of tokens. Tokens don't have to be - * each others' direct children, they can be separated by layers of other tokens. For each token - * in the list, the first one found is matched and there's no backtracking; thus, if AST has - * multiple instances of some token, of which only one matches, it is not guaranteed to be found. - * We use this for simple things. - * Not thread-safe - reuses searchQueue. - */ - public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... 
tokens) { - searchQueue.clear(); - searchQueue.add(ast); - for (int i = 0; i < tokens.length; ++i) { - boolean found = false; - int token = tokens[i]; - while (!searchQueue.isEmpty() && !found) { - ASTNode next = searchQueue.poll(); - found = next.getType() == token; - if (found) { - if (i == tokens.length - 1) return next; - searchQueue.clear(); - } - for (int j = 0; j < next.getChildCount(); ++j) { - searchQueue.add((ASTNode)next.getChild(j)); - } - } - if (!found) return null; - } - return null; - } - } - - private void replaceASTChild(ASTNode child, ASTNode newChild) { - ASTNode parent = (ASTNode)child.parent; - int childIndex = child.childIndex; - parent.deleteChild(childIndex); - parent.insertChild(childIndex, newChild); - } - private void putAccessedColumnsToReadEntity(HashSet inputs, ColumnAccessInfo columnAccessInfo) { Map> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { @@ -10514,13 +10243,13 @@ private void saveViewDefinition() throws SemanticException { createVwDesc.setViewExpandedText(expandedText); } - private List convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException { + static List convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException { List fieldSchema = convertRowSchemaToResultSetSchema(rr, false); ParseUtils.validateColumnNameUniqueness(fieldSchema); return fieldSchema; } - private List convertRowSchemaToResultSetSchema(RowResolver rr, + static List convertRowSchemaToResultSetSchema(RowResolver rr, boolean useTabAliasIfAvailable) { List fieldSchemas = new ArrayList(); String[] qualifiedColName; @@ -10835,8 +10564,8 @@ public RowResolver getRowResolver(Operator opt) { * the semantic analyzer need to deal with the select statement with respect * to the SerDe and Storage Format. */ - private ASTNode analyzeCreateTable( - ASTNode ast, QB qb, PreCboCtx cboCtx) throws SemanticException { + ASTNode analyzeCreateTable( + ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException { String[] qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0)); String dbDotTab = getDotName(qualifiedTabName); @@ -10926,8 +10655,8 @@ private ASTNode analyzeCreateTable( throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); } command_type = CTAS; - if (cboCtx != null) { - cboCtx.set(PreCboCtx.Type.CTAS, child); + if (plannerCtx != null) { + plannerCtx.setCTASToken(child); } selectStmt = child; break; @@ -11184,6 +10913,10 @@ private ASTNode analyzeCreateView(ASTNode ast, QB qb) return selectStmt; } + CreateViewDesc getCreateViewDesc() { + return this.createVwDesc; + } + // validate the create view statement // the statement could be CREATE VIEW, REPLACE VIEW, or ALTER VIEW AS SELECT // check semantic conditions @@ -12495,2160 +12228,4 @@ protected boolean updating() { protected boolean deleting() { return false; } - - /**** Temporary Place Holder For Calcite plan Gen, Optimizer ****/ - - /** - * Entry point to Optimizations using Calcite. Checks whether Calcite can handle the query. - * @param qbToChk Query block to check. - * @param verbose Whether return value should be verbose in case of failure. - * @return null if the query can be handled; non-null reason string if it cannot be. - */ - private String canHandleQbForCbo(QB qbToChk, boolean topLevelQB, boolean verbose) { - // Assumption: - // 1. If top level QB is query then everything below it must also be Query - // 2. 
Nested Subquery will return false for qbToChk.getIsQuery() - boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); - boolean isStrictTest = isInTest - && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); - boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest; - if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() - && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() - && !queryProperties.hasPTF() && !queryProperties.usesScript() - && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { - return null; // Ok to run CBO. - } - - // Not ok to run CBO, build error message. - String msg = ""; - if (verbose) { - if (isStrictTest) msg += "is in test running in mode other than nonstrict; "; - if (!hasEnoughJoins) msg += "has too few joins; "; - if (queryProperties.hasClusterBy()) msg += "has cluster by; "; - if (queryProperties.hasDistributeBy()) msg += "has distribute by; "; - if (queryProperties.hasSortBy()) msg += "has sort by; "; - if (queryProperties.hasPTF()) msg += "has PTF; "; - if (queryProperties.usesScript()) msg += "uses scripts; "; - if (queryProperties.hasMultiDestQuery()) msg += "is a multi-destination query; "; - if (queryProperties.hasLateralViews()) msg += "has lateral views; "; - - if (msg.isEmpty()) msg += "has some unspecified limitations; "; - } - return msg; - } - - private class CalciteBasedPlanner implements Frameworks.PlannerAction { - private RelOptCluster cluster; - private RelOptSchema relOptSchema; - private SemanticException semanticException; - private Map partitionCache; - private final AtomicInteger noColsMissingStats = new AtomicInteger(0); - List topLevelFieldSchema; - - // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or - // just last one. - LinkedHashMap relToHiveRR = new LinkedHashMap(); - LinkedHashMap> relToHiveColNameCalcitePosMap = new LinkedHashMap>(); - - private ASTNode getOptimizedAST(Map partitionCache) - throws SemanticException { - ASTNode calciteOptimizedAST = null; - RelNode optimizedCalcitePlan = null; - this.partitionCache = partitionCache; - - try { - optimizedCalcitePlan = Frameworks.withPlanner(this, - Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); - } catch (Exception e) { - rethrowCalciteException(e); - throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); - } - calciteOptimizedAST = ASTConverter.convert(optimizedCalcitePlan, topLevelFieldSchema); - - return calciteOptimizedAST; - } - - /* - * Unwraps a chain of useless UndeclaredThrowableException-s, InvocationTargetException-s - * and RuntimeException-s potentially coming from CBO/Calcite code. - */ - private void rethrowCalciteException(Exception e) throws SemanticException { - Throwable first = (semanticException != null) ? semanticException : e, - current = first, cause = current.getCause(); - while (cause != null) { - Throwable causeOfCause = cause.getCause(); - if (current == first && causeOfCause == null && isUselessCause(first)) { - // "cause" is a root cause, and "e"/"first" is a useless exception it's wrapped in. - first = cause; - break; - } else if (causeOfCause != null && isUselessCause(cause) - && ExceptionHelper.resetCause(current, causeOfCause)) { - // "cause" was a useless intermediate cause and was replace it with its own cause. 
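/*
 * The loop above works on Throwable's cause chain: wrappers such as
 * UndeclaredThrowableException and InvocationTargetException carry no
 * information of their own, so they are spliced out and the first
 * meaningful cause is rethrown. A reduced sketch of the idea that only
 * strips leading wrappers (the full version also repairs mid-chain causes
 * via ExceptionHelper.resetCause):
 *
 *   Throwable unwrap(Throwable t) {
 *     while (t.getCause() != null && isUselessCause(t)) {
 *       t = t.getCause(); // drop the useless wrapper
 *     }
 *     return t;
 *   }
 */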
- cause = causeOfCause; - continue; // do loop once again with the new cause of "current" - } - current = cause; - cause = current.getCause(); - } - - if (first instanceof RuntimeException) { - throw (RuntimeException)first; - } else if (first instanceof SemanticException) { - throw (SemanticException)first; - } - throw new RuntimeException(first); - } - - private boolean isUselessCause(Throwable t) { - return t instanceof RuntimeException || t instanceof InvocationTargetException - || t instanceof UndeclaredThrowableException; - } - - @Override - public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { - RelNode calciteGenPlan = null; - RelNode calcitePreCboPlan = null; - RelNode calciteOptimizedPlan = null; - - /* - * recreate cluster, so that it picks up the additional traitDef - */ - RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); - final RelOptQuery query = new RelOptQuery(planner); - final RexBuilder rexBuilder = cluster.getRexBuilder(); - cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); - - this.cluster = cluster; - this.relOptSchema = relOptSchema; - - try { - calciteGenPlan = genLogicalPlan(qb, true); - topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calciteGenPlan), - HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); - } catch (SemanticException e) { - semanticException = e; - throw new RuntimeException(e); - } - - calcitePreCboPlan = applyPreCBOTransforms(calciteGenPlan, HiveDefaultRelMetadataProvider.INSTANCE); - List list = Lists.newArrayList(); - list.add(HiveDefaultRelMetadataProvider.INSTANCE); - RelTraitSet desiredTraits = cluster.traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); - - HepProgram hepPgm = null; - HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) - .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); - hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, - HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); - - hepPgm = hepPgmBldr.build(); - HepPlanner hepPlanner = new HepPlanner(hepPgm); - - hepPlanner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); - - RelNode rootRel = calcitePreCboPlan; - hepPlanner.setRoot(rootRel); - if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { - rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); - } - hepPlanner.setRoot(rootRel); - - calciteOptimizedPlan = hepPlanner.findBestExp(); - - if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { - LOG.debug("CBO Planning details:\n"); - LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); - LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n" - + RelOptUtil.toString(calcitePreCboPlan)); - LOG.debug("Plan After Join Reordering:\n" - + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); - } - - return calciteOptimizedPlan; - } - - public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { - - // TODO: Decorelation of subquery should be done before attempting - // Partition Pruning; otherwise Expression evaluation may try to execute - // corelated sub query. 
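/*
 * Every transform below funnels into hepPlan(), which drives Calcite's
 * heuristic planner: build a HepProgram from the rule set, register the
 * metadata provider, seed the planner with the current plan, and take
 * findBestExp() as the rewritten plan. A compact sketch of that driver,
 * using only Calcite calls that already appear in this patch (Guava's
 * Lists is assumed to be imported):
 *
 *   RelNode runRules(RelNode basePlan, RelMetadataProvider mdProvider,
 *       RelOptRule... rules) {
 *     HepProgramBuilder builder = new HepProgramBuilder();
 *     for (RelOptRule rule : rules) {
 *       builder.addRuleInstance(rule); // fire each rule to fixpoint
 *     }
 *     HepPlanner planner = new HepPlanner(builder.build());
 *     planner.registerMetadataProviders(Lists.newArrayList(mdProvider));
 *     planner.setRoot(basePlan);
 *     return planner.findBestExp(); // the fully rewritten plan
 *   }
 */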
- - // Push Down Semi Joins - basePlan = hepPlan(basePlan, true, mdProvider, - SemiJoinJoinTransposeRule.INSTANCE, - SemiJoinFilterTransposeRule.INSTANCE, - SemiJoinProjectTransposeRule.INSTANCE); - - basePlan = hepPlan(basePlan, true, mdProvider, - new HiveFilterProjectTransposeRule( - Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), new FilterMergeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, - new FilterAggregateTransposeRule( - Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, - Aggregate.class)); - - basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), - // TODO: Enable it after CALCITE-407 is fixed - //RemoveTrivialProjectRule.INSTANCE, - new HivePartitionPruneRule(SemanticAnalyzer.this.conf)); - - RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, - HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, - HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); - basePlan = fieldTrimmer.trim(basePlan); - - basePlan = hepPlan(basePlan, true, mdProvider, - new FilterProjectTransposeRule(Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY)); - - return basePlan; - } - - private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, RelOptRule... rules) { - - RelNode optimizedRelNode = basePlan; - HepProgramBuilder programBuilder = new HepProgramBuilder(); - if (followPlanChanges) { - programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN); - programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); - } else { - // TODO: Should this be also TOP_DOWN? - for (RelOptRule r : rules) - programBuilder.addRuleInstance(r); - } - - HepPlanner planner = new HepPlanner(programBuilder.build()); - List list = Lists.newArrayList(); - list.add(mdProvider); - planner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - basePlan.getCluster().setMetadataProvider( - new CachingRelMetadataProvider(chainedProvider, planner)); - - planner.setRoot(basePlan); - optimizedRelNode = planner.findBestExp(); - - return optimizedRelNode; - } - - @SuppressWarnings("nls") - private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, - String rightalias, RelNode rightRel) throws SemanticException { - HiveUnion unionRel = null; - - // 1. Get Row Resolvers, Column map for original left and right input of - // Union Rel - RowResolver leftRR = this.relToHiveRR.get(leftRel); - RowResolver rightRR = this.relToHiveRR.get(rightRel); - HashMap leftmap = leftRR.getFieldMap(leftalias); - HashMap rightmap = rightRR.getFieldMap(rightalias); - - // 2. Validate that Union is feasible according to Hive (by using type - // info from RR) - if (leftmap.size() != rightmap.size()) { - throw new SemanticException("Schema of both sides of union should match."); - } - - ASTNode tabref = qb.getAliases().isEmpty() ? 
null : qb.getParseInfo().getSrcForAlias( - qb.getAliases().get(0)); - for (Map.Entry lEntry : leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - ColumnInfo rInfo = rightmap.get(field); - if (rInfo == null) { - throw new SemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match. " + rightalias - + " does not have the field " + field)); - } - if (lInfo == null) { - throw new SemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match. " + leftalias - + " does not have the field " + field)); - } - if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { - throw new CalciteSemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match: field " + field + ":" - + " appears on the left side of the UNION at column position: " - + getPositionFromInternalName(lInfo.getInternalName()) - + ", and on the right side of the UNION at column position: " - + getPositionFromInternalName(rInfo.getInternalName()) - + ". Column positions should match for a UNION")); - } - // try widening coversion, otherwise fail union - TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), - rInfo.getType()); - if (commonTypeInfo == null) { - throw new CalciteSemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match: Column " + field + " is of type " - + lInfo.getType().getTypeName() + " on first table and type " - + rInfo.getType().getTypeName() + " on second table")); - } - } - - // 3. construct Union Output RR using original left & right Input - RowResolver unionoutRR = new RowResolver(); - for (Map.Entry lEntry : leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - ColumnInfo rInfo = rightmap.get(field); - ColumnInfo unionColInfo = new ColumnInfo(lInfo); - unionColInfo.setTabAlias(unionalias); - unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), - rInfo.getType())); - unionoutRR.put(unionalias, field, unionColInfo); - } - - // 4. 
Determine which columns requires cast on left/right input (Calcite - // requires exact types on both sides of union) - boolean leftNeedsTypeCast = false; - boolean rightNeedsTypeCast = false; - List leftProjs = new ArrayList(); - List rightProjs = new ArrayList(); - List leftRowDT = leftRel.getRowType().getFieldList(); - List rightRowDT = rightRel.getRowType().getFieldList(); - - RelDataType leftFieldDT; - RelDataType rightFieldDT; - RelDataType unionFieldDT; - for (int i = 0; i < leftRowDT.size(); i++) { - leftFieldDT = leftRowDT.get(i).getType(); - rightFieldDT = rightRowDT.get(i).getType(); - if (!leftFieldDT.equals(rightFieldDT)) { - unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(), - cluster.getTypeFactory()); - if (!unionFieldDT.equals(leftFieldDT)) { - leftNeedsTypeCast = true; - } - leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, - cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); - - if (!unionFieldDT.equals(rightFieldDT)) { - rightNeedsTypeCast = true; - } - rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, - cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); - } else { - leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT, - cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); - rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT, - cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); - } - } - - // 5. Introduce Project Rel above original left/right inputs if cast is - // needed for type parity - RelNode unionLeftInput = leftRel; - RelNode unionRightInput = rightRel; - if (leftNeedsTypeCast) { - unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType() - .getFieldNames()); - } - if (rightNeedsTypeCast) { - unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType() - .getFieldNames()); - } - - // 6. Construct Union Rel - ImmutableList.Builder bldr = new ImmutableList.Builder(); - bldr.add(unionLeftInput); - bldr.add(unionRightInput); - unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), - bldr.build()); - - relToHiveRR.put(unionRel, unionoutRR); - relToHiveColNameCalcitePosMap.put(unionRel, - this.buildHiveToCalciteColumnMap(unionoutRR, unionRel)); - - return unionRel; - } - - private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType, - ASTNode joinCond) throws SemanticException { - RelNode joinRel = null; - - // 1. construct the RowResolver for the new Join Node by combining row - // resolvers from left, right - RowResolver leftRR = this.relToHiveRR.get(leftRel); - RowResolver rightRR = this.relToHiveRR.get(rightRel); - RowResolver joinRR = null; - - if (hiveJoinType != JoinType.LEFTSEMI) { - joinRR = RowResolver.getCombinedRR(leftRR, rightRR); - } else { - joinRR = new RowResolver(); - if (!RowResolver.add(joinRR, leftRR)) { - LOG.warn("Duplicates detected when adding columns to RR: see previous message"); - } - } - - // 2. 
Construct ExpressionNodeDesc representing Join Condition - RexNode calciteJoinCond = null; - if (joinCond != null) { - JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); - Map exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, - jCtx); - if (jCtx.getError() != null) - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), - jCtx.getError())); - - ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); - - List inputRels = new ArrayList(); - inputRels.add(leftRel); - inputRels.add(rightRel); - calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, - relToHiveRR, relToHiveColNameCalcitePosMap, false); - } else { - calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); - } - - // 3. Validate that join condition is legal (i.e no function refering to - // both sides of join, only equi join) - // TODO: Join filter handling (only supported for OJ by runtime or is it - // supported for IJ as well) - - // 4. Construct Join Rel Node - boolean leftSemiJoin = false; - JoinRelType calciteJoinType; - switch (hiveJoinType) { - case LEFTOUTER: - calciteJoinType = JoinRelType.LEFT; - break; - case RIGHTOUTER: - calciteJoinType = JoinRelType.RIGHT; - break; - case FULLOUTER: - calciteJoinType = JoinRelType.FULL; - break; - case LEFTSEMI: - calciteJoinType = JoinRelType.INNER; - leftSemiJoin = true; - break; - case INNER: - default: - calciteJoinType = JoinRelType.INNER; - break; - } - - if (leftSemiJoin) { - List sysFieldList = new ArrayList(); - List leftJoinKeys = new ArrayList(); - List rightJoinKeys = new ArrayList(); - - RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, - calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null); - - if (!nonEquiConds.isAlwaysTrue()) { - throw new SemanticException("Non equality condition not supported in Semi-Join" - + nonEquiConds); - } - - RelNode[] inputRels = new RelNode[] { leftRel, rightRel }; - final List leftKeys = new ArrayList(); - final List rightKeys = new ArrayList(); - calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions( - HiveProject.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0, - leftKeys, rightKeys); - - joinRel = new SemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), - ImmutableIntList.copyOf(rightKeys)); - } else { - joinRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType, - leftSemiJoin); - } - // 5. Add new JoinRel & its RR to the maps - relToHiveColNameCalcitePosMap.put(joinRel, this.buildHiveToCalciteColumnMap(joinRR, joinRel)); - relToHiveRR.put(joinRel, joinRR); - - return joinRel; - } - - /** - * Generate Join Logical Plan Relnode by walking through the join AST. - * - * @param qb - * @param aliasToRel - * Alias(Table/Relation alias) to RelNode; only read and not - * written in to by this method - * @return - * @throws SemanticException - */ - private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map aliasToRel) - throws SemanticException { - RelNode leftRel = null; - RelNode rightRel = null; - JoinType hiveJoinType = null; - - if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { - String msg = String.format("UNIQUE JOIN is currently not supported in CBO," - + " turn off cbo to use UNIQUE JOIN."); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - - // 1. 
Determine Join Type - // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN - switch (joinParseTree.getToken().getType()) { - case HiveParser.TOK_LEFTOUTERJOIN: - hiveJoinType = JoinType.LEFTOUTER; - break; - case HiveParser.TOK_RIGHTOUTERJOIN: - hiveJoinType = JoinType.RIGHTOUTER; - break; - case HiveParser.TOK_FULLOUTERJOIN: - hiveJoinType = JoinType.FULLOUTER; - break; - case HiveParser.TOK_LEFTSEMIJOIN: - hiveJoinType = JoinType.LEFTSEMI; - break; - default: - hiveJoinType = JoinType.INNER; - break; - } - - // 2. Get Left Table Alias - ASTNode left = (ASTNode) joinParseTree.getChild(0); - if ((left.getToken().getType() == HiveParser.TOK_TABREF) - || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) - || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)) - .toLowerCase(); - String leftTableAlias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left - .getChild(left.getChildCount() - 1).getText().toLowerCase()); - // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? - // partitionTableFunctionSource partitioningSpec? expression*) - // guranteed to have an lias here: check done in processJoin - leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(left - .getChild(1).getText().toLowerCase()) - : leftTableAlias; - leftRel = aliasToRel.get(leftTableAlias); - } else if (isJoinToken(left)) { - leftRel = genJoinLogicalPlan(left, aliasToRel); - } else { - assert (false); - } - - // 3. Get Right Table Alias - ASTNode right = (ASTNode) joinParseTree.getChild(1); - if ((right.getToken().getType() == HiveParser.TOK_TABREF) - || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) - || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)) - .toLowerCase(); - String rightTableAlias = right.getChildCount() == 1 ? tableName : unescapeIdentifier(right - .getChild(right.getChildCount() - 1).getText().toLowerCase()); - // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? - // partitionTableFunctionSource partitioningSpec? expression*) - // guranteed to have an lias here: check done in processJoin - rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(right - .getChild(1).getText().toLowerCase()) - : rightTableAlias; - rightRel = aliasToRel.get(rightTableAlias); - } else { - assert (false); - } - - // 4. Get Join Condn - ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); - - // 5. Create Join rel - return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond); - } - - private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException { - RowResolver rr = new RowResolver(); - HiveTableScan tableRel = null; - - try { - - // 1. If the table has a Sample specified, bail from Calcite path. - if ( qb.getParseInfo().getTabSample(tableAlias) != null || - SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias)) { - String msg = String.format("Table Sample specified for %s." + - " Currently we don't support Table Sample clauses in CBO," + - " turn off cbo for queries on tableSamples.", tableAlias); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - - // 2. Get Table Metadata - Table tab = qb.getMetaData().getSrcForAlias(tableAlias); - - // 3. 
Get Table Logical Schema (Row Type) - // NOTE: Table logical schema = Non Partition Cols + Partition Cols + - // Virtual Cols - - // 3.1 Add Column info for non partion cols (Object Inspector fields) - StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() - .getObjectInspector(); - List fields = rowObjectInspector.getAllStructFieldRefs(); - ColumnInfo colInfo; - String colName; - ArrayList cInfoLst = new ArrayList(); - for (int i = 0; i < fields.size(); i++) { - colName = fields.get(i).getFieldName(); - colInfo = new ColumnInfo( - fields.get(i).getFieldName(), - TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), - tableAlias, false); - colInfo.setSkewedCol((isSkewedCol(tableAlias, qb, colName)) ? true : false); - rr.put(tableAlias, colName, colInfo); - cInfoLst.add(colInfo); - } - // TODO: Fix this - ArrayList nonPartitionColumns = new ArrayList(cInfoLst); - ArrayList partitionColumns = new ArrayList(); - - // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tab.getPartCols()) { - colName = part_col.getName(); - colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); - rr.put(tableAlias, colName, colInfo); - cInfoLst.add(colInfo); - partitionColumns.add(colInfo); - } - - // 3.3 Add column info corresponding to virtual columns - Iterator vcs = VirtualColumn.getRegistry(conf).iterator(); - while (vcs.hasNext()) { - VirtualColumn vc = vcs.next(); - colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, - vc.getIsHidden()); - rr.put(tableAlias, vc.getName(), colInfo); - cInfoLst.add(colInfo); - } - - // 3.4 Build row type from field - RelDataType rowType = TypeConverter.getType(cluster, rr, null); - - // 4. Build RelOptAbstractTable - String fullyQualifiedTabName = tab.getDbName(); - if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) - fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); - else - fullyQualifiedTabName = tab.getTableName(); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache, - noColsMissingStats); - - // 5. Build Hive Table Scan Rel - tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, - rowType); - - // 6. Add Schema(RR) to RelNode-Schema map - ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, tableRel); - relToHiveRR.put(tableRel, rr); - relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); - } catch (Exception e) { - if (e instanceof SemanticException) { - throw (SemanticException) e; - } else { - throw (new RuntimeException(e)); - } - } - - return tableRel; - } - - private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel)); - if (filterCondn instanceof ExprNodeConstantDesc && - !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { - // queries like select * from t1 where 'foo'; - // Calcite's rule PushFilterThroughProject chokes on it. Arguably, we can insert a cast to - // boolean in such cases, but since Postgres, Oracle and MS SQL server fail on compile time - // for such queries, its an arcane corner case, not worth of adding that complexity. 
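/*
 * Concrete example of the case rejected here: a constant, non-boolean
 * predicate such as
 *
 *   SELECT * FROM t1 WHERE 'foo';
 *
 * genExprNodeDesc() folds 'foo' into an ExprNodeConstantDesc of string
 * type, the instanceof/type guard above fires, and the
 * CalciteSemanticException thrown below sends the query back through the
 * non-CBO fallback in analyzeInternal instead of handing Calcite a filter
 * its PushFilterThroughProject rule cannot handle.
 */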
- throw new CalciteSemanticException("Filter expression with non-boolean return type."); - } - ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - hiveColNameCalcitePosMap, 0, true).convert(filterCondn); - RexNode factoredFilterExpr = RexUtil.pullFactors(cluster.getRexBuilder(), convertedFilterExpr); - RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, factoredFilterExpr); - this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); - relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); - relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); - - return filterRel; - } - - private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, - Map aliasToRel, boolean forHavingClause) throws SemanticException { - /* - * Handle Subquery predicates. - * - * Notes (8/22/14 hb): Why is this a copy of the code from {@link - * #genFilterPlan} - for now we will support the same behavior as non CBO - * route. - but plan to allow nested SubQueries(Restriction.9.m) and - * multiple SubQuery expressions(Restriction.8.m). This requires use to - * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush out - * Null semantics(CALCITE-373) - besides only the driving code has been - * copied. Most of the code which is SubQueryUtils and QBSubQuery is - * reused. - */ - int numSrcColumns = srcRel.getRowType().getFieldCount(); - List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - if (subQueriesInOriginalTree.size() > 0) { - - /* - * Restriction.9.m :: disallow nested SubQuery expressions. - */ - if (qb.getSubQueryPredicateDef() != null) { - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); - } - - /* - * Restriction.8.m :: We allow only 1 SubQuery expression per Query. - */ - if (subQueriesInOriginalTree.size() > 1) { - - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); - } - - /* - * Clone the Search AST; apply all rewrites on the clone. 
- */ - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outerQBRR = inputRR; - ImmutableMap outerQBPosMap = - relToHiveColNameCalcitePosMap.get(srcRel); - - for (int i = 0; i < subQueries.size(); i++) { - ASTNode subQueryAST = subQueries.get(i); - ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); - - int sqIdx = qb.incrNumSubQueryPredicates(); - clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); - - QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); - - if (!forHavingClause) { - qb.setWhereClauseSubQueryPredicate(subQuery); - } else { - qb.setHavingClauseSubQueryPredicate(subQuery); - } - String havingInputAlias = null; - - if (forHavingClause) { - havingInputAlias = "gby_sq" + sqIdx; - aliasToRel.put(havingInputAlias, srcRel); - } - - subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, - aliasToRel.keySet()); - - QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); - qbSQ.setSubQueryDef(subQuery.getSubQuery()); - Phase1Ctx ctx_1 = initPhase1Ctx(); - doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); - getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); - aliasToRel.put(subQuery.getAlias(), subQueryRelNode); - RowResolver sqRR = relToHiveRR.get(subQueryRelNode); - - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if (subQuery.getOperator().getType() != SubQueryType.EXISTS - && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { - throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, - "SubQuery can contain only 1 item in Select List.")); - } - - /* - * If this is a Not In SubQuery Predicate then Join in the Null Check - * SubQuery. See QBSubQuery.NotInCheck for details on why and how this - * is constructed. - */ - if (subQuery.getNotInCheck() != null) { - QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); - notInCheck.setSQRR(sqRR); - QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); - qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); - ctx_1 = initPhase1Ctx(); - doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); - getMetaData(qbSQ_nic); - RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); - aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); - srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, - // set explicitly to inner until we figure out SemiJoin use - // notInCheck.getJoinType(), - JoinType.INNER, notInCheck.getJoinConditionAST()); - inputRR = relToHiveRR.get(srcRel); - if (forHavingClause) { - aliasToRel.put(havingInputAlias, srcRel); - } - } - - /* - * Gen Join between outer Operator and SQ op - */ - subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); - srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), - subQuery.getJoinConditionAST()); - searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); - - srcRel = genFilterRelNode(searchCond, srcRel); - - /* - * For Not Exists and Not In, add a projection on top of the Left - * Outer Join. 
- */
- if (subQuery.getOperator().getType() == SubQueryType.NOT_EXISTS
- || subQuery.getOperator().getType() == SubQueryType.NOT_IN) {
- srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
- }
- }
- relToHiveRR.put(srcRel, outerQBRR);
- relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap);
- return srcRel;
- }
-
- return genFilterRelNode(searchCond, srcRel);
- }
-
- private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
- RowResolver iRR = relToHiveRR.get(srcRel);
- RowResolver oRR = new RowResolver();
- RowResolver.add(oRR, iRR, numColumns);
-
- List calciteColLst = new ArrayList();
- List oFieldNames = new ArrayList();
- RelDataType iType = srcRel.getRowType();
-
- for (int i = 0; i < iType.getFieldCount(); i++) {
- RelDataTypeField fType = iType.getFieldList().get(i);
- String fName = iType.getFieldNames().get(i);
- calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
- oFieldNames.add(fName);
- }
-
- HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
-
- this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel));
- this.relToHiveRR.put(selRel, oRR);
- return selRel;
- }
-
- private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel,
- boolean forHavingClause) throws SemanticException {
- RelNode filterRel = null;
-
- Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
- .iterator();
- if (whereClauseIterator.hasNext()) {
- filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
- aliasToRel, forHavingClause);
- }
-
- return filterRel;
- }
-
- /**
- * Class to store GenericUDAF related information.
- */
- private class AggInfo {
- private final List m_aggParams;
- private final TypeInfo m_returnType;
- private final String m_udfName;
- private final boolean m_distinct;
-
- private AggInfo(List aggParams, TypeInfo returnType, String udfName,
- boolean isDistinct) {
- m_aggParams = aggParams;
- m_returnType = returnType;
- m_udfName = udfName;
- m_distinct = isDistinct;
- }
- }
-
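convertGBAgg, which follows, rewrites each aggregate argument as a reference into the Project built under the Aggregate, reusing an existing projection when the same expression (keyed by its RexNode string form) has already been seen. The bookkeeping reduces to the following standalone sketch (hypothetical names, illustration only):

    import java.util.List;
    import java.util.Map;

    // Returns the child-project position for an expression key, appending a
    // new projection only when the expression has not been seen before.
    static int childProjPos(String rexKey, Map<String, Integer> seen, List<String> childProj) {
      Integer pos = seen.get(rexKey);
      if (pos == null) {
        pos = childProj.size();
        childProj.add(rexKey);
        seen.put(rexKey, pos);
      }
      return pos;
    }

So count(c1) and sum(c1) over the same column yield a single child projection for c1, referenced by both AggregateCalls.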
- private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbChildProjLst,
- RexNodeConverter converter, HashMap rexNodeToPosMap,
- Integer childProjLstIndx) throws SemanticException {
-
- // 1. Get agg fn ret type in Calcite
- RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
- this.cluster.getTypeFactory());
-
- // 2. Convert Agg Fn args and type of args to Calcite
- // TODO: Does HQL allow expressions as aggregate args or can it only be
- // projections from child?
- Integer inputIndx;
- List argList = new ArrayList();
- RexNode rexNd = null;
- RelDataTypeFactory dtFactory = this.cluster.getTypeFactory();
- ImmutableList.Builder aggArgRelDTBldr = new ImmutableList.Builder();
- for (ExprNodeDesc expr : agg.m_aggParams) {
- rexNd = converter.convert(expr);
- inputIndx = rexNodeToPosMap.get(rexNd.toString());
- if (inputIndx == null) {
- gbChildProjLst.add(rexNd);
- rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx);
- inputIndx = childProjLstIndx;
- childProjLstIndx++;
- }
- argList.add(inputIndx);
-
- // TODO: does arg need type cast?
- aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory));
- }
-
- // 3. Get Aggregation FN from Calcite given name, ret type and input arg
- // type
- final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName,
- aggArgRelDTBldr.build(), aggFnRetType);
-
- return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null);
- }
-
- private RelNode genGBRelNode(List gbExprs, List aggInfoLst,
- RelNode srcRel) throws SemanticException {
- RowResolver gbInputRR = this.relToHiveRR.get(srcRel);
- ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel);
- RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
- posMap, 0, false);
-
- final List gbChildProjLst = Lists.newArrayList();
- final HashMap rexNodeToPosMap = new HashMap();
- final List groupSetPositions = Lists.newArrayList();
- Integer gbIndx = 0;
- RexNode rnd;
- for (ExprNodeDesc key : gbExprs) {
- rnd = converter.convert(key);
- gbChildProjLst.add(rnd);
- groupSetPositions.add(gbIndx);
- rexNodeToPosMap.put(rnd.toString(), gbIndx);
- gbIndx++;
- }
- final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
-
- List aggregateCalls = Lists.newArrayList();
- for (AggInfo agg : aggInfoLst) {
- aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
- gbChildProjLst.size()));
- }
-
- if (gbChildProjLst.isEmpty()) {
- // This will happen for count(*); in such cases we arbitrarily pick the
- // first element from srcRel
- gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
- }
- RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null);
-
- HiveRelNode aggregateRel = null;
- try {
- aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
- gbInputRel, false, groupSet, null, aggregateCalls);
- } catch (InvalidRelException e) {
- throw new SemanticException(e);
- }
-
- return aggregateRel;
- }
-
- private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
- RowResolver gByInputRR, RowResolver gByRR) {
- if (gByExpr.getType() == HiveParser.DOT
- && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
- String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
- .getText());
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText());
- gByRR.put(tab_alias, col_alias, colInfo);
- } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) {
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText());
- String tab_alias = null;
- /*
- * If the input to the GBy has a tab alias for the column, then add an
- * entry based on that tab_alias. E.g. this query: select b.x,
- * count(*) from t1 b group by x needs (tab_alias=b, col_alias=x) in the
- * GBy RR. tab_alias=b comes from looking at the RowResolver that is the
- * ancestor before any GBy/ReduceSinks added for the GBY operation.
- */
- try {
- ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias);
- tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
- } catch (SemanticException se) {
- // ignore: keep the null tab_alias
- }
- gByRR.put(tab_alias, col_alias, colInfo);
- }
- }
-
- private void addToGBExpr(RowResolver groupByOutputRowResolver,
- RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc,
- List gbExprNDescLst, List outputColumnNames) {
- // TODO: Should we use grpbyExprNDesc.getTypeInfo()?
what if expr is - // UDF - int i = gbExprNDescLst.size(); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - gbExprNDescLst.add(grpbyExprNDesc); - - ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); - groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); - - addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, - groupByOutputRowResolver); - } - - private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) - throws SemanticException { - AggInfo aInfo = null; - - // 1 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); - for (int i = 1; i <= aggFnLstArgIndx; i++) { - ASTNode paraExpr = (ASTNode) aggAst.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); - aggParameters.add(paraExprNode); - } - - // 2. Is this distinct UDAF - boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; - - // 3. Determine type of UDAF - TypeInfo udafRetType = null; - - // 3.1 Obtain UDAF name - String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); - - // 3.2 Rank functions type is 'int'/'double' - if (FunctionRegistry.isRankingFunction(aggName)) { - if (aggName.equalsIgnoreCase("percent_rank")) - udafRetType = TypeInfoFactory.doubleTypeInfo; - else - udafRetType = TypeInfoFactory.intTypeInfo; - } else { - // 3.3 Try obtaining UDAF evaluators to determine the ret type - try { - boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 3.3.1 Get UDAF Evaluator - Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); - - GenericUDAFEvaluator genericUDAFEvaluator = null; - if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) - || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { - ArrayList originalParameterTypeInfos = - getWritableObjectInspector(aggParameters); - genericUDAFEvaluator = - FunctionRegistry.getGenericWindowingEvaluator(aggName, - originalParameterTypeInfos, isDistinct, isAllColumns); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); - } else { - genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, - aggParameters, aggAst, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - - // 3.3.2 Get UDAF Info using UDAF Evaluator - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - udafRetType = udaf.returnType; - } - } catch (Exception e) { - LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName - + ", trying to translate to GenericUDF"); - } - - // 3.4 Try GenericUDF translation - if (udafRetType == null) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - // We allow stateful functions in the SELECT list (but nowhere else) - tcCtx.setAllowStatefulFunctions(true); - tcCtx.setAllowDistinctFunctions(false); - ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); - udafRetType = exp.getTypeInfo(); - } - } - - // 4. Construct AggInfo - aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); - - return aInfo; - } - - /** - * Generate GB plan. - * - * @param qb - * @param srcRel - * @return TODO: 1. Grouping Sets (roll up..) - * @throws SemanticException - */ - private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - RelNode gbRel = null; - QBParseInfo qbp = getQBParseInfo(qb); - - // 0. 
for GSets, Cube, Rollup, bail from the Calcite path.
- if (!qbp.getDestRollups().isEmpty()
- || !qbp.getDestGroupingSets().isEmpty()
- || !qbp.getDestCubes().isEmpty()) {
- String gbyClause = null;
- HashMap gbysMap = qbp.getDestToGroupBy();
- if (gbysMap.size() == 1) {
- ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue();
- gbyClause = SemanticAnalyzer.this.ctx.getTokenRewriteStream()
- .toString(gbyAST.getTokenStartIndex(),
- gbyAST.getTokenStopIndex());
- gbyClause = "in '" + gbyClause + "'.";
- } else {
- gbyClause = ".";
- }
- String msg = String.format("Encountered Grouping Set/Cube/Rollup%s"
- + " Currently we don't support Grouping Set/Cube/Rollup"
- + " clauses in CBO," + " turn off cbo for these queries.",
- gbyClause);
- LOG.debug(msg);
- throw new CalciteSemanticException(msg);
- }
-
- // 1. Gather GB Expressions (AST) (GB + Aggregations)
- // NOTE: Multi Insert is not supported
- String destClauseName = qbp.getClauseNames().iterator().next();
- List grpByAstExprs = getGroupByForClause(qbp, destClauseName);
- HashMap aggregationTrees = qbp.getAggregationExprsForClause(destClauseName);
- boolean hasGrpByAstExprs = grpByAstExprs != null && !grpByAstExprs.isEmpty();
- boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
-
- if (hasGrpByAstExprs || hasAggregationTrees) {
- ArrayList gbExprNDescLst = new ArrayList();
- ArrayList outputColumnNames = new ArrayList();
-
- // 2. Input, Output Row Resolvers
- RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel);
- RowResolver groupByOutputRowResolver = new RowResolver();
- groupByOutputRowResolver.setIsExprResolver(true);
-
- if (hasGrpByAstExprs) {
- // 3. Construct GB Keys (ExprNode)
- for (int i = 0; i < grpByAstExprs.size(); ++i) {
- ASTNode grpbyExpr = grpByAstExprs.get(i);
- Map astToExprNDescMap = TypeCheckProcFactory.genExprNode(
- grpbyExpr, new TypeCheckCtx(groupByInputRowResolver));
- ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr);
- if (grpbyExprNDesc == null)
- throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump());
-
- addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr,
- grpbyExprNDesc, gbExprNDescLst, outputColumnNames);
- }
- }
-
- // 4.
Construct aggregation function Info - ArrayList aggregations = new ArrayList(); - if (hasAggregationTrees) { - assert (aggregationTrees != null); - for (ASTNode value : aggregationTrees.values()) { - // 4.1 Determine type of UDAF - // This is the GenericUDAF name - String aggName = unescapeIdentifier(value.getChild(0).getText()); - boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; - boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 4.2 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode) value.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); - aggParameters.add(paraExprNode); - } - - Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, - aggParameters, value, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); - aggregations.add(aInfo); - String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size() - 1); - outputColumnNames.add(field); - groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, - "", false)); - } - } - - gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); - relToHiveColNameCalcitePosMap.put(gbRel, - buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); - this.relToHiveRR.put(gbRel, groupByOutputRowResolver); - } - - return gbRel; - } - - /** - * Generate OB RelNode and input Select RelNode that should be used to - * introduce top constraining Project. If Input select RelNode is not - * present then don't introduce top constraining select. - * - * @param qb - * @param srcRel - * @param outermostOB - * @return Pair Key- OB RelNode, Value - Input Select for - * top constraining Select - * @throws SemanticException - */ - private Pair genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB) - throws SemanticException { - RelNode sortRel = null; - RelNode originalOBChild = null; - - QBParseInfo qbp = getQBParseInfo(qb); - String dest = qbp.getClauseNames().iterator().next(); - ASTNode obAST = qbp.getOrderByForClause(dest); - - if (obAST != null) { - // 1. OB Expr sanity test - // in strict mode, in the presence of order by, limit must be specified - Integer limit = qb.getParseInfo().getDestLimit(dest); - if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") - && limit == null) { - throw new SemanticException(generateErrorMessage(obAST, - ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); - } - - // 2. 
Walk through OB exprs and extract field collations and additional - // virtual columns needed - final List newVCLst = new ArrayList(); - final List fieldCollations = Lists.newArrayList(); - int fieldIndex = 0; - - List obASTExprLst = obAST.getChildren(); - ASTNode obASTExpr; - List> vcASTTypePairs = new ArrayList>(); - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outputRR = new RowResolver(); - - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); - int srcRelRecordSz = srcRel.getRowType().getFieldCount(); - - for (int i = 0; i < obASTExprLst.size(); i++) { - // 2.1 Convert AST Expr to ExprNode - obASTExpr = (ASTNode) obASTExprLst.get(i); - Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( - obASTExpr, new TypeCheckCtx(inputRR)); - ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); - if (obExprNDesc == null) - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); - - // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); - - // 2.3 Determine the index of ob expr in child schema - // NOTE: Calcite can not take compound exprs in OB without it being - // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); - } else { - fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair((ASTNode) obASTExpr.getChild(0), - obExprNDesc.getTypeInfo())); - } - - // 2.4 Determine the Direction of order by - org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { - order = RelFieldCollation.Direction.ASCENDING; - } - - // 2.5 Add to field collations - fieldCollations.add(new RelFieldCollation(fieldIndex, order)); - } - - // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel - // for top constraining Sel - RelNode obInputRel = srcRel; - if (!newVCLst.isEmpty()) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - RowResolver obSyntheticProjectRR = new RowResolver(); - if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - int vcolPos = inputRR.getRowSchema().getSignature().size(); - for (Pair astTypePair : vcASTTypePairs) { - obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( - getColumnInternalName(vcolPos), astTypePair.getValue(), null, false)); - vcolPos++; - } - obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), - obSyntheticProjectRR, srcRel); - - if (outermostOB) { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - - } else { - if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - originalOBChild = srcRel; - } - } else { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - } - - // 4. 
Construct SortRel
- RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
- RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
- sortRel = new HiveSort(cluster, traitSet, obInputRel, canonizedCollation, null, null);
-
- // 5. Update the maps
- // NOTE: Output RR for SortRel is considered the same as its input; we may
- // end up not using a VC that is present in the sort rel. Also note that the
- // rowtype of the sortrel is the type of its child; if the child happens to be
- // the synthetic project that we introduced, then that projectrel would
- // contain the vc.
- ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR,
- sortRel);
- relToHiveRR.put(sortRel, outputRR);
- relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
- }
-
- return (new Pair(sortRel, originalOBChild));
- }
-
- private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
- HiveRelNode sortRel = null;
- QBParseInfo qbp = getQBParseInfo(qb);
- Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
-
- if (limit != null) {
- RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
- RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
- RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY);
- sortRel = new HiveSort(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
-
- RowResolver outputRR = new RowResolver();
- if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) {
- throw new CalciteSemanticException(
- "Duplicates detected when adding columns to RR: see previous message");
- }
- ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR,
- sortRel);
- relToHiveRR.put(sortRel, outputRR);
- relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
- }
-
- return sortRel;
- }
-
- List getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, RowResolver inputRR)
- throws SemanticException {
- List pKeys = new ArrayList();
- if (ps != null) {
- List pExprs = ps.getExpressions();
- for (PartitionExpression pExpr : pExprs) {
- TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
- tcCtx.setAllowStatefulFunctions(true);
- ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx);
- pKeys.add(converter.convert(exp));
- }
- }
-
- return pKeys;
- }
-
- List getOrderKeys(OrderSpec os, RexNodeConverter converter,
- RowResolver inputRR) throws SemanticException {
- List oKeys = new ArrayList();
- if (os != null) {
- List oExprs = os.getExpressions();
- for (OrderExpression oExpr : oExprs) {
- TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
- tcCtx.setAllowStatefulFunctions(true);
- ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx);
- RexNode ordExp = converter.convert(exp);
- Set flags = new HashSet();
- if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC)
- flags.add(SqlKind.DESCENDING);
- oKeys.add(new RexFieldCollation(ordExp, flags));
- }
- }
-
- return oKeys;
- }
-
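getBound, which follows, maps a Hive window boundary onto a Calcite RexWindowBound: an amount of 0 stands for UNBOUNDED, CURRENT maps to CURRENT ROW, and a bounded PRECEDING/FOLLOWING carries its amount both as a SqlLiteral and as a RexNode literal. The shape of the mapping, reduced to its cases (hypothetical enum and method, illustration only):

    enum Direction { PRECEDING, CURRENT, FOLLOWING }

    // Mirrors the convention used by getBound: amt == 0 means "unbounded".
    static String describeBound(Direction d, int amt) {
      switch (d) {
        case PRECEDING: return amt == 0 ? "UNBOUNDED PRECEDING" : amt + " PRECEDING";
        case CURRENT:   return "CURRENT ROW";
        case FOLLOWING: return amt == 0 ? "UNBOUNDED FOLLOWING" : amt + " FOLLOWING";
        default: throw new AssertionError(d);
      }
    }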
- RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) {
- RexWindowBound rwb = null;
-
- if (bs != null) {
- SqlParserPos pos = new SqlParserPos(1, 1);
- SqlNode amt = bs.getAmt() == 0 ? null : SqlLiteral.createExactNumeric(
- String.valueOf(bs.getAmt()), new SqlParserPos(2, 2));
- RexNode amtLiteral = null;
- SqlCall sc = null;
-
- if (amt != null)
- amtLiteral = cluster.getRexBuilder().makeLiteral(Integer.valueOf(bs.getAmt()),
- cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true);
-
- switch (bs.getDirection()) {
- case PRECEDING:
- if (amt == null) {
- rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null);
- } else {
- sc = (SqlCall) SqlWindow.createPreceding(amt, pos);
- rwb = RexWindowBound.create(sc,
- cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
- }
- break;
-
- case CURRENT:
- rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null);
- break;
-
- case FOLLOWING:
- if (amt == null) {
- rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)),
- null);
- } else {
- sc = (SqlCall) SqlWindow.createFollowing(amt, pos);
- rwb = RexWindowBound.create(sc,
- cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
- }
- break;
- }
- }
-
- return rwb;
- }
-
- int getWindowSpecIndx(ASTNode wndAST) {
- int wi = wndAST.getChildCount() - 1;
- if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) {
- wi = -1;
- }
-
- return wi;
- }
-
- Pair genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, RelNode srcRel)
- throws SemanticException {
- RexNode w = null;
- TypeInfo wHiveRetType = null;
-
- if (wExpSpec instanceof WindowFunctionSpec) {
- WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec;
- ASTNode windowProjAst = wFnSpec.getExpression();
- // TODO: do we need to get to child?
- int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
- // 2. Get Hive Aggregate Info
- AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1,
- this.relToHiveRR.get(srcRel));
-
- // 3. Get Calcite Return type for Agg Fn
- wHiveRetType = hiveAggInfo.m_returnType;
- RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType,
- this.cluster.getTypeFactory());
-
- // 4. Convert Agg Fn args to Calcite
- ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel);
- RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
- posMap, 0, false);
- Builder calciteAggFnArgsBldr = ImmutableList. builder();
- Builder calciteAggFnArgsTypeBldr = ImmutableList. builder();
- for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) {
- calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i)));
- calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i)
- .getTypeInfo(), this.cluster.getTypeFactory()));
- }
- ImmutableList calciteAggFnArgs = calciteAggFnArgsBldr.build();
- ImmutableList calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build();
-
- // 5. Get Calcite Agg Fn
- final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn(hiveAggInfo.m_udfName,
- calciteAggFnArgsType, calciteAggFnRetType);
-
- // 6.
Translate Window spec
- RowResolver inputRR = relToHiveRR.get(srcRel);
- WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec();
- List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR);
- List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR);
- RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter);
- RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter);
- boolean isRows = (wndSpec.windowFrame.start instanceof RangeBoundarySpec)
- || (wndSpec.windowFrame.end instanceof RangeBoundarySpec);
-
- w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs,
- partitionKeys, ImmutableList. copyOf(orderKeys), lowerBound,
- upperBound, isRows, true, false);
- } else {
- // TODO: Convert to Semantic Exception
- throw new RuntimeException("Unsupported window spec");
- }
-
- return new Pair(w, wHiveRetType);
- }
-
- private RelNode genSelectForWindowing(
- QB qb, RelNode srcRel, HashSet newColumns) throws SemanticException {
- QBParseInfo qbp = getQBParseInfo(qb);
- WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
- .values().iterator().next() : null;
- if (wSpec == null) return null;
- // 1. Get valid Window Function Spec
- wSpec.validateAndMakeEffective();
- List windowExpressions = wSpec.getWindowExpressions();
- if (windowExpressions == null || windowExpressions.isEmpty()) return null;
-
- RowResolver inputRR = this.relToHiveRR.get(srcRel);
- // 2. Get RexNodes for original Projections from below
- List projsForWindowSelOp = new ArrayList(
- HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel));
-
- // 3. Construct new Row Resolver with everything from below.
- RowResolver out_rwsch = new RowResolver();
- if (!RowResolver.add(out_rwsch, inputRR)) {
- LOG.warn("Duplicates detected when adding columns to RR: see previous message");
- }
-
- // 4. Walk through Window Expressions & Construct RexNodes for those,
- // Update out_rwsch
- for (WindowExpressionSpec wExprSpec : windowExpressions) {
- if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
- Pair wtp = genWindowingProj(qb, wExprSpec, srcRel);
- projsForWindowSelOp.add(wtp.getKey());
-
- // 4.1 Update Output Row Schema
- ColumnInfo oColInfo = new ColumnInfo(
- getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
- // Window expressions are tracked by their expression AST, not by alias
- out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
- newColumns.add(oColInfo);
- }
- }
-
- return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
- }
-
- private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch,
- RelNode srcRel) throws CalciteSemanticException {
- // 1. Build Column Names
- Set colNamesSet = new HashSet();
- List cInfoLst = out_rwsch.getRowSchema().getSignature();
- ArrayList columnNames = new ArrayList();
- String[] qualifiedColNames;
- String tmpColAlias;
- for (int i = 0; i < calciteColLst.size(); i++) {
- ColumnInfo cInfo = cInfoLst.get(i);
- qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName());
- /*
- if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty())
- tmpColAlias = qualifiedColNames[0] + "."
+ qualifiedColNames[1];
- else
- */
- tmpColAlias = qualifiedColNames[1];
-
- // Prepend column names with '_o_' if they start with '_c'
- /*
- * Hive treats names that start with '_c' as internalNames; so change
- * the names so we don't run into this issue when converting back to
- * Hive AST.
- */
- if (tmpColAlias.startsWith("_c"))
- tmpColAlias = "_o_" + tmpColAlias;
- int suffix = 1;
- while (colNamesSet.contains(tmpColAlias)) {
- tmpColAlias = qualifiedColNames[1] + suffix;
- suffix++;
- }
-
- colNamesSet.add(tmpColAlias);
- columnNames.add(tmpColAlias);
- }
-
- // 3. Build Calcite Rel Node for project using converted projections & col
- // names
- HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames);
-
- // 4. Keep track of colname-to-posmap && RR for new select
- this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
- this.relToHiveRR.put(selRel, out_rwsch);
-
- return selRel;
- }
-
- private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch,
- RelNode srcRel, boolean removethismethod) throws CalciteSemanticException {
- // 1. Build Column Names
- // TODO: Should these be external names?
- ArrayList columnNames = new ArrayList();
- for (int i = 0; i < calciteColLst.size(); i++) {
- columnNames.add(getColumnInternalName(i));
- }
-
- // 2. Prepend column names with '_o_'
- /*
- * Hive treats names that start with '_c' as internalNames; so change the
- * names so we don't run into this issue when converting back to Hive AST.
- */
- List oFieldNames = Lists.transform(columnNames, new Function() {
- @Override
- public String apply(String hName) {
- return "_o_" + hName;
- }
- });
-
- // 3. Build Calcite Rel Node for project using converted projections & col
- // names
- HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
-
- // 4. Keep track of colname-to-posmap && RR for new select
- this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
- this.relToHiveRR.put(selRel, out_rwsch);
-
- return selRel;
- }
-
- /**
- * NOTE: there can only be one select clause since we don't handle
- * multi-destination insert.
- *
- * @throws SemanticException
- */
- private RelNode genSelectLogicalPlan(
- QB qb, RelNode srcRel, RelNode starSrcRel) throws SemanticException {
- // 0. Generate a Select Node for Windowing
- // Exclude the newly-generated select columns from */etc. resolution.
- HashSet excludedColumns = new HashSet();
- RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
- srcRel = (selForWindow == null) ? srcRel : selForWindow;
-
- ArrayList col_list = new ArrayList();
-
- // 1. Get Select Expression List
- QBParseInfo qbp = getQBParseInfo(qb);
- String selClauseName = qbp.getClauseNames().iterator().next();
- ASTNode selExprList = qbp.getSelForClause(selClauseName);
-
- // 2. Row resolvers for input, output
- RowResolver out_rwsch = new RowResolver();
- Integer pos = Integer.valueOf(0);
- // TODO: will this also fix windowing? try
- RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR;
- if (starSrcRel != null) {
- starRR = this.relToHiveRR.get(starSrcRel);
- }
-
- // 3.
Query Hints
- // TODO: Handle Query Hints; currently we ignore them
- boolean selectStar = false;
- int posn = 0;
- boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
- if (hintPresent) {
- String hint = SemanticAnalyzer.this.ctx.getTokenRewriteStream().
- toString(
- selExprList.getChild(0).getTokenStartIndex(),
- selExprList.getChild(0).getTokenStopIndex());
- String msg = String.format("Hint specified for %s."
- + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
- LOG.debug(msg);
- throw new CalciteSemanticException(msg);
- }
-
- // 4. Bail out if select involves Transform
- boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
- if (isInTransform) {
- String msg = String.format("SELECT TRANSFORM is currently not supported in CBO,"
- + " turn off cbo to use TRANSFORM.");
- LOG.debug(msg);
- throw new CalciteSemanticException(msg);
- }
-
- // 5. Bail out if select involves UDTF
- ASTNode expr = (ASTNode) selExprList.getChild(posn).getChild(0);
- int exprType = expr.getType();
- if (exprType == HiveParser.TOK_FUNCTION || exprType == HiveParser.TOK_FUNCTIONSTAR) {
- String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(expr, true);
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
- if (fi != null && fi.getGenericUDTF() != null) {
- String msg = String.format("UDTF " + funcName + " is currently not supported in CBO,"
- + " turn off cbo to use UDTF " + funcName);
- LOG.debug(msg);
- throw new CalciteSemanticException(msg);
- }
- }
-
- // 6. Iterate over all expressions (after SELECT)
- ASTNode exprList = selExprList;
- int startPosn = posn;
- List tabAliasesForAllProjs = getTabAliases(starRR);
- for (int i = startPosn; i < exprList.getChildCount(); ++i) {
-
- // 6.1 child can be EXPR AS ALIAS, or EXPR.
- ASTNode child = (ASTNode) exprList.getChild(i);
- boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
-
- // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs.
- // This check is not needed and invalid when there is a transform
- // b/c the ASTs are slightly different.
- if (child.getChildCount() > 2) {
- throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2),
- ErrorMsg.INVALID_AS.getMsg()));
- }
-
- String tabAlias;
- String colAlias;
-
- // 6.3 Get rid of TOK_SELEXPR
- expr = (ASTNode) child.getChild(0);
- String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
- autogenColAliasPrfxIncludeFuncName, i);
- tabAlias = colRef[0];
- colAlias = colRef[1];
-
- // 6.4 Build ExprNode corresponding to columns
- if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
- pos = genColListRegex(".*",
- expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0))
- .toLowerCase(), expr, col_list, excludedColumns, inputRR, starRR, pos,
- out_rwsch, tabAliasesForAllProjs, true);
- selectStar = true;
- } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
- && !inputRR.getIsExprResolver()
- && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
- // In case the expression is a regex COL.
- // This can only happen without an AS clause.
- // We don't allow this for ExprResolver - the Group By case
- pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
- col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs,
- true);
- } else if (expr.getType() == HiveParser.DOT
- && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
- && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
- .toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver()
- && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
- // In case the expression is TABLE.COL (col can be regex).
- // This can only happen without an AS clause.
- // We don't allow this for ExprResolver - the Group By case
- pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
- unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr,
- col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs,
- true);
- } else if (expr.toStringTree().contains("TOK_FUNCTIONDI")
- && !(srcRel instanceof HiveAggregate)) {
- // Likely a malformed query, e.g., select hash(distinct c1) from t1;
- throw new CalciteSemanticException("Distinct without an aggregation.");
- } else {
- // Case when this is an expression
- TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
- // We allow stateful functions in the SELECT list (but nowhere else)
- tcCtx.setAllowStatefulFunctions(true);
- ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
- String recommended = recommendName(exp, colAlias);
- if (recommended != null && out_rwsch.get(null, recommended) == null) {
- colAlias = recommended;
- }
- col_list.add(exp);
-
- ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
- exp.getWritableObjectInspector(), tabAlias, false);
- colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc)
- && ((ExprNodeColumnDesc) exp).isSkewedCol());
- if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
- throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "."
- + colAlias + " => " + colInfo + " due to duplication, see previous warnings");
- }
-
- if (exp instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
- String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
- if (altMapping != null) {
- out_rwsch.put(altMapping[0], altMapping[1], colInfo);
- }
- }
-
- pos = Integer.valueOf(pos.intValue() + 1);
- }
- }
- selectStar = selectStar && exprList.getChildCount() == posn + 1;
-
- // 7. Convert Hive projections to Calcite
- List calciteColLst = new ArrayList();
- RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(),
- buildHiveColNameToInputPosMap(col_list, inputRR), 0, false);
- for (ExprNodeDesc colExpr : col_list) {
- calciteColLst.add(rexNodeConv.convert(colExpr));
- }
-
- // 8.
Build Calcite Rel
- RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
-
- return selRel;
- }
-
- private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
- if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- return genLogicalPlan(qbexpr.getQB(), false);
- }
- if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) {
- RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1());
- RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2());
-
- return genUnionLogicalPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops,
- qbexpr.getQBExpr2().getAlias(), qbexpr2Ops);
- }
- return null;
- }
-
- private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException {
- RelNode srcRel = null;
- RelNode filterRel = null;
- RelNode gbRel = null;
- RelNode gbHavingRel = null;
- RelNode selectRel = null;
- RelNode obRel = null;
- RelNode limitRel = null;
-
- // First generate all the opInfos for the elements in the from clause
- Map aliasToRel = new HashMap();
-
- // 0. Check if we can handle the SubQuery;
- // canHandleQbForCbo returns null if the query can be handled.
- String reason = canHandleQbForCbo(qb, false, LOG.isDebugEnabled());
- if (reason != null) {
- String msg = "CBO cannot handle Sub Query";
- if (LOG.isDebugEnabled()) {
- LOG.debug(msg + " because it: " + reason);
- }
- throw new CalciteSemanticException(msg);
- }
-
- // 1. Build Rel For Src (SubQuery, TS, Join)
- // 1.1. Recurse over the subqueries to fill the subquery part of the plan
- for (String subqAlias : qb.getSubqAliases()) {
- QBExpr qbexpr = qb.getSubqForAlias(subqAlias);
- aliasToRel.put(subqAlias, genLogicalPlan(qbexpr));
- }
-
- // 1.2 Recurse over all the source tables
- for (String tableAlias : qb.getTabAliases()) {
- RelNode op = genTableLogicalPlan(tableAlias, qb);
- aliasToRel.put(tableAlias, op);
- }
-
- if (aliasToRel.isEmpty()) {
- // This may happen for queries like select 1; (no source table)
- // We could do the following, which is what Hive does,
- // and with it we would be able to generate a Calcite plan:
- // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
- // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb);
- // qb.addAlias(DUMMY_TABLE);
- // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
- // aliasToRel.put(DUMMY_TABLE, op);
- // However, Hive trips later while trying to get metadata for this dummy
- // table. So, for now let's just disable this. Anyway there is nothing much
- // to optimize in such cases.
- throw new CalciteSemanticException("Unsupported");
-
- }
- // 1.3 process join
- if (qb.getParseInfo().getJoinExpr() != null) {
- srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel);
- } else {
- // If no join then there should only be either 1 TS or 1 SubQuery
- srcRel = aliasToRel.values().iterator().next();
- }
-
- // 2. Build Rel for where Clause
- filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
- srcRel = (filterRel == null) ? srcRel : filterRel;
- RelNode starSrcRel = srcRel;
-
- // 3. Build Rel for GB Clause
- gbRel = genGBLogicalPlan(qb, srcRel);
- srcRel = (gbRel == null) ? srcRel : gbRel;
-
- // 4. Build Rel for GB Having Clause
- gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
- srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
-
- // 5. Build Rel for Select Clause
- selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel);
- srcRel = (selectRel == null) ? srcRel : selectRel;
-
- // 6.
Build Rel for OB Clause
- Pair obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB);
- obRel = obTopProjPair.getKey();
- RelNode topConstrainingProjArgsRel = obTopProjPair.getValue();
- srcRel = (obRel == null) ? srcRel : obRel;
-
- // 7. Build Rel for Limit Clause
- limitRel = genLimitLogicalPlan(qb, srcRel);
- srcRel = (limitRel == null) ? srcRel : limitRel;
-
- // 8. Introduce top constraining select if needed.
- // NOTES:
- // 1. Calcite cannot take an expr in OB; hence it needs to be added as a VC
- // in the input select; in such cases we need to introduce a select on top
- // to ensure the VC is not visible beyond Limit, OB.
- // 2. Hive cannot preserve order across select. In subqueries OB is used
- // to get a deterministic set of tuples from the following limit. Hence we
- // introduce the constraining select above Limit (if present) instead of
- // OB.
- // 3. The top level OB will not introduce a constraining select due to Hive
- // limitation (#2) stated above. The RR for OB will not include the VC. Thus
- // the Result Schema will not include exprs used by the top OB. During AST
- // conversion, in PlanModifierForASTConv we would modify the top level OB to
- // migrate exprs from the input sel to the SortRel (note that Calcite doesn't
- // support this; but since we are done with Calcite at this point it's OK).
- if (topConstrainingProjArgsRel != null) {
- List originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType()
- .getFieldList(), new Function() {
- @Override
- public RexNode apply(RelDataTypeField input) {
- return new RexInputRef(input.getIndex(), input.getType());
- }
- });
- RowResolver topConstrainingProjRR = new RowResolver();
- if (!RowResolver.add(
- topConstrainingProjRR, this.relToHiveRR.get(topConstrainingProjArgsRel))) {
- LOG.warn("Duplicates detected when adding columns to RR: see previous message");
- }
- srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel);
- }
-
- // 9.
In case this QB corresponds to a subquery, modify its RR to point
- // to the subquery alias.
- // TODO: clean this up
- if (qb.getParseInfo().getAlias() != null) {
- RowResolver rr = this.relToHiveRR.get(srcRel);
- RowResolver newRR = new RowResolver();
- String alias = qb.getParseInfo().getAlias();
- for (ColumnInfo colInfo : rr.getColumnInfos()) {
- String name = colInfo.getInternalName();
- String[] tmp = rr.reverseLookup(name);
- if ("".equals(tmp[0]) || tmp[1] == null) {
- // ast expression is not a valid column name for table
- tmp[1] = colInfo.getInternalName();
- }
- ColumnInfo newCi = new ColumnInfo(colInfo);
- newCi.setTabAlias(alias);
- newRR.put(alias, tmp[1], newCi);
- }
- relToHiveRR.put(srcRel, newRR);
- relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel));
- }
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Created Plan for Query Block " + qb.getId());
- }
-
- return srcRel;
- }
-
- private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel)
- throws SemanticException {
- RelNode gbFilter = null;
- QBParseInfo qbp = getQBParseInfo(qb);
- ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next());
-
- if (havingClause != null) {
- if (!(srcRel instanceof HiveAggregate)) {
- // ill-formed query like select * from t1 having c1 > 0;
- throw new CalciteSemanticException("Having clause without any group-by.");
- }
- validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0));
- gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel,
- true);
- }
-
- return gbFilter;
- }
-
- /*
- * Bail if the having clause uses Select Expression aliases for Aggregation
- * expressions. We could do what Hive does. But this is non-standard
- * behavior. Making sure this doesn't cause issues when translating through
- * Calcite is not worth it.
- */
- private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr)
- throws CalciteSemanticException {
-
- QBParseInfo qbPI = qb.getParseInfo();
- Map exprToAlias = qbPI.getAllExprToColumnAlias();
- /*
- * a mouthful, but safe:
- * - a QB is guaranteed to have at least 1 destination
- * - we don't support multi insert, so picking the first dest.
- */
- Set aggExprs = qbPI.getDestToAggregationExprs().values()
- .iterator().next().keySet();
-
- for (Map.Entry selExpr : exprToAlias.entrySet()) {
- ASTNode selAST = selExpr.getKey();
- if (!aggExprs.contains(selAST.toStringTree().toLowerCase())) {
- continue;
- }
- final String aliasToCheck = selExpr.getValue();
- final Set aliasReferences = new HashSet();
- TreeVisitorAction action = new TreeVisitorAction() {
-
- @Override
- public Object pre(Object t) {
- if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
- Object c = ParseDriver.adaptor.getChild(t, 0);
- if (c != null
- && ParseDriver.adaptor.getType(c) == HiveParser.Identifier
- && ParseDriver.adaptor.getText(c).equals(aliasToCheck)) {
- aliasReferences.add(t);
- }
- }
- return t;
- }
-
- @Override
- public Object post(Object t) {
- return t;
- }
- };
- new TreeVisitor(ParseDriver.adaptor).visit(havingExpr, action);
-
- if (aliasReferences.size() > 0) {
- String havingClause = SemanticAnalyzer.this.ctx
- .getTokenRewriteStream().toString(
- havingExpr.getTokenStartIndex(),
- havingExpr.getTokenStopIndex());
- String msg = String.format(
- "Encountered Select alias '%s' in having clause '%s'."
- + " This non-standard behavior is not supported with cbo on."
- + " Turn off cbo for these queries.", aliasToCheck, - havingClause); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - } - - } - - private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) { - ImmutableMap.Builder b = new ImmutableMap.Builder(); - int i = 0; - for (ColumnInfo ci : rr.getRowSchema().getSignature()) { - b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName())); - } - return b.build(); - } - - private ImmutableMap buildHiveColNameToInputPosMap( - List col_list, RowResolver inputRR) { - // Build a map of Hive column Names (ExprNodeColumnDesc Name) - // to the positions of those projections in the input - Map hashCodeTocolumnDescMap = new HashMap(); - ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); - ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); - String exprNodecolName; - for (ExprNodeDesc exprDesc : hashCodeTocolumnDescMap.values()) { - exprNodecolName = ((ExprNodeColumnDesc) exprDesc).getColumn(); - hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); - } - - return hiveColNameToInputPosMapBuilder.build(); - } - - private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { - QBParseInfo qbp = qb.getParseInfo(); - if (qbp.getClauseNames().size() > 1) { - String msg = String.format("Multi Insert is currently not supported in CBO," - + " turn off cbo to use Multi Insert."); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - return qbp; - } - - private List getTabAliases(RowResolver inputRR) { - List tabAliases = new ArrayList(); // TODO: this should be unique - for (ColumnInfo ci : inputRR.getColumnInfos()) { - tabAliases.add(ci.getTabAlias()); - } - - return tabAliases; - } - } - - private static class ExceptionHelper { - private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"), - TARGET_FIELD = getField(InvocationTargetException.class, "target"), - MESSAGE_FIELD = getField(Throwable.class, "detailMessage"); - private static Field getField(Class clazz, String name) { - try { - Field f = clazz.getDeclaredField(name); - f.setAccessible(true); - return f; - } catch (Throwable t) { - return null; - } - } - public static boolean resetCause(Throwable target, Throwable newCause) { - try { - if (MESSAGE_FIELD == null) return false; - Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD; - if (field == null) return false; - - Throwable oldCause = target.getCause(); - String oldMsg = target.getMessage(); - field.set(target, newCause); - if (oldMsg != null && oldMsg.equals(oldCause.toString())) { - MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString()); - } - } catch (Throwable se) { - return false; - } - return true; - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 6962ee9..9397b7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -273,8 +273,12 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) case HiveParser.TOK_DELETE_FROM: return new UpdateDeleteSemanticAnalyzer(conf); - default: - return new SemanticAnalyzer(conf); + default: { + SemanticAnalyzer semAnalyzer = HiveConf + .getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) ? 
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
index 6962ee9..9397b7d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
@@ -273,8 +273,12 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree)
case HiveParser.TOK_DELETE_FROM:
return new UpdateDeleteSemanticAnalyzer(conf);
- default:
- return new SemanticAnalyzer(conf);
+ default: {
+ SemanticAnalyzer semAnalyzer = HiveConf
+ .getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) ? new CalcitePlanner(conf)
+ : new SemanticAnalyzer(conf);
+ return semAnalyzer;
+ }
}
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
index d847a16..da14ab4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
@@ -141,7 +141,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
return null;
}
- Map> tableToKeysMap = new HashMap>();
+ // Must be deterministic order map for consistent q-test output across Java versions
+ Map> tableToKeysMap = new LinkedHashMap>();
Table tbl = pGraphContext.getTopToTable().get(tso);
tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap);
@@ -165,7 +166,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) {
JoinOperator op = (JoinOperator)nd;
TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
- Map> tableToKeysMap = new HashMap>();
+ // Must be deterministic order map for consistent q-test output across Java versions
+ Map> tableToKeysMap = new LinkedHashMap>();
List> parentOps = op.getParentOperators();
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
index b69830d..c027ea6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.hive.ql.parse;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -33,8 +33,9 @@ Map>> operatorToTableAccessMap;
public TableAccessInfo() {
+ // Must be deterministic order map for consistent q-test output across Java versions
operatorToTableAccessMap =
- new HashMap, Map>>();
+ new LinkedHashMap, Map>>();
}
public void add(Operator op,
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index bd610d9..706390b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -62,8 +62,10 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -128,9 +130,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx)
// If the current subExpression is pre-calculated, as in Group-By etc.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index bd610d9..706390b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -62,8 +62,10 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -128,9 +130,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx)
     // If the current subExpression is pre-calculated, as in Group-By etc.
     ColumnInfo colInfo = input.getExpression(expr);
     if (colInfo != null) {
-      desc = new ExprNodeColumnDesc(colInfo.getType(), colInfo
-          .getInternalName(), colInfo.getTabAlias(), colInfo
-          .getIsVirtualCol());
+      desc = new ExprNodeColumnDesc(colInfo);
       ASTNode source = input.getExpressionSource(expr);
       if (source != null) {
         ctx.getUnparseTranslator().addCopyTranslation(expr, source);
@@ -513,9 +513,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
           return null;
         }
         // It's a column.
-        return new ExprNodeColumnDesc(colInfo.getType(), colInfo
-            .getInternalName(), colInfo.getTabAlias(), colInfo
-            .getIsVirtualCol());
+        return toExprNodeDesc(colInfo);
       } else {
         // It's a table alias.
         // We will process that later in DOT.
@@ -547,11 +545,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
           }
         } else {
           // It's a column.
-          ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo
-              .getInternalName(), colInfo.getTabAlias(), colInfo
-              .getIsVirtualCol());
-          exprNodColDesc.setSkewedCol(colInfo.isSkewedCol());
-          return exprNodColDesc;
+          return toExprNodeDesc(colInfo);
         }
       }
 
@@ -559,6 +553,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
 
     }
 
+    private static ExprNodeDesc toExprNodeDesc(ColumnInfo colInfo) {
+      ObjectInspector inspector = colInfo.getObjectInspector();
+      if (inspector instanceof ConstantObjectInspector &&
+          inspector instanceof PrimitiveObjectInspector) {
+        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
+        Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
+        return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant));
+      }
+      // non-constant or non-primitive constants
+      ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo);
+      column.setSkewedCol(colInfo.isSkewedCol());
+      return column;
+    }
+
     /**
      * Factory method to get ColumnExprProcessor.
      *
@@ -979,7 +987,7 @@ private boolean isDescendant(Node ans, Node des) {
       return false;
     }
 
-    protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
+    protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
         Object... nodeOutputs) throws SemanticException {
       RowResolver input = ctx.getInputRR();
       String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
@@ -993,8 +1001,7 @@ protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode ex
         ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
         return null;
       }
-      return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
-          colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+      return toExprNodeDesc(colInfo);
     }
 
     @Override
@@ -1080,16 +1087,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
         for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
           ColumnInfo colInfo = colMap.getValue();
           if (!colInfo.getIsVirtualCol()) {
-            columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
-                colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            columnList.addColumn(toExprNodeDesc(colInfo));
           }
         }
       } else {
         // all columns (select *, for example)
         for (ColumnInfo colInfo : input.getColumnInfos()) {
           if (!colInfo.getIsVirtualCol()) {
-            columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
-                colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            columnList.addColumn(toExprNodeDesc(colInfo));
           }
         }
       }
@@ -1127,7 +1132,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
           expr.getChildCount() - childrenBegin);
       for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
         if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
-          children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren());
+          children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]).getChildren());
         } else {
           children.add((ExprNodeDesc) nodeOutputs[ci]);
         }
@@ -1142,8 +1147,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       RowResolver input = ctx.getInputRR();
       for (ColumnInfo colInfo : input.getColumnInfos()) {
         if (!colInfo.getIsVirtualCol()) {
-          children.add(new ExprNodeColumnDesc(colInfo.getType(),
-              colInfo.getInternalName(), colInfo.getTabAlias(), false));
+          children.add(toExprNodeDesc(colInfo));
        }
      }
    }
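The new toExprNodeDesc helper above is what produces the count(1)-style plans in the q.out updates further down: when a projected column is backed by a ConstantObjectInspector, the type-checker now emits the literal itself instead of a column reference. A toy analogue of that decision, with invented stand-in types (Hive's real classes are ColumnInfo, ExprNodeColumnDesc and ExprNodeConstantDesc):

abstract class Expr {}
final class ColumnRef extends Expr {
  final String name;
  ColumnRef(String name) { this.name = name; }
  @Override public String toString() { return name; }
}
final class Literal extends Expr {
  final Object value;
  Literal(Object value) { this.value = value; }
  @Override public String toString() { return String.valueOf(value); }
}
final class ColumnMeta {
  final String internalName;
  final Object constantValue; // null when the column is not a known constant
  ColumnMeta(String internalName, Object constantValue) {
    this.internalName = internalName;
    this.constantValue = constantValue;
  }
}
public class ConstantFoldDemo {
  // Mirrors the shape of toExprNodeDesc: fold known constants, keep real columns
  static Expr toExpr(ColumnMeta col) {
    return col.constantValue != null
        ? new Literal(col.constantValue)
        : new ColumnRef(col.internalName);
  }
  public static void main(String[] args) {
    System.out.println("count(" + toExpr(new ColumnMeta("_col1", 1)) + ")");    // count(1)
    System.out.println("count(" + toExpr(new ColumnMeta("_col0", null)) + ")"); // count(_col0)
  }
}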
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 65f5eaa..4eedb14 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -19,7 +19,8 @@
 
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -158,7 +159,8 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
     rewrittenQueryStr.append(" select ROW__ID");
     Map<Integer, ASTNode> setColExprs = null;
     Map<String, ASTNode> setCols = null;
-    Set<String> setRCols = new HashSet<String>();
+    // Must be deterministic order set for consistent q-test output across Java versions
+    Set<String> setRCols = new LinkedHashSet<String>();
     if (updating()) {
       // An update needs to select all of the columns, as we rewrite the entire row.  Also,
       // we need to figure out which columns we are going to replace.  We won't write the set
@@ -171,7 +173,8 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
 
       // Get the children of the set clause, each of which should be a column assignment
       List<? extends Node> assignments = setClause.getChildren();
-      setCols = new HashMap<String, ASTNode>(assignments.size());
+      // Must be deterministic order map for consistent q-test output across Java versions
+      setCols = new LinkedHashMap<String, ASTNode>(assignments.size());
       setColExprs = new HashMap<Integer, ASTNode>(assignments.size());
       for (Node a : assignments) {
         ASTNode assignment = (ASTNode)a;
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
index f8738cd..be78e73 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
@@ -30,16 +30,17 @@
  */
 public class ExprNodeColumnListDesc extends ExprNodeDesc {
 
-  List<ExprNodeColumnDesc> columns = new ArrayList<ExprNodeColumnDesc>();
+  // column or constant
+  final List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
 
-  public void addColumn(ExprNodeColumnDesc column) {
+  public void addColumn(ExprNodeDesc column) {
     columns.add(column);
   }
 
   @Override
   public ExprNodeDesc clone() {
     ExprNodeColumnListDesc clone = new ExprNodeColumnListDesc();
-    clone.columns = new ArrayList<ExprNodeColumnDesc>(columns);
+    clone.columns.addAll(columns);
     return clone;
   }
 
@@ -73,11 +74,7 @@ public String getTypeString() {
 
   @Override
   public List<String> getCols() {
-    List<String> cols = new ArrayList<String>();
-    for (ExprNodeColumnDesc column : columns) {
-      cols.add(column.getColumn());
-    }
-    return cols;
+    throw new IllegalStateException();
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
index 32d84ea..ef5a655 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
@@ -20,14 +20,16 @@
 
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.TreeMap;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * FetchWork.
@@ -52,7 +54,7 @@
   private SplitSample splitSample;
 
   private transient List<List<Object>> rowsComputedFromStats;
-  private transient ObjectInspector statRowOI;
+  private transient StructObjectInspector statRowOI;
 
   /**
   * Serialization Null Format for the serde used to fetch data.
@@ -62,12 +64,12 @@
   public FetchWork() {
   }
 
-  public FetchWork(List<List<Object>> rowsComputedFromStats,ObjectInspector statRowOI) {
+  public FetchWork(List<List<Object>> rowsComputedFromStats, StructObjectInspector statRowOI) {
     this.rowsComputedFromStats = rowsComputedFromStats;
     this.statRowOI = statRowOI;
   }
 
-  public ObjectInspector getStatRowOI() {
+  public StructObjectInspector getStatRowOI() {
     return statRowOI;
   }
 
@@ -99,8 +101,8 @@ public FetchWork(List<Path> partDir, List<PartitionDesc> partDesc,
 
   public void initializeForFetch() {
     if (source == null) {
-      sink = new ListSinkOperator();
-      sink.setConf(new ListSinkDesc(serializationNullFormat));
+      ListSinkDesc desc = new ListSinkDesc(serializationNullFormat);
+      sink = (ListSinkOperator) OperatorFactory.get(desc);
       source = sink;
     }
   }
@@ -173,6 +175,11 @@ public void setPartDir(ArrayList<Path> partDir) {
     return partDesc;
   }
 
+  public List<Path> getPathLists() {
+    return isPartitioned() ? partDir == null ?
+        null : new ArrayList<Path>(partDir) : Arrays.asList(tblDir);
+  }
+
   /**
    * Get Partition descriptors in sorted (ascending) order of partition directory
    *
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index 67260d0..0e34aee 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hive.common.util.HiveStringUtils;
 
 /**
@@ -84,8 +85,17 @@ public Deserializer getDeserializer() throws Exception {
    * Return a deserializer object corresponding to the tableDesc.
    */
   public Deserializer getDeserializer(Configuration conf) throws Exception {
-    Deserializer de = getDeserializerClass().newInstance();
-    SerDeUtils.initializeSerDe(de, conf, properties, null);
+    return getDeserializer(conf, false);
+  }
+
+  public Deserializer getDeserializer(Configuration conf, boolean ignoreError) throws Exception {
+    Deserializer de = ReflectionUtils.newInstance(
+        getDeserializerClass().asSubclass(Deserializer.class), conf);
+    if (ignoreError) {
+      SerDeUtils.initializeSerDeWithoutErrorCheck(de, conf, properties, null);
+    } else {
+      SerDeUtils.initializeSerDe(de, conf, properties, null);
+    }
     return de;
   }
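TableDesc now builds deserializers through Hadoop's ReflectionUtils.newInstance instead of Class.newInstance. Besides tolerating non-public constructors, ReflectionUtils injects the Configuration into any instance that implements Configurable. A minimal sketch of that contract; the DemoSerDe class is invented for illustration:

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class ReflectionDemo {
  // Invented example class: anything Configurable gets the conf injected
  public static class DemoSerDe implements Configurable {
    private Configuration conf;
    @Override public void setConf(Configuration conf) { this.conf = conf; }
    @Override public Configuration getConf() { return conf; }
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    DemoSerDe de = ReflectionUtils.newInstance(DemoSerDe.class, conf);
    // setConf was called during construction, unlike with Class.newInstance()
    System.out.println(de.getConf() != null); // prints: true
  }
}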
diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 5683126..076d2fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -310,7 +310,8 @@ public SessionState(HiveConf conf, String userName) {
     this.userName = userName;
     isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
     ls = new LineageState();
-    overriddenConfigurations = new HashMap<String, String>();
+    // Must be deterministic order map for consistent q-test output across Java versions
+    overriddenConfigurations = new LinkedHashMap<String, String>();
     overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
     // if there isn't already a session name, go ahead and create it.
     if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
@@ -1204,7 +1205,8 @@ public void setStackTraces(Map<String, List<List<String>>> stackTraces) {
 
   public Map<String, String> getOverriddenConfigurations() {
     if (overriddenConfigurations == null) {
-      overriddenConfigurations = new HashMap<String, String>();
+      // Must be deterministic order map for consistent q-test output across Java versions
+      overriddenConfigurations = new LinkedHashMap<String, String>();
     }
     return overriddenConfigurations;
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
index 056c56d..bdce960 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
@@ -42,7 +42,7 @@ public static void initialize() {
   @Before
   public void setup() throws SemanticException {
     pd = new ParseDriver();
-    sA = new SemanticAnalyzer(conf);
+    sA = new CalcitePlanner(conf);
   }
 
   ASTNode parse(String query) throws ParseException {
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBJoinTreeApplyPredicate.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBJoinTreeApplyPredicate.java
index aaf1f52..70d86c1 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBJoinTreeApplyPredicate.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBJoinTreeApplyPredicate.java
@@ -42,7 +42,7 @@ public static void initialize() {
 
   @Before
   public void setup() throws SemanticException {
-    sA = new SemanticAnalyzer(conf);
+    sA = new CalcitePlanner(conf);
   }
 
   static ASTNode constructIdentifier(String nm) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
index 8b36f21..77ff79a 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
@@ -54,7 +54,7 @@ public static void initialize() {
   @Before
   public void setup() throws SemanticException {
     pd = new ParseDriver();
-    sA = new SemanticAnalyzer(conf);
+    sA = new CalcitePlanner(conf);
   }
 
   ASTNode parse(String query) throws ParseException {
diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
index 3af0257..ef846a6 100644
--- ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
+++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
@@ -29,6 +29,7 @@
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.Set;
 
 public class TestConditionalResolverCommonJoin {
@@ -52,8 +53,9 @@ public void testResolvingDriverAlias() throws Exception {
     task2.setId("alias3");
 
     // joins alias1, alias2, alias3 (alias1 was not eligible for big pos)
+    // Must be deterministic order map for consistent q-test output across Java versions
     HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
-        new HashMap<Task<? extends Serializable>, Set<String>>();
+        new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
     taskToAliases.put(task1, new HashSet<String>(Arrays.asList("alias2")));
     taskToAliases.put(task2, new HashSet<String>(Arrays.asList("alias3")));
diff --git ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
index 34f91fc..712ece7 100644
--- ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
+++ ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
@@ -1,3 +1,5 @@
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
 DROP TABLE Employee_Part;
 CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
 row format delimited fields terminated by '|' stored as textfile
diff --git ql/src/test/queries/clientnegative/select_star_suffix.q ql/src/test/queries/clientnegative/select_star_suffix.q
new file mode 100644
index 0000000..6fbb8c5
--- /dev/null
+++ ql/src/test/queries/clientnegative/select_star_suffix.q
@@ -0,0 +1,3 @@
+-- Check SELECT * syntax.
+-- Check that there should not be any identifier after STAR.
+select *abcdef from src;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q
index 2a92d17..2a52888 100644
--- ql/src/test/queries/clientpositive/nonmr_fetch.q
+++ ql/src/test/queries/clientpositive/nonmr_fetch.q
@@ -9,7 +9,6 @@ select * from src limit 10;
 explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
 select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
 
--- negative, select expression
 explain select key from src limit 10;
 select key from src limit 10;
 
@@ -62,6 +61,16 @@ select * from src TABLESAMPLE (0.25 PERCENT);
 explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT);
 select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT);
 
+-- sub query
+explain
+select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20;
+select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20;
+
+-- lateral view
+explain
+select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20;
+select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20;
+
 -- non deterministic func
 explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1;
 select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1;
@@ -78,8 +87,5 @@ explain create table srcx as select distinct key, value from src;
 -- negative, analyze
 explain analyze table src compute statistics;
 
--- negative, subq
-explain select a.* from (select * from src) a;
-
 -- negative, join
 explain select * from src join src src2 on src.key=src2.key;
diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q
index b1a7cb5..959212b 100644
--- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q
+++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q
@@ -3,6 +3,11 @@ set hive.fetch.task.conversion=more;
 explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
 explain select cast(key as int) * 10, upper(value) from src limit 10;
 
+set hive.fetch.task.conversion.threshold=10000;
+
+explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+explain select cast(key as int) * 10, upper(value) from src limit 10;
+
 set hive.fetch.task.conversion.threshold=100;
 
 -- from HIVE-7397, limit + partition pruning filter
diff --git ql/src/test/queries/clientpositive/outer_join_ppr.q ql/src/test/queries/clientpositive/outer_join_ppr.q
index c32e023..ba8f882 100644
--- ql/src/test/queries/clientpositive/outer_join_ppr.q
+++ ql/src/test/queries/clientpositive/outer_join_ppr.q
@@ -1,6 +1,7 @@
 set hive.optimize.ppd=true;
 
 -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
 
 EXPLAIN EXTENDED
  FROM
diff --git ql/src/test/queries/clientpositive/parquet_map_null.q ql/src/test/queries/clientpositive/parquet_map_null.q
index d31cb99..61058f3 100644
--- ql/src/test/queries/clientpositive/parquet_map_null.q
+++ ql/src/test/queries/clientpositive/parquet_map_null.q
@@ -1,4 +1,5 @@
 -- This test attempts to write a parquet table from an avro table that contains map null values
+-- JAVA_VERSION_SPECIFIC_OUTPUT
 
 DROP TABLE IF EXISTS avro_table;
 DROP TABLE IF EXISTS parquet_table;
@@ -10,4 +11,4 @@ CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table;
 SELECT * FROM parquet_table;
 
 DROP TABLE avro_table;
-DROP TABLE parquet_table;
\ No newline at end of file
+DROP TABLE parquet_table;
diff --git ql/src/test/queries/clientpositive/subquery_multiinsert.q ql/src/test/queries/clientpositive/subquery_multiinsert.q
index c250737..06763f0 100644
--- ql/src/test/queries/clientpositive/subquery_multiinsert.q
+++ ql/src/test/queries/clientpositive/subquery_multiinsert.q
@@ -1,6 +1,7 @@
 set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook;
 
 -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
 
 CREATE TABLE src_4(
   key STRING,
diff --git ql/src/test/queries/clientpositive/subquery_notin_having.q ql/src/test/queries/clientpositive/subquery_notin_having.q
index a181d59..1494d1e 100644
--- ql/src/test/queries/clientpositive/subquery_notin_having.q
+++ ql/src/test/queries/clientpositive/subquery_notin_having.q
@@ -1,4 +1,6 @@
 -- non agg, non corr
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
 explain
 select key, count(*)
 from src
@@ -53,4 +55,4 @@ having b.p_mfgr not in
   group by p_mfgr
   having max(p_retailprice) - min(p_retailprice) > 600
   )
-;
\ No newline at end of file
+;
diff --git ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
index fa1dd6e..e958eb8 100644
--- ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
+++ ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
@@ -91,3 +91,8 @@ set hive.cbo.enable=false;
 
 explain select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
 select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
+
+-- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket;
+select percentile_approx(key, 0.5) from bucket;
diff --git ql/src/test/queries/clientpositive/udtf_explode.q ql/src/test/queries/clientpositive/udtf_explode.q
index 50f5f5e..db9018c 100644
--- ql/src/test/queries/clientpositive/udtf_explode.q
+++ ql/src/test/queries/clientpositive/udtf_explode.q
@@ -21,8 +21,3 @@ SELECT src.key, myKey, myVal FROM src lateral view explode(map(1,'one',2,'two',3
 
 -- HIVE-4295
 SELECT BLOCK__OFFSET__INSIDE__FILE, src.key, myKey, myVal FROM src lateral view explode(map(1,'one',2,'two',3,'three')) x AS myKey,myVal LIMIT 3;
-
--- cp knob is removed, hardly convincible
--- set hive.optimize.cp=false;
--- SELECT src.key, myKey, myVal FROM src lateral view explode(map(1,'one',2,'two',3,'three')) x AS myKey,myVal LIMIT 3;
--- SELECT BLOCK__OFFSET__INSIDE__FILE, src.key, myKey, myVal FROM src lateral view explode(map(1,'one',2,'two',3,'three')) x AS myKey,myVal LIMIT 3;
\ No newline at end of file
diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out
new file mode 100644
index 0000000..4ea70e3
--- /dev/null
+++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out
@@ -0,0 +1,73 @@
+PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
+
+DROP TABLE Employee_Part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
+
+DROP TABLE Employee_Part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Employee_Part
+POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Employee_Part
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK
+FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada}
diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out
new file mode 100644
index 0000000..7cae55e
--- /dev/null
+++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out
@@ -0,0 +1,73 @@
+PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
+
+DROP TABLE Employee_Part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
+
+DROP TABLE Employee_Part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Employee_Part
+POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Employee_Part
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
"../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {country=Canada, employeesalary=4000.0} diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out index 3261f78..e69de29 100644 --- ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out +++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out @@ -1,69 +0,0 @@ -PREHOOK: query: DROP TABLE Employee_Part -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE Employee_Part -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@Employee_Part -POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@Employee_Part -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH 
"../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK -FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientnegative/select_star_suffix.q.out ql/src/test/results/clientnegative/select_star_suffix.q.out new file mode 100644 index 0000000..b873100 --- /dev/null +++ ql/src/test/results/clientnegative/select_star_suffix.q.out @@ -0,0 +1 @@ +FAILED: 
+FAILED: ParseException line 3:8 missing EOF at 'abcdef' near '*'
diff --git ql/src/test/results/clientnegative/unset_table_property.q.out ql/src/test/results/clientnegative/unset_table_property.q.out
index 8f94c9d..158ed38 100644
--- ql/src/test/results/clientnegative/unset_table_property.q.out
+++ ql/src/test/results/clientnegative/unset_table_property.q.out
@@ -29,4 +29,4 @@ totalSize	0
 #### A masked pattern was here ####
 FAILED: SemanticException [Error 10215]: Please use the following syntax if not sure whether the property existed or not:
 ALTER TABLE tableName UNSET TBLPROPERTIES IF EXISTS (key1, key2, ...)
-The following property z does not exist in testtable
+The following property x does not exist in testtable
diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out
index 03e1f6f..65270d0 100644
--- ql/src/test/results/clientpositive/annotate_stats_select.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -750,11 +750,9 @@ STAGE PLANS:
             alias: alltypes_orc
             Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              expressions: 1 (type: int)
-              outputColumnNames: _col0
               Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                aggregations: count(_col0)
+                aggregations: count(1)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/auto_join26.q.out ql/src/test/results/clientpositive/auto_join26.q.out
index 5580f49..0874b7f 100644
--- ql/src/test/results/clientpositive/auto_join26.q.out
+++ ql/src/test/results/clientpositive/auto_join26.q.out
@@ -70,11 +70,11 @@ STAGE PLANS:
                 outputColumnNames: _col1
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), 1 (type: int)
-                  outputColumnNames: _col0, _col1
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(_col1)
+                    aggregations: count(1)
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/auto_join27.q.out ql/src/test/results/clientpositive/auto_join27.q.out
index 06159c5..d82cdb4 100644
--- ql/src/test/results/clientpositive/auto_join27.q.out
+++ ql/src/test/results/clientpositive/auto_join27.q.out
@@ -115,21 +115,17 @@ STAGE PLANS:
               0 _col0 (type: string)
               1 _col0 (type: string)
             Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: 1 (type: int)
+            Group By Operator
+              aggregations: count(1)
+              mode: hash
               outputColumnNames: _col0
-              Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(_col0)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           TableScan
             Union
               Statistics: Num rows: 249 Data size: 2644 Basic stats: COMPLETE Column stats: NONE
@@ -140,21 +136,17 @@
               0 _col0 (type: string)
               1 _col0 (type: string)
             Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: 1 (type: int)
+            Group By Operator
+              aggregations: count(1)
+              mode: hash
               outputColumnNames: _col0
-              Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(_col0)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
diff --git ql/src/test/results/clientpositive/auto_join_without_localtask.q.out ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
index 8bb92e4..1ad03f4 100644
--- ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
+++ ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
@@ -944,8 +944,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 RUN: Stage-10:CONDITIONAL
-RUN: Stage-14:MAPREDLOCAL
-RUN: Stage-9:MAPRED
+RUN: Stage-13:MAPREDLOCAL
+RUN: Stage-8:MAPRED
 RUN: Stage-7:CONDITIONAL
 RUN: Stage-12:MAPREDLOCAL
 RUN: Stage-6:MAPRED
@@ -1005,7 +1005,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 RUN: Stage-10:CONDITIONAL
-RUN: Stage-14:MAPREDLOCAL
+RUN: Stage-13:MAPREDLOCAL
 RUN: Stage-3:MAPRED
 RUN: Stage-7:CONDITIONAL
 RUN: Stage-12:MAPREDLOCAL
diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out
index feb5ac8..f23f25b 100644
--- ql/src/test/results/clientpositive/bucket_groupby.q.out
+++ ql/src/test/results/clientpositive/bucket_groupby.q.out
@@ -58,11 +58,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -167,11 +167,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -250,11 +250,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: length(key) (type: int), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: length(key) (type: int)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -324,11 +324,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: abs(length(key)) (type: int), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: abs(length(key)) (type: int)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -400,12 +400,12 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 3 (type: int), 1 (type: int)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col2)
-                keys: _col0 (type: string), _col1 (type: int)
+                aggregations: count(1)
+                keys: _col0 (type: string), 3 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -483,11 +483,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: value (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: value (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -564,11 +564,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -943,12 +943,12 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 3 (type: int), 1 (type: int)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col2)
-                keys: _col0 (type: string), _col1 (type: int)
+                aggregations: count(1)
+                keys: _col0 (type: string), 3 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
@@ -1055,11 +1055,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 bucketGroup: true
                 keys: _col0 (type: string)
                 mode: hash
@@ -1137,11 +1137,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: value (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: value (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -1218,11 +1218,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col2)
+                aggregations: count(1)
                 bucketGroup: true
                 keys: _col0 (type: string), _col1 (type: string)
                 mode: hash
@@ -1352,11 +1352,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 bucketGroup: true
                 keys: _col0 (type: string)
                 mode: hash
@@ -1434,11 +1434,11 @@ STAGE PLANS:
             alias: clustergroupby
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: value (type: string), key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col2)
+                aggregations: count(1)
                 keys: _col0 (type: string), _col1 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
diff --git ql/src/test/results/clientpositive/column_access_stats.q.out ql/src/test/results/clientpositive/column_access_stats.q.out
index 9330ff7..2154f97 100644
--- ql/src/test/results/clientpositive/column_access_stats.q.out
+++ ql/src/test/results/clientpositive/column_access_stats.q.out
@@ -360,10 +360,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key,val
 
-Table:default@t1
+Table:default@t2
 Columns:key,val
 
 1	11	1	1
@@ -441,10 +441,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key
 
-Table:default@t1
+Table:default@t2
 Columns:key
 
 1
@@ -460,10 +460,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key,val
 
-Table:default@t1
+Table:default@t2
 Columns:key,val
 
 PREHOOK: query: -- Map join
@@ -474,10 +474,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key,val
 
-Table:default@t1
+Table:default@t2
 Columns:key,val
 
 1	11	1	1
@@ -556,10 +556,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key,val
 
-Table:default@t1
+Table:default@t2
 Columns:key,val
 
 PREHOOK: query: EXPLAIN
@@ -653,10 +653,10 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key,val
 
-Table:default@t1
+Table:default@t2
 Columns:key,val
 
 PREHOOK: query: -- Join followed by join
@@ -802,10 +802,10 @@ PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
 #### A masked pattern was here ####
-Table:default@t2
+Table:default@t1
 Columns:key
 
-Table:default@t1
+Table:default@t2
 Columns:key
 
 Table:default@t3
diff --git ql/src/test/results/clientpositive/combine2.q.out ql/src/test/results/clientpositive/combine2.q.out
index 7c94b96..c29e940 100644
--- ql/src/test/results/clientpositive/combine2.q.out
+++ ql/src/test/results/clientpositive/combine2.q.out
@@ -188,11 +188,9 @@ STAGE PLANS:
             Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Select Operator
-              expressions: 1 (type: int)
-              outputColumnNames: _col0
               Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col0)
+                aggregations: count(1)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -656,11 +654,11 @@ STAGE PLANS:
             alias: srcpart
             Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: ds (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: ds (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/correlationoptimizer1.q.out ql/src/test/results/clientpositive/correlationoptimizer1.q.out
index 50662a9..7575b48 100644
--- ql/src/test/results/clientpositive/correlationoptimizer1.q.out
+++ ql/src/test/results/clientpositive/correlationoptimizer1.q.out
@@ -70,11 +70,11 @@ STAGE PLANS:
                 outputColumnNames: _col1
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), 1 (type: int)
-                  outputColumnNames: _col0, _col1
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(_col1)
+                    aggregations: count(1)
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
@@ -231,13 +231,13 @@ STAGE PLANS:
                 outputColumnNames: _col1
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), 1 (type: int)
-                  outputColumnNames: _col0, _col1
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Mux Operator
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Group By Operator
-                      aggregations: count(_col1)
+                      aggregations: count(1)
                       keys: _col0 (type: string)
                       mode: complete
                       outputColumnNames: _col0, _col1
@@ -379,11 +379,11 @@ STAGE PLANS:
                 outputColumnNames: _col1
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: string), 1 (type: int)
-                  outputColumnNames: _col0, _col1
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(_col1)
+                    aggregations: count(1)
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
@@ -540,22 +540,18 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), 1 (type: int)
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(_col1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -706,33 +702,29 @@ STAGE PLANS:
             1 _col0 (type: string)
           outputColumnNames: _col0
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), 1 (type: int)
-            outputColumnNames: _col0, _col1
+          Mux Operator
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col0 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Group By Operator
-                aggregations: count(_col1)
-                keys: _col0 (type: string)
-                mode: complete
+              Select Operator
+                expressions: hash(_col0) (type: int), hash(_col1) (type: int)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                Group By Operator
+                  aggregations: sum(_col0), sum(_col1)
+                  mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -845,11 +837,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1000,13 +992,13 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 @@ -1137,22 +1129,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1291,22 +1279,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 
Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1449,11 +1433,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1583,11 +1567,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1725,22 +1709,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1880,33 +1860,29 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int) + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2019,11 +1995,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2173,11 +2149,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2335,11 +2311,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2489,11 +2465,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2652,11 
+2628,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2813,11 +2789,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2975,11 +2951,11 @@ STAGE PLANS: outputColumnNames: _col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -3135,11 +3111,11 @@ STAGE PLANS: outputColumnNames: _col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/correlationoptimizer10.q.out ql/src/test/results/clientpositive/correlationoptimizer10.q.out index 3551cab..c92891e 100644 --- ql/src/test/results/clientpositive/correlationoptimizer10.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -81,22 +81,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -296,39 +292,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE Join Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer15.q.out ql/src/test/results/clientpositive/correlationoptimizer15.q.out index 89b9b9f..b57203e 100644 --- ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -72,22 +72,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -339,39 +335,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer2.q.out ql/src/test/results/clientpositive/correlationoptimizer2.q.out index 54bfb33..b74c04e 100644 --- ql/src/test/results/clientpositive/correlationoptimizer2.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer2.q.out @@ -1570,22 +1570,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1790,22 +1786,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string) mode: mergepartial @@ -1821,22 +1813,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/correlationoptimizer3.q.out ql/src/test/results/clientpositive/correlationoptimizer3.q.out index bb505aa..bb8a3c5 100644 --- ql/src/test/results/clientpositive/correlationoptimizer3.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer3.q.out @@ -200,11 +200,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data 
size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -398,13 +398,13 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 @@ -600,11 +600,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -916,11 +916,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1114,13 +1114,13 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 @@ -1316,11 +1316,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/correlationoptimizer6.q.out ql/src/test/results/clientpositive/correlationoptimizer6.q.out index f325cb7..f0cf64c 100644 --- ql/src/test/results/clientpositive/correlationoptimizer6.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer6.q.out @@ 
-75,22 +75,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -196,22 +192,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce @@ -373,39 +365,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Select Operator + expressions: _col2 (type: 
string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -414,39 +402,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Select Operator + expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -575,22 +559,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: 
_col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -609,22 +589,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -762,11 +738,11 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -905,11 +881,11 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1095,22 +1071,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE 
- Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1299,39 +1271,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Mux Operator + Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -1462,22 +1430,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1709,39 +1673,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1876,22 +1836,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: 
NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2168,39 +2124,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -2365,22 +2317,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2627,41 +2575,37 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + Mux Operator + Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -3390,22 +3334,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: 
_col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -3531,22 +3471,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Map Reduce @@ -3679,22 +3615,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -3792,39 +3724,35 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: 
string), 1 (type: int) - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Mux Operator + Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3935,22 +3863,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work 
 Reduce Operator Tree:
@@ -4018,22 +3942,18 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/correlationoptimizer7.q.out ql/src/test/results/clientpositive/correlationoptimizer7.q.out
index a738ebd..eca6f27 100644
--- ql/src/test/results/clientpositive/correlationoptimizer7.q.out
+++ ql/src/test/results/clientpositive/correlationoptimizer7.q.out
@@ -70,22 +70,18 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Reduce Operator Tree:
@@ -274,22 +270,18 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Reduce Operator Tree:
@@ -465,22 +457,18 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Reduce Operator Tree:
@@ -669,22 +657,18 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out
index 096da5e..83b0d61 100644
--- ql/src/test/results/clientpositive/correlationoptimizer8.q.out
+++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out
@@ -49,11 +49,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -146,11 +146,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -249,11 +249,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -271,11 +271,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -473,11 +473,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -567,11 +567,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: value (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -690,11 +690,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -712,11 +712,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: value (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -925,11 +925,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -1022,11 +1022,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), value (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -1103,11 +1103,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -1201,11 +1201,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/count.q.out ql/src/test/results/clientpositive/count.q.out
index b04a846..c6d1731 100644
--- ql/src/test/results/clientpositive/count.q.out
+++ ql/src/test/results/clientpositive/count.q.out
@@ -115,11 +115,11 @@ STAGE PLANS:
 alias: abcd
 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int), a (type: int), b (type: int), c (type: int), d (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ expressions: a (type: int), b (type: int), c (type: int), d (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+ aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
 keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -240,17 +240,16 @@ STAGE PLANS:
 alias: abcd
 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int), a (type: int), b (type: int), c (type: int), d (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ expressions: a (type: int), b (type: int), c (type: int), d (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4
 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
 sort order: ++++
 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
 Reduce Operator Tree:
 Group By Operator
- aggregations: count(VALUE._col0), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
+ aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
 Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/create_genericudaf.q.out ql/src/test/results/clientpositive/create_genericudaf.q.out
index 774e64f..2df7c55 100644
--- ql/src/test/results/clientpositive/create_genericudaf.q.out
+++ ql/src/test/results/clientpositive/create_genericudaf.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int), substr(value, 5) (type: string)
- outputColumnNames: _col0, _col1
+ expressions: substr(value, 5) (type: string)
+ outputColumnNames: _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: test_avg(_col0), test_avg(_col1)
+ aggregations: test_avg(1), test_avg(_col1)
 mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git ql/src/test/results/clientpositive/decimal_udf.q.out ql/src/test/results/clientpositive/decimal_udf.q.out
index f89b8ec..0c760b0 100644
--- ql/src/test/results/clientpositive/decimal_udf.q.out
+++ ql/src/test/results/clientpositive/decimal_udf.q.out
@@ -2043,11 +2043,11 @@ STAGE PLANS:
 alias: decimal_udf
 Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: decimal(20,10)), 3 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: decimal(20,10))
+ outputColumnNames: _col0
 Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: histogram_numeric(_col0, _col1)
+ aggregations: histogram_numeric(_col0, 3)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
index 6687f0d..2b5f79b 100644
--- ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
+++ ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
@@ -1557,12 +1557,12 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 'day' (type: string), key (type: string), value (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(_col2)
- keys: _col0 (type: string), _col1 (type: string)
+ keys: 'day' (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1673,12 +1673,12 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 'day' (type: string), key (type: string), value (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(_col2)
- keys: _col0 (type: string), _col1 (type: string)
+ keys: 'day' (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/explain_logical.q.out ql/src/test/results/clientpositive/explain_logical.q.out
index 41c8a2b..d28c685 100644
--- ql/src/test/results/clientpositive/explain_logical.q.out
+++ ql/src/test/results/clientpositive/explain_logical.q.out
@@ -105,11 +105,11 @@ $hdt$_0:$hdt$_0:srcpart
 alias: srcpart
 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
 Select Operator (SEL_2)
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
 Group By Operator (GBY_5)
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -189,11 +189,11 @@ $hdt$_0:$hdt$_0:src
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator (SEL_1)
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator (GBY_4)
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/groupby4_map.q.out ql/src/test/results/clientpositive/groupby4_map.q.out
index 56b7a29..a8a7fbe 100644
--- ql/src/test/results/clientpositive/groupby4_map.q.out
+++ ql/src/test/results/clientpositive/groupby4_map.q.out
@@ -25,11 +25,9 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/groupby4_map_skew.q.out ql/src/test/results/clientpositive/groupby4_map_skew.q.out
index 84f47e5..3f38895 100644
--- ql/src/test/results/clientpositive/groupby4_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby4_map_skew.q.out
@@ -25,11 +25,9 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out
index e18423f..7152101 100644
--- ql/src/test/results/clientpositive/groupby_position.q.out
+++ ql/src/test/results/clientpositive/groupby_position.q.out
@@ -432,11 +432,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) <= 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/groupby_sort_11.q.out ql/src/test/results/clientpositive/groupby_sort_11.q.out
index 7becc5f..ec262e3 100644
--- ql/src/test/results/clientpositive/groupby_sort_11.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_11.q.out
@@ -108,11 +108,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(DISTINCT _col0), count(_col1), count(_col0), sum(DISTINCT _col0)
+ aggregations: count(DISTINCT _col0), count(1), count(_col0), sum(DISTINCT _col0)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -176,11 +176,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(DISTINCT _col0), count(_col1), count(_col0), sum(DISTINCT _col0)
+ aggregations: count(DISTINCT _col0), count(1), count(_col0), sum(DISTINCT _col0)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -251,11 +251,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(DISTINCT _col0), count(_col1), count(_col0), sum(DISTINCT _col0)
+ aggregations: count(DISTINCT _col0), count(1), count(_col0), sum(DISTINCT _col0)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -390,12 +390,10 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(DISTINCT _col0)
- keys: _col0 (type: int)
+ aggregations: count(DISTINCT 1)
+ keys: 1 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index cb94faa..dd450cb 100644
--- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -98,11 +98,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -457,11 +457,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -688,11 +688,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -1096,11 +1096,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -1495,12 +1495,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: 1 (type: int), key (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
- keys: _col0 (type: int), _col1 (type: string)
+ aggregations: count(1)
+ keys: 1 (type: int), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1721,12 +1721,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1942,11 +1942,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: double)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -2199,11 +2199,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -2477,11 +2477,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -2528,11 +2528,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -2982,11 +2982,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: double)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -3086,11 +3086,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -4239,11 +4239,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -4469,12 +4469,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -4712,12 +4712,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string), 2 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -4959,12 +4959,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col3)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -5245,12 +5245,13 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 2 (type: int), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col3)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ bucketGroup: true
+ keys: _col0 (type: string), 2 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/groupby_sort_2.q.out ql/src/test/results/clientpositive/groupby_sort_2.q.out
index 8aca740..b5e52f1 100644
--- ql/src/test/results/clientpositive/groupby_sort_2.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_2.q.out
@@ -65,11 +65,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: val (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
diff --git ql/src/test/results/clientpositive/groupby_sort_3.q.out ql/src/test/results/clientpositive/groupby_sort_3.q.out
index 82db449..c16911a 100644
--- ql/src/test/results/clientpositive/groupby_sort_3.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_3.q.out
@@ -68,11 +68,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: final
 outputColumnNames: _col0, _col1, _col2
@@ -205,11 +205,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/groupby_sort_4.q.out ql/src/test/results/clientpositive/groupby_sort_4.q.out
index 4407cb1..a6b1c3d 100644
--- ql/src/test/results/clientpositive/groupby_sort_4.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_4.q.out
@@ -65,11 +65,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -172,11 +172,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
diff --git ql/src/test/results/clientpositive/groupby_sort_5.q.out ql/src/test/results/clientpositive/groupby_sort_5.q.out
index 39dd720..369e2b5 100644
--- ql/src/test/results/clientpositive/groupby_sort_5.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_5.q.out
@@ -72,11 +72,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: final
 outputColumnNames: _col0, _col1, _col2
@@ -243,11 +243,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: final
 outputColumnNames: _col0, _col1, _col2
@@ -419,11 +419,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out
index b62e3d1..4e5c96f 100644
--- ql/src/test/results/clientpositive/groupby_sort_6.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_6.q.out
@@ -359,11 +359,11 @@ STAGE PLANS:
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/groupby_sort_7.q.out ql/src/test/results/clientpositive/groupby_sort_7.q.out
index 0f2b362..7264695 100644
--- ql/src/test/results/clientpositive/groupby_sort_7.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_7.q.out
@@ -75,11 +75,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: final
 outputColumnNames: _col0, _col1, _col2
diff --git ql/src/test/results/clientpositive/groupby_sort_9.q.out ql/src/test/results/clientpositive/groupby_sort_9.q.out
index db5a291..95a29a8 100644
--- ql/src/test/results/clientpositive/groupby_sort_9.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_9.q.out
@@ -65,11 +65,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index bbc0de4..2f08999 100644
--- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -98,11 +98,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -458,11 +458,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -753,11 +753,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -1161,11 +1161,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -1561,12 +1561,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: 1 (type: int), key (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
- keys: _col0 (type: int), _col1 (type: string)
+ aggregations: count(1)
+ keys: 1 (type: int), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -1852,12 +1852,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -2138,11 +2138,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: double)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -2460,11 +2460,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -2802,11 +2802,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -2853,11 +2853,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -3308,11 +3308,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: double)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -3476,11 +3476,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: final
 outputColumnNames: _col0, _col1
@@ -4695,11 +4695,11 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 bucketGroup: true
 keys: _col0 (type: string)
 mode: hash
@@ -4990,12 +4990,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -5298,12 +5298,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string), 2 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -5610,12 +5610,12 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 1 (type: int), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col3)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ keys: _col0 (type: string), 1 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -5961,12 +5961,13 @@ STAGE PLANS:
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
 Select Operator
- expressions: key (type: string), 2 (type: int), val (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col3)
- keys: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ aggregations: count(1)
+ bucketGroup: true
+ keys: _col0 (type: string), 2 (type: int), _col2 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
index f9604d6..8c1765d 100644
--- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
@@ -61,11 +61,11 @@ STAGE PLANS:
 alias: t1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/input24.q.out ql/src/test/results/clientpositive/input24.q.out
index a579ba6..75689b3 100644
--- ql/src/test/results/clientpositive/input24.q.out
+++ ql/src/test/results/clientpositive/input24.q.out
@@ -31,11 +31,9 @@ STAGE PLANS:
 alias: x
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/input30.q.out ql/src/test/results/clientpositive/input30.q.out
index f74dd85..c4848a1 100644
--- ql/src/test/results/clientpositive/input30.q.out
+++ ql/src/test/results/clientpositive/input30.q.out
@@ -37,19 +37,15 @@ STAGE PLANS:
 Filter Operator
 predicate: (((hash(rand(460476415)) & 2147483647) % 32) = 0) (type: boolean)
 Statistics: Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Select Operator
- expressions: 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Group By Operator
- aggregations: count(_col0)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ value expressions: _col0 (type: bigint)
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/input31.q.out ql/src/test/results/clientpositive/input31.q.out
index 28015bb..264ebe5 100644
--- ql/src/test/results/clientpositive/input31.q.out
+++ ql/src/test/results/clientpositive/input31.q.out
@@ -38,11 +38,9 @@ STAGE PLANS:
 predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/input32.q.out ql/src/test/results/clientpositive/input32.q.out
index 2e62356..c8fdfd4 100644
--- ql/src/test/results/clientpositive/input32.q.out
+++ ql/src/test/results/clientpositive/input32.q.out
@@ -35,11 +35,9 @@ STAGE PLANS:
 alias: srcbucket
 Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/join29.q.out ql/src/test/results/clientpositive/join29.q.out
index 33b8d4b..6d650e3 100644
--- ql/src/test/results/clientpositive/join29.q.out
+++ ql/src/test/results/clientpositive/join29.q.out
@@ -49,11 +49,11 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -219,11 +219,11 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/join31.q.out ql/src/test/results/clientpositive/join31.q.out
index 795d801..9248cd9 100644
--- ql/src/test/results/clientpositive/join31.q.out
+++ ql/src/test/results/clientpositive/join31.q.out
@@ -107,10 +107,10 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col1
 Select Operator
- expressions: _col1 (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -192,10 +192,10 @@ STAGE PLANS:
 1 _col0 (type: string)
 outputColumnNames: _col1
 Select Operator
- expressions: _col1 (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -233,11 +233,11 @@ STAGE PLANS:
 outputColumnNames: _col1
 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: _col1 (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out
index 579920b..76de7bf 100644
--- ql/src/test/results/clientpositive/join35.q.out
+++ ql/src/test/results/clientpositive/join35.q.out
@@ -167,11 +167,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) < 20.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -504,11 +504,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) > 100.0) (type: boolean)
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/join40.q.out ql/src/test/results/clientpositive/join40.q.out
index 86be07b..1984940 100644
--- ql/src/test/results/clientpositive/join40.q.out
+++ ql/src/test/results/clientpositive/join40.q.out
@@ -3780,21 +3780,17 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 1 (type: int)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col0)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/lateral_view_noalias.q.out ql/src/test/results/clientpositive/lateral_view_noalias.q.out index c73697a..5eeee01 100644 --- ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -5,20 +5,38 @@ POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col5, _col6 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + Select Operator + expressions: map('key1':100,'key2':200) (type: map<string,int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE @@ -29,43 +47,7 @@ STAGE PLANS: Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: map('key1':100,'key2':200) (type: map<string,int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Lateral View Join Operator - outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: string), _col6 (type: int) - outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink + ListSink PREHOOK: query: SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 PREHOOK: type: QUERY @@ -82,39 +64,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map('key1':100,'key2':200) (type: map<string,int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map('key1':100,'key2':200) (type: map<string,int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/limit_pushdown.q.out ql/src/test/results/clientpositive/limit_pushdown.q.out index 9df8c2d..d48ac15 100644 --- ql/src/test/results/clientpositive/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -778,11 +778,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -886,11 +886,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out new file mode 100644 index 0000000..cfaadd8 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out @@ -0,0 +1,391 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string 
+ serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort 
Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out new file mode 100644 index 0000000..f872301 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out @@ -0,0 +1,391 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string 
+ serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort 
Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.out index 92c4d44..e69de29 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.out @@ -1,389 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string 
- serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Protect Mode: None -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 0 - rawDataSize 0 - totalSize 5520 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort 
Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [51], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} -Storage Desc Params: - serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out index 68856e0..677cc7d 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out +++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out @@ -270,7 +270,7 @@ Stored As SubDirectories: Yes Skewed Columns: [col2, col4] Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] #### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287} +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82} Storage Desc Params: serialization.format 1 PREHOOK: query: explain extended diff --git ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out index cfdfb0b..504af16 100644 --- ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out @@ -563,11 +563,11 @@ STAGE PLANS: predicate: (x = 484) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: y (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: y (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -759,11 +759,11 @@ STAGE PLANS: predicate: (x = 484) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 484 (type: int), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: 484 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/merge1.q.out ql/src/test/results/clientpositive/merge1.q.out index 9c0353b..94089fc 100644 --- ql/src/test/results/clientpositive/merge1.q.out +++ ql/src/test/results/clientpositive/merge1.q.out @@ -36,11 +36,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: 
count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/merge2.q.out ql/src/test/results/clientpositive/merge2.q.out index ae1fbd8..a3a0e8e 100644 --- ql/src/test/results/clientpositive/merge2.q.out +++ ql/src/test/results/clientpositive/merge2.q.out @@ -36,11 +36,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index c04c98e..e5f7a6a 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -194,11 +194,11 @@ STAGE PLANS: alias: stats_tbl Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), 0.2 (type: double), s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(_col0), sum(_col1), count(_col0), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE @@ -248,11 +248,11 @@ STAGE PLANS: alias: stats_tbl_part Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), 0.2 (type: double), s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(_col0), sum(_col1), count(_col0), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out 
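The pattern repeated across the merge1, merge2, and metadata_only_queries plan updates above (and again in the notable_alias and nullgroup files below) is constant propagation into aggregates: literal operands such as 1 and 0.2 are no longer projected through the Select Operator as _colN columns, but are folded directly into the aggregation call, e.g. count(1), sum(1), sum(0.2). In the map-reduce variants the literal likewise disappears from the Reduce Output Operator's value expressions, so the shuffled payload shrinks. A representative query, assuming the standard src(key string, value string) test table used throughout these golden files:

    EXPLAIN
    SELECT key, count(1)
    FROM src
    WHERE key < 100
    GROUP BY key;

After the change the Select Operator projects only key, and the Group By Operator aggregates count(1) directly, as in the notable_alias1.q.out hunk below.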
ql/src/test/results/clientpositive/multiMapJoin2.q.out index eb3bc9c..5deac50 100644 --- ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -1098,8 +1098,8 @@ RUN: Stage-18:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-8:MAPRED RUN: Stage-12:CONDITIONAL -RUN: Stage-16:MAPREDLOCAL -RUN: Stage-11:MAPRED +RUN: Stage-15:MAPREDLOCAL +RUN: Stage-10:MAPRED RUN: Stage-4:MAPRED RUN: Stage-5:MAPRED 128 1 diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 2df90c0..7a8ee05 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -86,11 +86,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: -- negative, select expression -explain select key from src limit 10 +PREHOOK: query: explain select key from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: -- negative, select expression -explain select key from src limit 10 +POSTHOOK: query: explain select key from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -775,6 +773,150 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 86 val_86 2008-04-09 11 12 238 val_238 2008-04-09 12 0 86 val_86 2008-04-09 12 12 +PREHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(key) > 200.0) and (UDFToDouble(key) < 250.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_238 238 +val_224 224 +val_213 213 +val_209 209 +val_219 219 +val_237 237 +val_207 207 +val_208 208 +val_247 247 +val_203 203 +val_205 205 +val_221 221 +val_208 208 +val_239 239 +val_213 213 +val_216 216 +val_221 221 +val_241 241 +val_230 230 +val_217 217 +PREHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + Select Operator + expressions: array(key,value) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 238 +238 val_238 +86 86 +86 val_86 +311 311 +311 val_311 +27 27 +27 val_27 +165 165 +165 val_165 +409 409 +409 val_409 +255 255 +255 val_255 +278 278 +278 val_278 +98 98 +98 val_98 +484 484 +484 val_484 PREHOOK: query: -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 PREHOOK: type: QUERY @@ -1017,29 +1159,6 @@ STAGE PLANS: Stage: Stage-1 Stats-Aggr Operator -PREHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -PREHOOK: type: QUERY -POSTHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink - PREHOOK: query: -- negative, join explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY diff --git 
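The nonmr_fetch.q.out changes above show fetch-task conversion widening: select expressions, subqueries, and lateral views that previously required a MapReduce stage (and were commented '-- negative') now compile to a single Fetch Operator stage, leaving only truly non-convertible cases such as joins negative. Two of the newly covered shapes, quoted from the golden output above and assuming the standard src/srcpart test tables (the fetch conversion setting itself, presumably hive.fetch.task.conversion, is not visible in these hunks):

    EXPLAIN
    SELECT key, value
    FROM (SELECT value key, key value FROM src WHERE key > 200) a
    WHERE value < 250
    LIMIT 20;

    EXPLAIN
    SELECT key, X
    FROM srcpart LATERAL VIEW explode(array(key, value)) L AS x
    WHERE (ds = '2008-04-08' AND hr = '11')
    LIMIT 20;

Both plans now consist of Stage-0 alone, ending in a ListSink, with the filter and limit evaluated in the fetch path.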
ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index cb0d332..d7bd42b 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -46,6 +46,54 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/notable_alias1.q.out ql/src/test/results/clientpositive/notable_alias1.q.out index 383fab5..200b31c 100644 --- ql/src/test/results/clientpositive/notable_alias1.q.out +++ ql/src/test/results/clientpositive/notable_alias1.q.out @@ -30,11 +30,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/notable_alias2.q.out ql/src/test/results/clientpositive/notable_alias2.q.out index 96177c9..4df8073 100644 --- ql/src/test/results/clientpositive/notable_alias2.q.out +++ ql/src/test/results/clientpositive/notable_alias2.q.out @@ -30,11 +30,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) < 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: 
int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/nullgroup.q.out ql/src/test/results/clientpositive/nullgroup.q.out index 25a0275..2cdec52 100644 --- ql/src/test/results/clientpositive/nullgroup.q.out +++ ql/src/test/results/clientpositive/nullgroup.q.out @@ -19,11 +19,9 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -85,11 +83,9 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -152,17 +148,14 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(1) mode: partial1 outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -235,16 +228,13 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(1) mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/nullgroup2.q.out ql/src/test/results/clientpositive/nullgroup2.q.out index c4df10e..fc5ebf1 100644 --- ql/src/test/results/clientpositive/nullgroup2.q.out +++ ql/src/test/results/clientpositive/nullgroup2.q.out @@ -20,11 +20,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - 
outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -113,11 +113,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -183,18 +183,17 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(1) keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 @@ -270,18 +269,17 @@ STAGE PLANS: predicate: (UDFToDouble(key) > 9999.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(1) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/nullgroup3.q.out ql/src/test/results/clientpositive/nullgroup3.q.out index 5b96d62..54657a8 100644 --- ql/src/test/results/clientpositive/nullgroup3.q.out +++ ql/src/test/results/clientpositive/nullgroup3.q.out @@ -42,11 +42,9 @@ STAGE PLANS: alias: tstparttbl Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -135,11 +133,9 @@ STAGE PLANS: alias: tstparttbl2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -236,11 +232,9 @@ STAGE PLANS: alias: tstparttbl Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -337,11 +331,9 @@ STAGE PLANS: alias: tstparttbl2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/nullgroup4.q.out ql/src/test/results/clientpositive/nullgroup4.q.out index b2ba216..95c05dd 100644 --- ql/src/test/results/clientpositive/nullgroup4.q.out +++ ql/src/test/results/clientpositive/nullgroup4.q.out @@ -20,11 +20,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), count(DISTINCT _col1) + aggregations: count(1), count(DISTINCT _col1) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -110,11 +110,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), count(DISTINCT _col1) + aggregations: count(1), count(DISTINCT _col1) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -179,18 +179,17 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) + aggregations: count(1), count(DISTINCT KEY._col0:0._col0) mode: partial1 outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE @@ -263,17 +262,16 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) + aggregations: count(1), count(DISTINCT KEY._col0:0._col0) mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out index 89f7340..d0f861e 100644 --- ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out +++ ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out @@ -19,11 +19,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: value (type: string), substr(value, 5) (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), count(DISTINCT _col1), count(DISTINCT _col2) + aggregations: count(1), count(DISTINCT _col1), count(DISTINCT _col2) keys: _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -87,17 +87,16 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 9999.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), value (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: value (type: string), substr(value, 5) (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) + aggregations: count(1), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) mode: complete outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out new file mode 100644 index 0000000..25e549a --- /dev/null +++ ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out @@ -0,0 +1,855 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key 
AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked 
pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:$hdt$_1:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + 
filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col1 = '2008-04-08')} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) and (UDFToDouble(_col3) > 10.0)) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + 
TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + 
columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:$hdt$_1:a] 
+ /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out new file mode 100644 index 0000000..f4651a3 --- /dev/null +++ ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out @@ -0,0 +1,855 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + 
srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + 
serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:$hdt$_1:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col1 = '2008-04-08')} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE 
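
A note on the plan above: the ON-clause condition b.ds = '2008-04-08' survives only as a join-level residual, the "filter predicates" entry {(VALUE._col1 = '2008-04-08')}, and all four srcpart partitions are still listed under Truncated Path -> Alias. That is the correct behavior for a FULL OUTER JOIN: the ON clause decides which rows match, not which rows survive, so the predicate cannot be pushed into b's scan. The sketch below, plain Java rather than Hive code, shows the b-side pass of such a join; the date predicate only toggles matching, and the failing row is still emitted null-extended.

    import java.util.Arrays;
    import java.util.List;

    public class OuterJoinOnClauseDemo {
        public static void main(String[] args) {
            List<String> aKeys = Arrays.asList("17");
            List<String[]> bRows = Arrays.asList(          // {key, ds}
                new String[]{"17", "2008-04-08"},
                new String[]{"17", "2008-04-09"});
            for (String[] b : bRows) {
                boolean matched = false;
                for (String aKey : aKeys) {
                    // ON (a.key = b.key AND b.ds = '2008-04-08')
                    if (aKey.equals(b[0]) && "2008-04-08".equals(b[1])) {
                        System.out.println(aKey + " | " + b[0] + "/" + b[1]);
                        matched = true;
                    }
                }
                if (!matched) {
                    // The row fails the ON clause but is NOT dropped; it is
                    // emitted with a's columns null-extended, which is why
                    // the predicate must stay at the join, not the scan.
                    System.out.println("NULL | " + b[0] + "/" + b[1]);
                }
            }
            // (The symmetric pass that preserves unmatched a rows is omitted.)
        }
    }
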
+ Filter Operator + isSamplingPred: false + predicate: ((((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) and (UDFToDouble(_col0) > 15.0)) and (UDFToDouble(_col0) < 25.0)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:$hdt$_1:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: 
string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.out ql/src/test/results/clientpositive/outer_join_ppr.q.out index 58369ea..e69de29 100644 --- ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -1,853 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - 
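
The second query places the same predicates in WHERE instead of ON, and the new plan above changes accordingly: WHERE is evaluated after null-extension, and a comparison such as b.key > 15 can never be true on a null-extended row, so every predicate is null-rejecting. That lets the planner push the key bounds into both TableScans, prune srcpart to the two ds=2008-04-08 partitions, and weaken the join (the condition map now reads "Left Outer Join0 to 1", with the remaining a-side bounds applied in the post-join Filter Operator); the returned rows are identical to the first query's. A minimal Java sketch of the null-rejection argument, not Hive code:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class NullRejectDemo {
        public static void main(String[] args) {
            // FULL OUTER JOIN output as {a.key, b.key}; null marks the
            // null-extended side of an unmatched row.
            List<Integer[]> fullOuter = Arrays.asList(
                new Integer[]{17, 17},    // matched
                new Integer[]{11, null},  // only a matched
                new Integer[]{null, 23}); // only b matched
            List<Integer[]> kept = new ArrayList<Integer[]>();
            for (Integer[] r : fullOuter) {
                // WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25:
                // in SQL, comparing NULL yields UNKNOWN, so null-extended
                // rows never qualify and the outer join degenerates.
                if (r[0] != null && r[0] > 10 && r[0] < 20
                        && r[1] != null && r[1] > 15 && r[1] < 25) {
                    kept.add(r);
                }
            }
            System.out.println(kept.size()); // 1 -- only the matched row
        }
    }
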
TOK_TABREF - TOK_TABNAME - srcpart - b - AND - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - = - . - TOK_TABLE_OR_COL - b - ds - '2008-04-08' - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . - TOK_TABLE_OR_COL - b - key - 25 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: 
default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - 
numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((((UDFToDouble(_col0) > 15.0) and (UDFToDouble(_col0) < 25.0)) and (UDFToDouble(_col3) > 10.0)) and (UDFToDouble(_col3) < 20.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 
(type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . 
- TOK_TABLE_OR_COL - b - key - 25 - = - . - TOK_TABLE_OR_COL - b - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out new file mode 100644 index 0000000..5b4e7b6 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out @@ -0,0 +1,69 @@ +PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values +-- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE IF EXISTS avro_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values +-- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE IF EXISTS avro_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS parquet_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_table +POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_table +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE 
INTO TABLE avro_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_table +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_table +PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@avro_table +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_table +POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_table +PREHOOK: query: SELECT * FROM parquet_table +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_table +#### A masked pattern was here #### +{"key4":null,"key3":"val3"} +{"key4":null,"key3":"val3"} +{"key2":"val2","key1":null} +{"key4":null,"key3":"val3"} +{"key4":null,"key3":"val3"} +PREHOOK: query: DROP TABLE avro_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_table +PREHOOK: Output: default@avro_table +POSTHOOK: query: DROP TABLE avro_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: default@avro_table +PREHOOK: query: DROP TABLE parquet_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_table +PREHOOK: Output: default@parquet_table +POSTHOOK: query: DROP TABLE parquet_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_table +POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out new file mode 100644 index 0000000..dd541a5 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out @@ -0,0 +1,69 @@ +PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values +-- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE IF EXISTS avro_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values +-- JAVA_VERSION_SPECIFIC_OUTPUT + +DROP TABLE IF EXISTS avro_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS parquet_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_table +POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_table +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_table +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_table +PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@avro_table +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_table +POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_table +PREHOOK: query: SELECT * FROM parquet_table +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_table +#### A masked pattern was here #### +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +{"key1":null,"key2":"val2"} +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +PREHOOK: query: DROP TABLE avro_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_table +PREHOOK: Output: default@avro_table +POSTHOOK: query: DROP TABLE avro_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: default@avro_table +PREHOOK: query: DROP TABLE parquet_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_table +PREHOOK: Output: default@parquet_table +POSTHOOK: query: DROP TABLE parquet_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_table +POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/parquet_map_null.q.out ql/src/test/results/clientpositive/parquet_map_null.q.out index 5772ef2..e69de29 100644 --- ql/src/test/results/clientpositive/parquet_map_null.q.out +++ ql/src/test/results/clientpositive/parquet_map_null.q.out @@ -1,67 +0,0 @@ -PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values - -DROP TABLE IF EXISTS avro_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values - -DROP TABLE IF EXISTS avro_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS parquet_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS parquet_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_table -POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_table -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_table -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_table -PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@avro_table -PREHOOK: Output: database:default -PREHOOK: Output: default@parquet_table -POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: database:default -POSTHOOK: Output: default@parquet_table -PREHOOK: query: SELECT * FROM parquet_table -PREHOOK: type: QUERY -PREHOOK: Input: default@parquet_table -#### 
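
The parquet_map_null.q.java1.7.out and .java1.8.out files added above differ only in map key order, {"key4":null,"key3":"val3"} versus {"key3":"val3","key4":null}; the rows themselves are identical. The map presumably travels through a java.util.HashMap at some point, and HashMap iteration order is unspecified and shifted when the JDK reworked HashMap's internals in Java 8, hence the -- JAVA_VERSION_SPECIFIC_OUTPUT marker, the per-version golden files, and the removal here of the old single parquet_map_null.q.out. A self-contained illustration of the order dependence:

    import java.util.HashMap;
    import java.util.Map;

    public class MapOrderDemo {
        public static void main(String[] args) {
            Map<String, String> m = new HashMap<String, String>();
            m.put("key3", "val3");
            m.put("key4", null);    // HashMap permits null values
            // Iteration order is unspecified: the same two insertions may
            // print {key3=val3, key4=null} on one JDK and
            // {key4=null, key3=val3} on another, so a golden file that
            // serializes a map is inherently JDK-sensitive.
            System.out.println(m);
        }
    }
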
A masked pattern was here #### -POSTHOOK: query: SELECT * FROM parquet_table -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parquet_table -#### A masked pattern was here #### -{"key4":null,"key3":"val3"} -{"key4":null,"key3":"val3"} -{"key2":"val2","key1":null} -{"key4":null,"key3":"val3"} -{"key4":null,"key3":"val3"} -PREHOOK: query: DROP TABLE avro_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@avro_table -PREHOOK: Output: default@avro_table -POSTHOOK: query: DROP TABLE avro_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: default@avro_table -PREHOOK: query: DROP TABLE parquet_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@parquet_table -PREHOOK: Output: default@parquet_table -POSTHOOK: query: DROP TABLE parquet_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@parquet_table -POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/partition_boolexpr.q.out ql/src/test/results/clientpositive/partition_boolexpr.q.out index 68c8baf..7d414ff 100644 --- ql/src/test/results/clientpositive/partition_boolexpr.q.out +++ ql/src/test/results/clientpositive/partition_boolexpr.q.out @@ -43,11 +43,9 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -130,11 +128,9 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -182,11 +178,9 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -234,11 +228,9 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/plan_json.q.java1.7.out ql/src/test/results/clientpositive/plan_json.q.java1.7.out index f0f0cae..285bb7b 100644 --- ql/src/test/results/clientpositive/plan_json.q.java1.7.out +++ ql/src/test/results/clientpositive/plan_json.q.java1.7.out @@ -10,4 +10,4 @@ POSTHOOK: query: -- explain plan json: the query gets the formatted json output EXPLAIN FORMATTED SELECT count(1) FROM src POSTHOOK: type: QUERY -{"STAGE 
PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Group By Operator":{"mode:":"mergepartial","aggregations:":["count(VALUE._col0)"],"outputColumnNames:":["_col0"],"children":{"Select Operator":{"expressions:":"_col0 (type: bigint)","outputColumnNames:":["_col0"],"children":{"File Output Operator":{"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Select Operator":{"expressions:":"1 (type: int)","outputColumnNames:":["_col0"],"children":{"Group By Operator":{"mode:":"hash","aggregations:":["count(_col0)"],"outputColumnNames:":["_col0"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: bigint)","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}}} +{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Group By Operator":{"mode:":"mergepartial","aggregations:":["count(VALUE._col0)"],"outputColumnNames:":["_col0"],"children":{"Select Operator":{"expressions:":"_col0 (type: bigint)","outputColumnNames:":["_col0"],"children":{"File Output Operator":{"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Select Operator":{"children":{"Group By Operator":{"mode:":"hash","aggregations:":["count(1)"],"outputColumnNames:":["_col0"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: bigint)","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}}} diff --git ql/src/test/results/clientpositive/ppd_gby_join.q.out ql/src/test/results/clientpositive/ppd_gby_join.q.out index c6a0ebe..dc4094e 100644 --- ql/src/test/results/clientpositive/ppd_gby_join.q.out 
+++ ql/src/test/results/clientpositive/ppd_gby_join.q.out
@@ -86,11 +86,11 @@ STAGE PLANS:
             predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: _col0 (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -360,11 +360,11 @@ STAGE PLANS:
             predicate: ((_col0 > '50') or (_col1 < '50')) (type: boolean)
             Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col1 (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: _col1 (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out
index 5b94217..d46b0a9 100644
--- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out
+++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out
@@ -1142,11 +1142,9 @@ STAGE PLANS:
           alias: tbl
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
           Select Operator
-            expressions: 1 (type: int)
-            outputColumnNames: _col0
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
             Group By Operator
-              aggregations: count(_col0)
+              aggregations: count(1)
               mode: hash
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
index bb5d3d7..3810109 100644
--- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
+++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
@@ -118,11 +118,9 @@ STAGE PLANS:
           alias: lineitem_ix
           Statistics: Num rows: 0 Data size: 12099 Basic stats: PARTIAL Column stats: COMPLETE
           Select Operator
-            expressions: 1 (type: int)
-            outputColumnNames: _col0
             Statistics: Num rows: 0 Data size: 12099 Basic stats: PARTIAL Column stats: COMPLETE
             Group By Operator
-              aggregations: count(_col0)
+              aggregations: count(1)
               mode: hash
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -514,11 +512,11 @@ STAGE PLANS:
           alias: lineitem_ix
           Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: l_orderkey (type: int), 1 (type: int)
-            outputColumnNames: _col0, _col1
+            expressions: l_orderkey (type: int)
+            outputColumnNames: _col0
             Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
-              aggregations: count(_col1)
+              aggregations: count(1)
               keys: _col0 (type: int)
               mode: hash
               outputColumnNames: _col0, _col1
@@ -986,11 +984,11 @@ STAGE PLANS:
             predicate: (l_orderkey < 7) (type: boolean)
             Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: l_orderkey (type: int), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: l_orderkey (type: int)
+              outputColumnNames: _col0
               Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -1658,11 +1656,11 @@ STAGE PLANS:
           alias: lineitem_ix
           Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: (l_orderkey + 1) (type: int), 1 (type: int)
-            outputColumnNames: _col0, _col1
+            expressions: (l_orderkey + 1) (type: int)
+            outputColumnNames: _col0
            Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
-              aggregations: count(_col1)
+              aggregations: count(1)
               keys: _col0 (type: int)
               mode: hash
               outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/select_dummy_source.q.out ql/src/test/results/clientpositive/select_dummy_source.q.out
index 08311f0..b1f0939 100644
--- ql/src/test/results/clientpositive/select_dummy_source.q.out
+++ ql/src/test/results/clientpositive/select_dummy_source.q.out
@@ -190,37 +190,25 @@ POSTHOOK: query: explain select explode(array('a', 'b'))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: _dummy_table
-            Row Limit Per Split: 1
-            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
-            Select Operator
-              expressions: array('a','b') (type: array)
-              outputColumnNames: _col0
-              Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
-              UDTF Operator
-                Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
-                function name: explode
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+          Select Operator
+            expressions: array('a','b') (type: array)
+            outputColumnNames: _col0
+            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+            UDTF Operator
+              Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+              function name: explode
+              ListSink
 
 PREHOOK: query: select explode(array('a', 'b'))
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/stats1.q.out ql/src/test/results/clientpositive/stats1.q.out
index f55f4f2..61407e4 100644
--- ql/src/test/results/clientpositive/stats1.q.out
+++ ql/src/test/results/clientpositive/stats1.q.out
@@ -34,11 +34,9 @@ STAGE PLANS:
           alias: s1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: 1 (type: int)
-            outputColumnNames: _col0
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
             Group By Operator
-              aggregations: count(_col0)
+              aggregations: count(1)
               mode: hash
               outputColumnNames: _col0
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
index 80a0f9c..1b26365 100644
--- ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
+++ ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out
@@ -175,7 +175,7 @@ Stored As SubDirectories:	Yes
 Skewed Columns:     	[c1, c2]
 Skewed Values:      	[[466, val_466], [287, val_287], [82, val_82]]
 #### A masked pattern was here ####
-Skewed Value to Truncated Path:	{[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82}
+Skewed Value to Truncated Path:	{[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287}
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: drop table stats_list_bucket
diff --git ql/src/test/results/clientpositive/subq2.q.out ql/src/test/results/clientpositive/subq2.q.out
index 80ad37b..0f6e239 100644
--- ql/src/test/results/clientpositive/subq2.q.out
+++ ql/src/test/results/clientpositive/subq2.q.out
@@ -23,11 +23,11 @@ STAGE PLANS:
             predicate: (UDFToDouble(key) >= 90.0) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), 1 (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(_col1)
+                aggregations: count(1)
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
new file mode 100644
index 0000000..01fc9c6
--- /dev/null
+++ ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out
@@ -0,0 +1,1001 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+CREATE TABLE src_4(
+  key STRING,
+  value STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+-- JAVA_VERSION_SPECIFIC_OUTPUT
+
+CREATE TABLE src_4(
+  key STRING,
+  value STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_4
+RUN: Stage-0:DDL
+PREHOOK: query: CREATE TABLE src_5(
+  key STRING,
+  value STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_5
+POSTHOOK: query: CREATE TABLE src_5(
+  key STRING,
+  value STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_5
+RUN: Stage-0:DDL
+Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+  select *
+  where b.key in
+   (select a.key
+    from src a
+    where b.value = a.value and a.key > '9'
+   )
+INSERT OVERWRITE TABLE src_5
+  select *
+  where b.key not in  ( select key from src s1 where s1.key > '2')
+  order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+  select *
+  where b.key in
+   (select a.key
+    from src a
+    where b.value = a.value and a.key > '9'
+   )
+INSERT OVERWRITE TABLE src_5
+  select *
+  where b.key not in  ( select key from src s1 where s1.key > '2')
+  order by key
+POSTHOOK: type: QUERY
+STAGE
DEPENDENCIES: + Stage-10 is a root stage + Stage-2 depends on stages: Stage-10 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + 
TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + 
mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_4 +PREHOOK: Output: default@src_5 +POSTHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_4 +POSTHOOK: Output: default@src_5 +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-10:MAPRED +RUN: Stage-2:MAPRED +RUN: Stage-3:MAPRED +RUN: Stage-6:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-0:MOVE +RUN: Stage-1:MOVE +RUN: Stage-7:STATS +RUN: Stage-5:STATS +PREHOOK: query: select * from src_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_4 +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select * from src_5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_5 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 
+10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 + Stage-17 has a backup stage: Stage-2 + Stage-13 depends on stages: Stage-17 + Stage-15 depends on stages: Stage-2, Stage-13 + Stage-12 depends on stages: Stage-15 + Stage-0 depends on stages: Stage-12 + Stage-7 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-2, Stage-13 + Stage-4 depends on stages: Stage-16 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-2 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Conditional Operator + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + sq_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_1:a + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + sq_2:s1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_2:s1 + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Reduce Output Operator + 
sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + +Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_4 +PREHOOK: Output: default@src_5 +POSTHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_4 +POSTHOOK: Output: default@src_5 +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-10:MAPRED +RUN: Stage-14:CONDITIONAL +RUN: Stage-17:MAPREDLOCAL +RUN: Stage-13:MAPRED +RUN: Stage-15:MAPREDLOCAL +RUN: Stage-16:MAPREDLOCAL +RUN: Stage-12:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-0:MOVE +RUN: Stage-1:MOVE +RUN: Stage-7:STATS +RUN: Stage-5:STATS +PREHOOK: query: select * from src_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_4 +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select * from src_5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_5 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 
+15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out new file mode 100644 index 0000000..4c1d06c --- /dev/null +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out @@ -0,0 +1,1001 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +CREATE TABLE src_4( + key STRING, + value STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_4 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +CREATE TABLE src_4( + key STRING, + value STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_4 +RUN: Stage-0:DDL +PREHOOK: query: CREATE TABLE src_5( + key STRING, + value STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_5 +POSTHOOK: query: CREATE TABLE src_5( + key STRING, + value STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_5 +RUN: Stage-0:DDL +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-2 depends on stages: Stage-10 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: 
hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: 
Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_4 +PREHOOK: Output: default@src_5 +POSTHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_4 +POSTHOOK: Output: default@src_5 +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-10:MAPRED +RUN: Stage-2:MAPRED +RUN: Stage-3:MAPRED +RUN: Stage-6:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-0:MOVE +RUN: Stage-1:MOVE +RUN: Stage-7:STATS +RUN: Stage-5:STATS +PREHOOK: query: select * from src_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_4 +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select * from src_5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_5 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 
val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 + Stage-17 has a backup stage: Stage-2 + Stage-13 depends on stages: Stage-17 + Stage-15 depends on stages: Stage-2, Stage-13 + Stage-4 depends on stages: Stage-15 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-16 depends on stages: Stage-2, Stage-13 + Stage-12 depends on stages: Stage-16 + Stage-0 depends on stages: Stage-12 + Stage-7 depends on stages: Stage-0 + Stage-2 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Conditional Operator + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + sq_2:s1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_2:s1 + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + sq_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_1:a + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + +Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: 
from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_4 +PREHOOK: Output: default@src_5 +POSTHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_4 +POSTHOOK: Output: default@src_5 +POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-10:MAPRED +RUN: Stage-14:CONDITIONAL +RUN: Stage-17:MAPREDLOCAL +RUN: Stage-13:MAPRED +RUN: Stage-15:MAPREDLOCAL +RUN: Stage-16:MAPREDLOCAL +RUN: Stage-4:MAPRED +RUN: Stage-12:MAPRED +RUN: Stage-1:MOVE +RUN: Stage-0:MOVE +RUN: Stage-5:STATS +RUN: Stage-7:STATS +PREHOOK: query: select * from src_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_4 +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select * from src_5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_5 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 
+197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 556e289..e69de29 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -1,999 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS - -CREATE TABLE src_4( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_4 -POSTHOOK: query: -- SORT_QUERY_RESULTS - -CREATE TABLE src_4( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_4 -RUN: Stage-0:DDL -PREHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_5 -POSTHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_5 -RUN: Stage-0:DDL -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num 
rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-5 - Stats-Aggr Operator - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE 
TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED -RUN: Stage-6:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-0:MOVE -RUN: Stage-1:MOVE -RUN: Stage-7:STATS -RUN: Stage-5:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 
'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-15 - Stage-0 depends on stages: Stage-12 - Stage-7 depends on stages: Stage-0 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-16 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-2 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-14 - Conditional Operator - - Stage: Stage-17 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-15 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_1:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - - Stage: Stage-16 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_2:s1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_2:s1 - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) 
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-5 - Stats-Aggr Operator - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - -Warning: Map Join MAPJOIN[107][bigTable=b] in task 'Stage-13:MAPRED' is a cross product -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION 
[(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL -RUN: Stage-16:MAPREDLOCAL -RUN: Stage-12:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-0:MOVE -RUN: Stage-1:MOVE -RUN: Stage-7:STATS -RUN: Stage-5:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out new file mode 100644 index 0000000..ebc6efd --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out @@ -0,0 +1,762 @@ +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, non corr +-- JAVA_VERSION_SPECIFIC_OUTPUT + +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +-- JAVA_VERSION_SPECIFIC_OUTPUT + +explain +select key, count(*) +from src 
+group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 
_col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '12') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: double) + 1 _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 is null or _col2 is null) and ((_col1 - _col2) > 600.0)) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1), max(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 - _col1) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: null (type: void), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#2 1690.68 +Manufacturer#1 1173.15 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out new file mode 100644 index 0000000..756f20d --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out @@ -0,0 +1,762 @@ +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, non corr +-- JAVA_VERSION_SPECIFIC_OUTPUT + +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +-- JAVA_VERSION_SPECIFIC_OUTPUT + +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '12') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: double) + 1 _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1 - _col2) > 600.0) and (_col0 is null or _col2 is null)) (type: boolean) + Statistics: Num rows: 2 Data size: 
242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1), max(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 - _col1) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + 
from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + 
condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 
Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: null (type: void), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part 
+#### A masked pattern was here #### +Manufacturer#2 1690.68 +Manufacturer#1 1173.15 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out index 2e84283..e69de29 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -1,758 +0,0 @@ -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- non agg, non corr -explain -select key, count(*) -from src -group by key -having key not in - ( select key from src s1 - where s1.key > '12' - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- non agg, non corr -explain -select key, count(*) -from src -group by key -having key not in - ( select key from src s1 - where s1.key > '12' - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: 
_col1 (type: bigint) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '12') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '12') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- non agg, corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: 
type: QUERY -POSTHOOK: query: -- non agg, corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: 
double) - 1 _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 is null or _col2 is null) and ((_col1 - _col2) > 600.0)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1), max(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 - _col1) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product -PREHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - 
Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 951 Basic 
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: null (type: void), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: 
Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#2 1690.68 -Manufacturer#1 1173.15 diff --git ql/src/test/results/clientpositive/symlink_text_input_format.q.out ql/src/test/results/clientpositive/symlink_text_input_format.q.out index f52429c..f9d517c 100644 --- ql/src/test/results/clientpositive/symlink_text_input_format.q.out +++ ql/src/test/results/clientpositive/symlink_text_input_format.q.out @@ -176,11 +176,9 @@ STAGE PLANS: alias: symlink_text_input_format Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/table_access_keys_stats.q.out ql/src/test/results/clientpositive/table_access_keys_stats.q.out index a0449aa..cf6f794 100644 --- ql/src/test/results/clientpositive/table_access_keys_stats.q.out +++ ql/src/test/results/clientpositive/table_access_keys_stats.q.out @@ -148,11 +148,11 @@ SELECT key, count(1) as c FROM T1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### 
-Operator:GBY_12 +Operator:GBY_4 Table:default@t1 Keys:key -Operator:GBY_4 +Operator:GBY_12 Table:default@t1 Keys:key @@ -278,10 +278,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_6 -Table:default@t2 -Keys:key Table:default@t1 Keys:key +Table:default@t2 +Keys:key 1 11 1 1 2 12 2 1 @@ -297,10 +297,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_6 -Table:default@t2 -Keys:key,val Table:default@t1 Keys:key,val +Table:default@t2 +Keys:key,val PREHOOK: query: -- map join SELECT /*+ MAPJOIN(a) */ * @@ -311,10 +311,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_6 -Table:default@t2 -Keys:key Table:default@t1 Keys:key +Table:default@t2 +Keys:key 1 11 1 1 2 12 2 1 @@ -331,10 +331,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_6 -Table:default@t2 -Keys:key Table:default@t1 Keys:key +Table:default@t2 +Keys:key PREHOOK: query: -- subqueries SELECT * @@ -352,10 +352,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_10 -Table:default@t2 -Keys:val Table:default@t1 Keys:val +Table:default@t2 +Keys:val PREHOOK: query: SELECT * FROM @@ -370,10 +370,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_8 -Table:default@t2 -Keys:val Table:default@t1 Keys:val +Table:default@t2 +Keys:val PREHOOK: query: -- with column aliases in subqueries SELECT * @@ -391,10 +391,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_10 -Table:default@t2 -Keys:val Table:default@t1 Keys:val +Table:default@t2 +Keys:val PREHOOK: query: -- with constants in subqueries SELECT * @@ -412,10 +412,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_8 -Table:default@t2 -Keys:key Table:default@t1 Keys:val,key +Table:default@t2 +Keys:key PREHOOK: query: -- multiple levels of constants in subqueries SELECT * @@ -436,10 +436,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_9 -Table:default@t2 -Keys:val,key Table:default@t1 Keys:key +Table:default@t2 +Keys:val,key PREHOOK: query: -- no mapping on functions SELECT * @@ -473,10 +473,10 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### Operator:JOIN_8 -Table:default@t2 -Keys:key Table:default@t1 Keys:val,key +Table:default@t2 +Keys:key PREHOOK: query: -- join followed by union SELECT * @@ -503,15 +503,15 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -Operator:GBY_16 -Table:default@t3 -Keys:val - Operator:JOIN_8 -Table:default@t2 -Keys:key Table:default@t1 Keys:val,key +Table:default@t2 +Keys:key + +Operator:GBY_16 +Table:default@t3 +Keys:val 11.0 1 12.0 1 @@ -542,8 +542,8 @@ PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### Operator:JOIN_8 -Table:default@t2 -Keys:key Table:default@t1 Keys:val,key +Table:default@t2 +Keys:key diff --git ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out index 24365eb..cd51c47 100644 --- ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out +++ 
ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out @@ -78,11 +78,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -226,11 +226,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -360,11 +360,11 @@ STAGE PLANS: 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -535,22 +535,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -688,22 +684,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE 
- Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -837,11 +829,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -979,11 +971,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1126,22 +1118,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1268,22 +1256,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1414,11 +1398,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1546,11 +1530,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1686,22 +1670,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1828,22 +1808,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 
(type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1977,11 +1953,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2119,11 +2095,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2269,11 +2245,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2411,11 +2387,11 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -2562,11 +2538,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2711,11 +2687,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2861,11 +2837,11 @@ STAGE PLANS: outputColumnNames: 
_col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -3009,11 +2985,11 @@ STAGE PLANS: outputColumnNames: _col2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/tez/count.q.out ql/src/test/results/clientpositive/tez/count.q.out index 24e2cd2..3366b2d 100644 --- ql/src/test/results/clientpositive/tez/count.q.out +++ ql/src/test/results/clientpositive/tez/count.q.out @@ -126,11 +126,11 @@ STAGE PLANS: alias: abcd Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) + aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 @@ -263,18 +263,17 @@ STAGE PLANS: alias: abcd Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + 
outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: ++++ Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out index 28bc2a0..a734432 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out @@ -1625,12 +1625,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'day' (type: string), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col2) - keys: _col0 (type: 
string), _col1 (type: string) + keys: 'day' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1752,12 +1752,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'day' (type: string), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col2) - keys: _col0 (type: string), _col1 (type: string) + keys: 'day' (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/limit_pushdown.q.out ql/src/test/results/clientpositive/tez/limit_pushdown.q.out index 949a609..0a575ab 100644 --- ql/src/test/results/clientpositive/tez/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/tez/limit_pushdown.q.out @@ -821,11 +821,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -843,11 +843,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/tez/merge1.q.out ql/src/test/results/clientpositive/tez/merge1.q.out index cfe33e4..b321080 100644 --- ql/src/test/results/clientpositive/tez/merge1.q.out +++ ql/src/test/results/clientpositive/tez/merge1.q.out @@ -37,11 +37,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/tez/merge2.q.out ql/src/test/results/clientpositive/tez/merge2.q.out index 8fa1b36..d7be565 100644 --- ql/src/test/results/clientpositive/tez/merge2.q.out +++ ql/src/test/results/clientpositive/tez/merge2.q.out @@ -37,11 +37,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + 
outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index 647764b..9ec575f 100644 --- ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -199,11 +199,11 @@ STAGE PLANS: alias: stats_tbl Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), 0.2 (type: double), s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(_col0), sum(_col1), count(_col0), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE @@ -259,11 +259,11 @@ STAGE PLANS: alias: stats_tbl_part Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), 0.2 (type: double), s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), sum(_col0), sum(_col1), count(_col0), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/select_dummy_source.q.out ql/src/test/results/clientpositive/tez/select_dummy_source.q.out index 6f08083..fa99b76 100644 --- ql/src/test/results/clientpositive/tez/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/tez/select_dummy_source.q.out @@ -71,40 +71,22 @@ explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 
1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array<string>) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array<string>) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY @@ -185,40 +167,22 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array<string>) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array<string>) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/union2.q.out ql/src/test/results/clientpositive/tez/union2.q.out index 06cb30b..ce76676 100644 --- ql/src/test/results/clientpositive/tez/union2.q.out +++ ql/src/test/results/clientpositive/tez/union2.q.out @@ -31,10 +31,8 @@ STAGE PLANS: alias: s1 Select Operator Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator @@ -46,10 +44,8 @@ STAGE PLANS: alias: s1 Select Operator Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator diff --git ql/src/test/results/clientpositive/tez/union3.q.out ql/src/test/results/clientpositive/tez/union3.q.out index d4b5119..3dad621 100644 --- ql/src/test/results/clientpositive/tez/union3.q.out +++ ql/src/test/results/clientpositive/tez/union3.q.out @@ -127,15 +127,11 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 
Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Map-reduce partition columns: 1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 3 Reduce Operator Tree: Select Operator @@ -155,15 +151,11 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 2 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: 2 (type: int) + sort order: + + Map-reduce partition columns: 2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 7 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/tez/union4.q.out ql/src/test/results/clientpositive/tez/union4.q.out index bc930e0..6874680 100644 --- ql/src/test/results/clientpositive/tez/union4.q.out +++ ql/src/test/results/clientpositive/tez/union4.q.out @@ -44,11 +44,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -62,11 +60,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/tez/union5.q.out ql/src/test/results/clientpositive/tez/union5.q.out index 5663cfc..f388baa 100644 --- ql/src/test/results/clientpositive/tez/union5.q.out +++ ql/src/test/results/clientpositive/tez/union5.q.out @@ -33,11 +33,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -51,11 +49,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + 
aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -70,21 +66,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: 'tst2' (type: string) + expressions: 'tst1' (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -111,21 +104,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: 'tst1' (type: string) + expressions: 'tst2' (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/tez/union6.q.out ql/src/test/results/clientpositive/tez/union6.q.out index 67c90ad..ece48ce 100644 --- ql/src/test/results/clientpositive/tez/union6.q.out +++ ql/src/test/results/clientpositive/tez/union6.q.out @@ -44,11 +44,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/tez/union7.q.out ql/src/test/results/clientpositive/tez/union7.q.out index 7c3e778..6131ec1 100644 --- ql/src/test/results/clientpositive/tez/union7.q.out +++ ql/src/test/results/clientpositive/tez/union7.q.out @@ -33,11 +33,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -52,19 +50,16 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: 
string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -74,19 +69,16 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/tez/union9.q.out ql/src/test/results/clientpositive/tez/union9.q.out index daac1ca..ff4edd7 100644 --- ql/src/test/results/clientpositive/tez/union9.q.out +++ ql/src/test/results/clientpositive/tez/union9.q.out @@ -34,10 +34,8 @@ STAGE PLANS: alias: s1 Select Operator Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator @@ -49,10 +47,8 @@ STAGE PLANS: alias: s1 Select Operator Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator @@ -64,10 +60,8 @@ STAGE PLANS: alias: s1 Select Operator Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out index 93292f4..1d80f9f 100644 --- ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out @@ -129,11 +129,11 @@ STAGE PLANS: alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i (type: int), 50 (type: int), 50.0 (type: double), CAST( 50 AS decimal(10,0)) (type: decimal(10,0)) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: i (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), avg(_col2), avg(_col3) + aggregations: avg(50), avg(50.0), avg(50) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -143,7 +143,7 @@ STAGE PLANS: sort order: + 
Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized Reducer 2 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/vector_decimal_2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_2.q.out index ba84822..e67ab7b 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_2.q.out @@ -1031,18 +1031,18 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3.14 AS decimal(4,2)) (type: decimal(4,2)) + expressions: 3.14 (type: decimal(4,2)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(4,2)) + key expressions: 3.14 (type: decimal(3,2)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized + value expressions: _col0 (type: decimal(4,2)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(4,2)) + expressions: VALUE._col0 (type: decimal(4,2)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1092,18 +1092,18 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3.14 AS decimal(4,2)) (type: decimal(4,2)) + expressions: 3.14 (type: decimal(4,2)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(4,2)) + key expressions: 3.14 (type: decimal(3,2)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized + value expressions: _col0 (type: decimal(4,2)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(4,2)) + expressions: VALUE._col0 (type: decimal(4,2)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1153,17 +1153,18 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 2012-12-19 11:12:19.1234567 AS decimal(30,8)) (type: decimal(30,8)) + expressions: 1355944339.1234567 (type: decimal(30,8)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(30,8)) + key expressions: 1355944339.1234567 (type: decimal(17,7)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(30,8)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(30,8)) + expressions: VALUE._col0 (type: decimal(30,8)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1213,17 +1214,19 @@ STAGE PLANS: alias: 
decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( true AS decimal(10,0)) (type: decimal(10,0)) + expressions: 1 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1264,17 +1267,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( true AS decimal(10,0)) (type: decimal(10,0)) + expressions: 1 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1324,18 +1329,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0)) + expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 3 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1385,18 +1391,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0)) + expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 3 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: 
_col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1446,18 +1453,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0)) + expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 3 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1507,18 +1515,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0)) + expressions: 3 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(10,0)) + key expressions: 3 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(10,0)) + expressions: VALUE._col0 (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1568,18 +1577,19 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 1.0 AS decimal(20,19)) (type: decimal(20,19)) + expressions: 1 (type: decimal(20,19)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(20,19)) + key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(20,19)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(20,19)) + expressions: VALUE._col0 (type: decimal(20,19)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1629,17 +1639,18 @@ STAGE PLANS: alias: decimal_2 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( '0.99999999999999999999' AS decimal(20,20)) (type: decimal(20,20)) + expressions: 0.99999999999999999999 (type: decimal(20,20)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(20,20)) + key expressions: 0.99999999999999999999 (type: decimal(20,20)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col0 (type: decimal(20,20)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(20,20)) + expressions: VALUE._col0 (type: decimal(20,20)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index d2a7124..e1d3ac3 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -2497,11 +2497,11 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: decimal(20,10)), 3 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: decimal(20,10)) + outputColumnNames: _col0 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: histogram_numeric(_col0, _col1) + aggregations: histogram_numeric(_col0, 3) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE diff --git ql/src/test/results/clientpositive/type_widening.q.out ql/src/test/results/clientpositive/type_widening.q.out index 4c4a10f..e54ee13 100644 --- ql/src/test/results/clientpositive/type_widening.q.out +++ ql/src/test/results/clientpositive/type_widening.q.out @@ -49,7 +49,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToLong(0) (type: bigint) + expressions: 0 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Union diff --git ql/src/test/results/clientpositive/udaf_number_format.q.out ql/src/test/results/clientpositive/udaf_number_format.q.out index b2b0bf8..e73f8e7 100644 --- ql/src/test/results/clientpositive/udaf_number_format.q.out +++ ql/src/test/results/clientpositive/udaf_number_format.q.out @@ -24,11 +24,9 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'a' (type: string) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), avg(_col0), variance(_col0), std(_col0) + aggregations: sum('a'), avg('a'), variance('a'), std('a') mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out index 32ae74b..029ccbe 100644 --- ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out @@ -569,3 +569,68 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket #### A masked pattern was here #### 341.5 +PREHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +PREHOOK: type: QUERY +POSTHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
bucket + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: double) + outputColumnNames: key + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: percentile_approx(key, 0.5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array<double>) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select percentile_approx(key, 0.5) from bucket +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +#### A masked pattern was here #### +POSTHOOK: query: select percentile_approx(key, 0.5) from bucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +#### A masked pattern was here #### +255.5 diff --git ql/src/test/results/clientpositive/udf3.q.out ql/src/test/results/clientpositive/udf3.q.out index c63c3de..8b2ad31 100644 --- ql/src/test/results/clientpositive/udf3.q.out +++ ql/src/test/results/clientpositive/udf3.q.out @@ -27,11 +27,9 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: null (type: void) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0), sum(_col0), avg(_col0), min(_col0), max(_col0) + aggregations: count(null), sum(null), avg(null), min(null), max(null) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/udf_count.q.out ql/src/test/results/clientpositive/udf_count.q.out index b7dcc17..82b2d6b 100644 --- ql/src/test/results/clientpositive/udf_count.q.out +++ ql/src/test/results/clientpositive/udf_count.q.out @@ -272,11 +272,9 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out index 5add029..4ca47cc 100644 --- ql/src/test/results/clientpositive/udf_explode.q.out +++ ql/src/test/results/clientpositive/udf_explode.q.out @@ -39,101 +39,26 
@@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array<int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) 
AS myCol FROM src tablesample (1 rows)) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -379,101 +304,26 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns key,value - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE 
Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows)) a GROUP BY a.key, a.val PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_inline.q.out ql/src/test/results/clientpositive/udf_inline.q.out index a9cde60..45bd463 100644 --- ql/src/test/results/clientpositive/udf_inline.q.out +++ ql/src/test/results/clientpositive/udf_inline.q.out @@ -20,39 +20,27 @@ POSTHOOK: query: explain SELECT inline( ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT inline( ARRAY( diff --git ql/src/test/results/clientpositive/udf_reflect2.q.out ql/src/test/results/clientpositive/udf_reflect2.q.out index f445acb..b9266df 100644 --- ql/src/test/results/clientpositive/udf_reflect2.q.out +++ ql/src/test/results/clientpositive/udf_reflect2.q.out @@ -320,11 +320,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), 2013-02-15 19:41:20.0 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), reflect2(_col0,'byteValue') (type: tinyint), reflect2(_col0,'shortValue') (type: smallint), reflect2(_col0,'intValue') (type: int), reflect2(_col0,'longValue') (type: bigint), reflect2(_col0,'floatValue') (type: float), reflect2(_col0,'doubleValue') (type: double), reflect2(_col0,'toString') (type: string),
_col1 (type: string), reflect2(_col1,'concat','_concat') (type: string), reflect2(_col1,'contains','86') (type: boolean), reflect2(_col1,'startsWith','v') (type: boolean), reflect2(_col1,'endsWith','6') (type: boolean), reflect2(_col1,'equals','val_86') (type: boolean), reflect2(_col1,'equalsIgnoreCase','VAL_86') (type: boolean), reflect2(_col1,'getBytes') (type: binary), reflect2(_col1,'indexOf','1') (type: int), reflect2(_col1,'lastIndexOf','1') (type: int), reflect2(_col1,'replace','val','VALUE') (type: string), reflect2(_col1,'substring',1) (type: string), reflect2(_col1,'substring',1,5) (type: string), reflect2(_col1,'toUpperCase') (type: string), reflect2(_col1,'trim') (type: string), _col2 (type: timestamp), reflect2(_col2,'getYear') (type: int), reflect2(_col2,'getMonth') (type: int), reflect2(_col2,'getDay') (type: int), reflect2(_col2,'getHours') (type: int), reflect2(_col2,'getMinutes') (type: int), reflect2(_col2,'getSeconds') (type: int), reflect2(_col2,'getTime') (type: bigint) + expressions: _col0 (type: int), reflect2(_col0,'byteValue') (type: tinyint), reflect2(_col0,'shortValue') (type: smallint), reflect2(_col0,'intValue') (type: int), reflect2(_col0,'longValue') (type: bigint), reflect2(_col0,'floatValue') (type: float), reflect2(_col0,'doubleValue') (type: double), reflect2(_col0,'toString') (type: string), _col1 (type: string), reflect2(_col1,'concat','_concat') (type: string), reflect2(_col1,'contains','86') (type: boolean), reflect2(_col1,'startsWith','v') (type: boolean), reflect2(_col1,'endsWith','6') (type: boolean), reflect2(_col1,'equals','val_86') (type: boolean), reflect2(_col1,'equalsIgnoreCase','VAL_86') (type: boolean), reflect2(_col1,'getBytes') (type: binary), reflect2(_col1,'indexOf','1') (type: int), reflect2(_col1,'lastIndexOf','1') (type: int), reflect2(_col1,'replace','val','VALUE') (type: string), reflect2(_col1,'substring',1) (type: string), reflect2(_col1,'substring',1,5) (type: string), reflect2(_col1,'toUpperCase') (type: string), reflect2(_col1,'trim') (type: string), 2013-02-15 19:41:20.0 (type: timestamp), 113 (type: int), 1 (type: int), 5 (type: int), 19 (type: int), 41 (type: int), 20 (type: int), 1360986080000 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out index f7855cf..7e4ac96 100644 --- ql/src/test/results/clientpositive/udtf_explode.q.out +++ ql/src/test/results/clientpositive/udtf_explode.q.out @@ -38,103 +38,28 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array<int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -415,39 +340,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select
Operator - expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/union10.q.out ql/src/test/results/clientpositive/union10.q.out index 8454829..b44777f 100644 --- ql/src/test/results/clientpositive/union10.q.out +++ ql/src/test/results/clientpositive/union10.q.out @@ -47,11 +47,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -184,11 +182,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -220,11 +216,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union11.q.out ql/src/test/results/clientpositive/union11.q.out index c2385c3..7af34cb 100644 --- ql/src/test/results/clientpositive/union11.q.out +++ ql/src/test/results/clientpositive/union11.q.out @@ -33,11 +33,9 @@ STAGE
PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -52,7 +50,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst3' (type: string) + expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -68,60 +66,48 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -148,11 +134,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -184,11 +168,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -203,7 +185,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string) + expressions: 'tst3' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/union12.q.out ql/src/test/results/clientpositive/union12.q.out index ec01c52..2fc4182 100644 --- ql/src/test/results/clientpositive/union12.q.out +++ ql/src/test/results/clientpositive/union12.q.out @@ -47,11 +47,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -184,11 +182,9 @@ STAGE PLANS: alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -220,11 +216,9 @@ STAGE PLANS: alias: s3 Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: 
count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union14.q.out ql/src/test/results/clientpositive/union14.q.out index bf29fa9..e2e5fed 100644 --- ql/src/test/results/clientpositive/union14.q.out +++ ql/src/test/results/clientpositive/union14.q.out @@ -29,11 +29,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -70,31 +68,8 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - TableScan - Union - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -105,6 +80,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/union15.q.out ql/src/test/results/clientpositive/union15.q.out index 25cdbf2..9bca7bd 100644 --- ql/src/test/results/clientpositive/union15.q.out +++ ql/src/test/results/clientpositive/union15.q.out @@ -31,11 +31,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By 
Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -66,22 +64,18 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -91,22 +85,18 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -116,22 +106,18 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 470 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/union16.q.out ql/src/test/results/clientpositive/union16.q.out index e36a6a6..1bf6d38 100644 --- ql/src/test/results/clientpositive/union16.q.out +++ ql/src/test/results/clientpositive/union16.q.out @@ -80,11 +80,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -100,11 +98,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -120,11 +116,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -140,11 +134,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -160,11 +152,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -180,11 +170,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -200,11 +188,9 @@ 
STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -220,11 +206,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -240,11 +224,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -260,11 +242,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -280,11 +260,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -300,11 +278,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -320,11 +296,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -340,11 +314,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column 
stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -360,11 +332,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -380,11 +350,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -400,11 +368,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -420,11 +386,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -440,11 +404,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -460,11 +422,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -480,11 +440,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -500,11 +458,9 @@ STAGE PLANS: Union 
Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -520,11 +476,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -540,11 +494,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -560,11 +512,9 @@ STAGE PLANS: Union Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 12500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union2.q.out ql/src/test/results/clientpositive/union2.q.out index 673450b..a53880a 100644 --- ql/src/test/results/clientpositive/union2.q.out +++ ql/src/test/results/clientpositive/union2.q.out @@ -28,11 +28,9 @@ STAGE PLANS: Union Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -48,11 +46,9 @@ STAGE PLANS: Union Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union20.q.out ql/src/test/results/clientpositive/union20.q.out index 44c87dd..ee117ca 100644 --- ql/src/test/results/clientpositive/union20.q.out +++ ql/src/test/results/clientpositive/union20.q.out @@ -40,11 +40,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -154,11 +152,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union24.q.out ql/src/test/results/clientpositive/union24.q.out index 8755025..0657b65 100644 --- ql/src/test/results/clientpositive/union24.q.out +++ ql/src/test/results/clientpositive/union24.q.out @@ -197,11 +197,11 @@ STAGE PLANS: predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1) + aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/union25.q.out ql/src/test/results/clientpositive/union25.q.out index 6998a12..6be39ed 100644 --- ql/src/test/results/clientpositive/union25.q.out +++ ql/src/test/results/clientpositive/union25.q.out @@ -134,31 +134,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - TableScan - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col2) + aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -169,6 +146,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + TableScan + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator 
+ aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/union3.q.out ql/src/test/results/clientpositive/union3.q.out index 353434a..810d3af 100644 --- ql/src/test/results/clientpositive/union3.q.out +++ ql/src/test/results/clientpositive/union3.q.out @@ -67,25 +67,21 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: 1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: 1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: Select Operator @@ -177,25 +173,21 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: 2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: 2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/union4.q.out ql/src/test/results/clientpositive/union4.q.out index 289c049..14fe96b 100644 --- ql/src/test/results/clientpositive/union4.q.out +++ ql/src/test/results/clientpositive/union4.q.out @@ -44,11 +44,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -166,11 +164,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union5.q.out ql/src/test/results/clientpositive/union5.q.out index 8b11364..bdc526f 100644 --- ql/src/test/results/clientpositive/union5.q.out +++ ql/src/test/results/clientpositive/union5.q.out @@ -28,11 +28,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -47,7 +45,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string) + expressions: 'tst1' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -63,41 +61,33 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -124,11 +114,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -143,7 +131,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string) + expressions: 'tst2' (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/union6.q.out ql/src/test/results/clientpositive/union6.q.out index be0f0b7..699b18b 100644 --- ql/src/test/results/clientpositive/union6.q.out +++ ql/src/test/results/clientpositive/union6.q.out @@ -43,11 +43,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/union7.q.out ql/src/test/results/clientpositive/union7.q.out index eb872d8..5abf151 100644 --- ql/src/test/results/clientpositive/union7.q.out +++ ql/src/test/results/clientpositive/union7.q.out @@ -27,11 +27,9 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -62,22 +60,18 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -87,22 +81,18 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), 1 (type: int) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 279 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/union9.q.out ql/src/test/results/clientpositive/union9.q.out index 08ddebd..c231d2c 100644 --- ql/src/test/results/clientpositive/union9.q.out +++ ql/src/test/results/clientpositive/union9.q.out @@ -30,11 +30,9 @@ STAGE PLANS: Union Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -50,11 +48,9 @@ STAGE PLANS: Union Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -70,11 +66,9 @@ STAGE PLANS: Union Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(_col0) + aggregations: count(1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 
diff --git ql/src/test/results/clientpositive/union_remove_1.q.out ql/src/test/results/clientpositive/union_remove_1.q.out
index a63ed0f..411f63e 100644
--- ql/src/test/results/clientpositive/union_remove_1.q.out
+++ ql/src/test/results/clientpositive/union_remove_1.q.out
@@ -73,11 +73,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -121,11 +121,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_10.q.out ql/src/test/results/clientpositive/union_remove_10.q.out
index 3fe780c..d80bf32 100644
--- ql/src/test/results/clientpositive/union_remove_10.q.out
+++ ql/src/test/results/clientpositive/union_remove_10.q.out
@@ -95,7 +95,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
@@ -153,11 +153,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -191,7 +191,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
diff --git ql/src/test/results/clientpositive/union_remove_15.q.out ql/src/test/results/clientpositive/union_remove_15.q.out
index 0bd8d7f..faf3e58 100644
--- ql/src/test/results/clientpositive/union_remove_15.q.out
+++ ql/src/test/results/clientpositive/union_remove_15.q.out
@@ -79,11 +79,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -133,11 +133,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_16.q.out ql/src/test/results/clientpositive/union_remove_16.q.out
index f737c2c..8656c11 100644
--- ql/src/test/results/clientpositive/union_remove_16.q.out
+++ ql/src/test/results/clientpositive/union_remove_16.q.out
@@ -82,11 +82,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -165,11 +165,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_18.q.out ql/src/test/results/clientpositive/union_remove_18.q.out
index 707c708..0ce6e81 100644
--- ql/src/test/results/clientpositive/union_remove_18.q.out
+++ ql/src/test/results/clientpositive/union_remove_18.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), ds (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
@@ -131,11 +131,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), ds (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1, _col2
+ expressions: key (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col2)
+ aggregations: count(1)
 keys: _col0 (type: string), _col1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
diff --git ql/src/test/results/clientpositive/union_remove_19.q.out ql/src/test/results/clientpositive/union_remove_19.q.out
index 8d5b093..b6d557b 100644
--- ql/src/test/results/clientpositive/union_remove_19.q.out
+++ ql/src/test/results/clientpositive/union_remove_19.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -125,11 +125,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -268,11 +268,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) = 7.0) (type: boolean)
 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: '7' (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: '7' (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -319,11 +319,11 @@ STAGE PLANS:
 predicate: (UDFToDouble(key) = 7.0) (type: boolean)
 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: '7' (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: '7' (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -424,11 +424,11 @@ STAGE PLANS:
 predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean)
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -479,11 +479,11 @@ STAGE PLANS:
 predicate: ((UDFToDouble(key) + UDFToDouble(key)) >= 7.0) (type: boolean)
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
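The union_remove_2, union_remove_5 and union_remove_8 diffs below (like union_remove_10 above) show a second folding step: when the projected constant feeds a UNION branch that is already typed bigint, the implicit cast is evaluated at planning time, so UDFToLong(1) (type: bigint) becomes plain 1 (type: bigint). A toy Java sketch of that compile-time cast folding, assuming nothing of Hive's UDF machinery:

public class CastFoldSketch {
  // A cast applied to a literal can be evaluated during planning instead of
  // being kept as a runtime call (UDFToLong(1) -> 1L). Toy version:
  static Object foldCast(Object literal, Class<?> target) {
    if (target == Long.class && literal instanceof Integer i) {
      return i.longValue();            // evaluate the cast now
    }
    return literal;                    // nothing to fold
  }

  public static void main(String[] args) {
    Object folded = foldCast(1, Long.class);
    System.out.println(folded + " : " + folded.getClass().getSimpleName()); // 1 : Long
  }
}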
diff --git ql/src/test/results/clientpositive/union_remove_2.q.out ql/src/test/results/clientpositive/union_remove_2.q.out
index f30d8cf..b9e575b 100644
--- ql/src/test/results/clientpositive/union_remove_2.q.out
+++ ql/src/test/results/clientpositive/union_remove_2.q.out
@@ -80,11 +80,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -128,7 +128,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
@@ -147,7 +147,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
diff --git ql/src/test/results/clientpositive/union_remove_20.q.out ql/src/test/results/clientpositive/union_remove_20.q.out
index 79aa968..b2819ce 100644
--- ql/src/test/results/clientpositive/union_remove_20.q.out
+++ ql/src/test/results/clientpositive/union_remove_20.q.out
@@ -75,11 +75,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -127,11 +127,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_22.q.out ql/src/test/results/clientpositive/union_remove_22.q.out
index 77c03eb..a8257d0 100644
--- ql/src/test/results/clientpositive/union_remove_22.q.out
+++ ql/src/test/results/clientpositive/union_remove_22.q.out
@@ -269,11 +269,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -321,11 +321,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_24.q.out ql/src/test/results/clientpositive/union_remove_24.q.out
index c4ecdd5..d0ac662 100644
--- ql/src/test/results/clientpositive/union_remove_24.q.out
+++ ql/src/test/results/clientpositive/union_remove_24.q.out
@@ -71,11 +71,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -123,11 +123,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_25.q.out ql/src/test/results/clientpositive/union_remove_25.q.out
index bbd3d64..4eae6d2 100644
--- ql/src/test/results/clientpositive/union_remove_25.q.out
+++ ql/src/test/results/clientpositive/union_remove_25.q.out
@@ -89,11 +89,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -139,11 +139,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_4.q.out ql/src/test/results/clientpositive/union_remove_4.q.out
index b3fd9fc..2291a43 100644
--- ql/src/test/results/clientpositive/union_remove_4.q.out
+++ ql/src/test/results/clientpositive/union_remove_4.q.out
@@ -78,11 +78,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -165,11 +165,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_5.q.out ql/src/test/results/clientpositive/union_remove_5.q.out
index c729150..26179f5 100644
--- ql/src/test/results/clientpositive/union_remove_5.q.out
+++ ql/src/test/results/clientpositive/union_remove_5.q.out
@@ -87,11 +87,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -174,7 +174,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
@@ -193,7 +193,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
diff --git ql/src/test/results/clientpositive/union_remove_7.q.out ql/src/test/results/clientpositive/union_remove_7.q.out
index b6b3446..f0e59cb 100644
--- ql/src/test/results/clientpositive/union_remove_7.q.out
+++ ql/src/test/results/clientpositive/union_remove_7.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -125,11 +125,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_remove_8.q.out ql/src/test/results/clientpositive/union_remove_8.q.out
index bbac530..2cba717 100644
--- ql/src/test/results/clientpositive/union_remove_8.q.out
+++ ql/src/test/results/clientpositive/union_remove_8.q.out
@@ -84,11 +84,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -132,7 +132,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), 1 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
@@ -151,7 +151,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
+ expressions: key (type: string), 2 (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
diff --git ql/src/test/results/clientpositive/union_remove_9.q.out ql/src/test/results/clientpositive/union_remove_9.q.out
index 9ac71a4..75925ac 100644
--- ql/src/test/results/clientpositive/union_remove_9.q.out
+++ ql/src/test/results/clientpositive/union_remove_9.q.out
@@ -90,11 +90,11 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Group By Operator
- aggregations: count(_col1)
+ aggregations: count(1)
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/union_view.q.out ql/src/test/results/clientpositive/union_view.q.out
index 04a27a6..5e69ec2 100644
--- ql/src/test/results/clientpositive/union_view.q.out
+++ ql/src/test/results/clientpositive/union_view.q.out
@@ -262,11 +262,9 @@ STAGE PLANS:
 filterExpr: (ds = '1') (type: boolean)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -335,11 +333,9 @@ STAGE PLANS:
 filterExpr: (ds = '2') (type: boolean)
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -408,11 +404,9 @@ STAGE PLANS:
 filterExpr: (ds = '3') (type: boolean)
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -864,11 +858,9 @@ STAGE PLANS:
 Union
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -941,11 +933,9 @@ STAGE PLANS:
 Union
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1018,11 +1008,9 @@ STAGE PLANS:
 Union
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1164,11 +1152,9 @@ STAGE PLANS:
 Union
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: count(_col0)
+ aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
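The vector_cast_constant plan below extends the same idea to aggregate arguments: avg(_col1), avg(_col2), avg(_col3) over projected literals become avg(50), avg(50.0), avg(50), and the constant-only Select columns disappear. Semantically nothing changes, since an aggregate over a constant argument still sees one identical value per row; a small illustrative Java sketch with a hypothetical avgOfConstant helper:

public class ConstantAggSketch {
  // avg(50) still aggregates one value per input row; only the argument
  // expression is constant, so every row contributes the same 50.
  static double avgOfConstant(double constant, long rowCount) {
    double sum = constant * rowCount;   // what the fold lets the engine do cheaply
    return sum / rowCount;              // always equals the constant
  }

  public static void main(String[] args) {
    System.out.println(avgOfConstant(50.0, 1049));  // 50.0
  }
}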
diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out
index 4e33f24..c8ac59a 100644
--- ql/src/test/results/clientpositive/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/vector_cast_constant.q.out
@@ -124,11 +124,11 @@ STAGE PLANS:
 alias: over1korc
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: i (type: int), 50 (type: int), 50.0 (type: double), CAST( 50 AS decimal(10,0)) (type: decimal(10,0))
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: i (type: int)
+ outputColumnNames: _col0
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: avg(_col1), avg(_col2), avg(_col3)
+ aggregations: avg(50), avg(50.0), avg(50)
 keys: _col0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -138,7 +138,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
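In the vector_decimal_2 hunks that follow, casts of literals are folded away (CAST( 3.14 AS decimal(4,2)) becomes the literal 3.14), and because a constant ORDER BY key carries no ordering information, the plan now sorts on the folded literal and ships the real column as a value expression instead: KEY.reducesinkkey0 turns into VALUE._col0 on the reduce side. Note also that Execution mode: vectorized appears or disappears in several hunks as the operator shapes change. What "folding a decimal cast at plan time" amounts to, sketched with java.math.BigDecimal:

import java.math.BigDecimal;
import java.math.RoundingMode;

public class DecimalFoldSketch {
  public static void main(String[] args) {
    // CAST(3.14 AS decimal(4,2)): at plan time the literal can be
    // rescaled once, so no per-row cast remains in the operator tree.
    BigDecimal folded = new BigDecimal("3.14").setScale(2, RoundingMode.HALF_UP);
    System.out.println(folded + " scale=" + folded.scale()); // 3.14 scale=2
  }
}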
diff --git ql/src/test/results/clientpositive/vector_decimal_2.q.out ql/src/test/results/clientpositive/vector_decimal_2.q.out
index fceb027..b22d00c 100644
--- ql/src/test/results/clientpositive/vector_decimal_2.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_2.q.out
@@ -914,17 +914,17 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3.14 AS decimal(4,2)) (type: decimal(4,2))
+ expressions: 3.14 (type: decimal(4,2))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(4,2))
+ key expressions: 3.14 (type: decimal(3,2))
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
+ value expressions: _col0 (type: decimal(4,2))
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(4,2))
+ expressions: VALUE._col0 (type: decimal(4,2))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -968,17 +968,17 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3.14 AS decimal(4,2)) (type: decimal(4,2))
+ expressions: 3.14 (type: decimal(4,2))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(4,2))
+ key expressions: 3.14 (type: decimal(3,2))
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
+ value expressions: _col0 (type: decimal(4,2))
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(4,2))
+ expressions: VALUE._col0 (type: decimal(4,2))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1022,16 +1022,17 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 2012-12-19 11:12:19.1234567 AS decimal(30,8)) (type: decimal(30,8))
+ expressions: 1355944339.1234567 (type: decimal(30,8))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(30,8))
+ key expressions: 1355944339.1234567 (type: decimal(17,7))
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(30,8))
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(30,8))
+ expressions: VALUE._col0 (type: decimal(30,8))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1075,16 +1076,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( true AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 1 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 1 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
+ Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1119,16 +1122,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( true AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 1 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 1 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
+ Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1172,17 +1177,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 3 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 3 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1226,17 +1232,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 3 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 3 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1280,17 +1287,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 3 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 3 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1334,17 +1342,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 3 AS decimal(10,0)) (type: decimal(10,0))
+ expressions: 3 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(10,0))
+ key expressions: 3 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,0))
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(10,0))
+ expressions: VALUE._col0 (type: decimal(10,0))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1388,17 +1397,18 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( 1.0 AS decimal(20,19)) (type: decimal(20,19))
+ expressions: 1 (type: decimal(20,19))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(20,19))
+ key expressions: 1 (type: int)
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(20,19))
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(20,19))
+ expressions: VALUE._col0 (type: decimal(20,19))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
@@ -1442,16 +1452,17 @@ STAGE PLANS:
 alias: decimal_2
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: CAST( '0.99999999999999999999' AS decimal(20,20)) (type: decimal(20,20))
+ expressions: 0.99999999999999999999 (type: decimal(20,20))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: decimal(20,20))
+ key expressions: 0.99999999999999999999 (type: decimal(20,20))
 sort order: +
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(20,20))
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: decimal(20,20))
+ expressions: VALUE._col0 (type: decimal(20,20))
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out
index 235f678..bc57930 100644
--- ql/src/test/results/clientpositive/vector_decimal_udf.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out
@@ -2407,11 +2407,11 @@ STAGE PLANS:
 alias: decimal_udf
 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: key (type: decimal(20,10)), 3 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: decimal(20,10))
+ outputColumnNames: _col0
 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
- aggregations: histogram_numeric(_col0, _col1)
+ aggregations: histogram_numeric(_col0, 3)
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
diff --git ql/src/test/results/compiler/errors/wrong_distinct2.q.out ql/src/test/results/compiler/errors/wrong_distinct2.q.out
index 52ddcd8..f6fc374 100644
--- ql/src/test/results/compiler/errors/wrong_distinct2.q.out
+++ ql/src/test/results/compiler/errors/wrong_distinct2.q.out
@@ -1 +1 @@
-Parse Error: line 2:45 cannot recognize input near 'DISTINCT' 'substr' '(' in select expression
\ No newline at end of file
+Parse Error: line 2:45 cannot recognize input near 'DISTINCT' 'substr' '(' in selection target
\ No newline at end of file
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyVoidObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
index d63d75a..7d2d8e7 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
@@ -39,6 +39,6 @@ public Object copyObject(Object o) {
 
   @Override
   public Object getPrimitiveJavaObject(Object o) {
-    throw new RuntimeException("Internal error: cannot create Void object.");
+    return null;
   }
 }
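The LazyVoidObjectInspector hunk above (and the matching WritableVoidObjectInspector hunk at the end of the patch) replaces an internal-error throw with return null: a void/NULL-typed value has no Java representation, and with constants now flowing further through the planner, callers can legitimately ask a void inspector for its Java value. A hedged sketch of the intended call-site behavior, using a stand-in interface rather than Hive's PrimitiveObjectInspector hierarchy:

// Stand-in for inspector behavior around void/NULL constants; the interface
// here is illustrative, not Hive's actual class hierarchy.
interface PrimitiveInspector {
  Object getPrimitiveJavaObject(Object o);
}

public class VoidInspectorSketch {
  public static void main(String[] args) {
    PrimitiveInspector voidOI = o -> null;  // after the patch: null, not a throw
    // Constant folding can hand a NULL literal to any consumer; the consumer
    // should see a plain Java null instead of a RuntimeException.
    System.out.println(voidOI.getPrimitiveJavaObject(null)); // null
  }
}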
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedListObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedListObjectInspector.java
index 6a9215b..e69de29 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedListObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedListObjectInspector.java
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.objectinspector;
-
-import java.util.List;
-
-public class DelegatedListObjectInspector implements ListObjectInspector {
-
-  private ListObjectInspector delegate;
-  private ObjectInspector element;
-
-  protected DelegatedListObjectInspector() {
-    super();
-  }
-  public DelegatedListObjectInspector(ListObjectInspector delegate) {
-    this.delegate = delegate;
-  }
-
-  public void reset(ListObjectInspector delegate) {
-    this.delegate = delegate;
-    if (element != null) {
-      DelegatedObjectInspectorFactory.reset(element, delegate.getListElementObjectInspector());
-    }
-  }
-
-  public ObjectInspector getListElementObjectInspector() {
-    return element != null ? element :
-        (element = DelegatedObjectInspectorFactory.wrap(delegate.getListElementObjectInspector()));
-  }
-
-  public Object getListElement(Object data, int index) {
-    return delegate.getListElement(data, index);
-  }
-
-  public int getListLength(Object data) {
-    return delegate.getListLength(data);
-  }
-
-  public List getList(Object data) {
-    return delegate.getList(data);
-  }
-
-  public String getTypeName() {
-    return delegate.getTypeName();
-  }
-
-  public Category getCategory() {
-    return delegate.getCategory();
-  }
-}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedMapObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedMapObjectInspector.java
index 975d5cd..e69de29 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedMapObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedMapObjectInspector.java
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.objectinspector;
-
-import java.util.Map;
-
-public class DelegatedMapObjectInspector implements MapObjectInspector {
-
-  private MapObjectInspector delegate;
-  private ObjectInspector key;
-  private ObjectInspector value;
-
-  protected DelegatedMapObjectInspector() {
-    super();
-  }
-  public DelegatedMapObjectInspector(MapObjectInspector delegate) {
-    this.delegate = delegate;
-  }
-
-  public void reset(MapObjectInspector current) {
-    this.delegate = current;
-    if (key != null) {
-      DelegatedObjectInspectorFactory.reset(key, current.getMapKeyObjectInspector());
-    }
-    if (value != null) {
-      DelegatedObjectInspectorFactory.reset(value, current.getMapValueObjectInspector());
-    }
-  }
-
-  public ObjectInspector getMapKeyObjectInspector() {
-    return key != null ? key :
-        (key = DelegatedObjectInspectorFactory.wrap(delegate.getMapKeyObjectInspector()));
-  }
-
-  public ObjectInspector getMapValueObjectInspector() {
-    return value != null ? value :
-        (value = DelegatedObjectInspectorFactory.wrap(delegate.getMapValueObjectInspector()));
-  }
-
-  public Object getMapValueElement(Object data, Object key) {
-    return delegate.getMapValueElement(data, key);
-  }
-
-  public Map getMap(Object data) {
-    return delegate.getMap(data);
-  }
-
-  public int getMapSize(Object data) {
-    return delegate.getMapSize(data);
-  }
-
-  public String getTypeName() {
-    return delegate.getTypeName();
-  }
-
-  public Category getCategory() {
-    return delegate.getCategory();
-  }
-}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedObjectInspectorFactory.java
index 2db3819..e69de29 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedObjectInspectorFactory.java
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.objectinspector;
-
-public class DelegatedObjectInspectorFactory {
-
-  public static ObjectInspector wrap(ObjectInspector inspector) {
-    switch (inspector.getCategory()) {
-      case PRIMITIVE:
-        return inspector;
-      case LIST:
-        return new DelegatedListObjectInspector((ListObjectInspector) inspector);
-      case MAP:
-        return new DelegatedMapObjectInspector((MapObjectInspector) inspector);
-      case STRUCT:
-        return new DelegatedStructObjectInspector((StructObjectInspector) inspector);
-      case UNION:
-        return new DelegatedUnionObjectInspector((UnionObjectInspector) inspector);
-      default:
-        throw new RuntimeException("invalid category " + inspector.getCategory());
-    }
-  }
-
-  public static ObjectInspector reset(ObjectInspector prev, ObjectInspector current) {
-    switch (current.getCategory()) {
-      case PRIMITIVE:
-        break;
-      case LIST:
-        ((DelegatedListObjectInspector)prev).reset((ListObjectInspector) current);
-        break;
-      case MAP:
-        ((DelegatedMapObjectInspector)prev).reset((MapObjectInspector) current);
-        break;
-      case STRUCT:
-        ((DelegatedStructObjectInspector)prev).reset((StructObjectInspector) current);
-        break;
-      case UNION:
-        ((DelegatedUnionObjectInspector)prev).reset((UnionObjectInspector) current);
-        break;
-      default:
-        throw new RuntimeException("invalid category " + current.getCategory());
-    }
-    return prev;
-  }
-}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java
index ef66e97..e69de29 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java
@@ -1,103 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.objectinspector;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class DelegatedStructObjectInspector extends StructObjectInspector {
-
-  private StructObjectInspector delegate;
-  private List fields;
-
-  protected DelegatedStructObjectInspector() {
-    super();
-  }
-  public DelegatedStructObjectInspector(StructObjectInspector delegate) {
-    this.delegate = delegate;
-  }
-
-  public void reset(StructObjectInspector current) {
-    this.delegate = current;
-    if (fields != null) {
-      int index = 0;
-      List newFields = delegate.getAllStructFieldRefs();
-      for (DelegatedStructField field : fields) {
-        field.field = newFields.get(index++);
-      }
-    }
-  }
-
-  private static class DelegatedStructField implements StructField {
-    private StructField field;
-
-    public DelegatedStructField(StructField field) {
-      this.field = field;
-    }
-    public String getFieldName() {
-      return field.getFieldName();
-    }
-    public ObjectInspector getFieldObjectInspector() {
-      return field.getFieldObjectInspector();
-    }
-    public int getFieldID() {
-      return field.getFieldID();
-    }
-    public String getFieldComment() {
-      return field.getFieldComment();
-    }
-  }
-
-  @Override
-  public List getAllStructFieldRefs() {
-    if (fields != null || delegate.getAllStructFieldRefs() == null) {
-      return fields;
-    }
-    List fields = delegate.getAllStructFieldRefs();
-    List delegate = new ArrayList(fields.size());
-    for (StructField field : fields) {
-      delegate.add(new DelegatedStructField(field));
-    }
-    return this.fields = delegate;
-  }
-
-  @Override
-  public StructField getStructFieldRef(String fieldName) {
-    StructField field = delegate.getStructFieldRef(fieldName);
-    return field == null ? null : new DelegatedStructField(field);
-  }
-
-  @Override
-  public Object getStructFieldData(Object data, StructField fieldRef) {
-    return delegate.getStructFieldData(data, ((DelegatedStructField) fieldRef).field);
-  }
-
-  @Override
-  public List getStructFieldsDataAsList(Object data) {
-    return delegate.getStructFieldsDataAsList(data);
-  }
-
-  public String getTypeName() {
-    return delegate.getTypeName();
-  }
-
-  public Category getCategory() {
-    return delegate.getCategory();
-  }
-}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedUnionObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedUnionObjectInspector.java
index 521fdd6..e69de29 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedUnionObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedUnionObjectInspector.java
@@ -1,74 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.objectinspector;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class DelegatedUnionObjectInspector implements UnionObjectInspector {
-
-  private UnionObjectInspector delegate;
-  private List children;
-
-  protected DelegatedUnionObjectInspector() {
-    super();
-  }
-  public DelegatedUnionObjectInspector(UnionObjectInspector delegate) {
-    this.delegate = delegate;
-  }
-
-  public void reset(UnionObjectInspector current) {
-    this.delegate = current;
-    if (children != null) {
-      int index = 0;
-      List newOIs = delegate.getObjectInspectors();
-      for (ObjectInspector child : children) {
-        DelegatedObjectInspectorFactory.reset(child, newOIs.get(index++));
-      }
-    }
-  }
-
-  public List getObjectInspectors() {
-    if (children != null || delegate.getObjectInspectors() == null) {
-      return children;
-    }
-    List inspectors = delegate.getObjectInspectors();
-    List delegated = new ArrayList();
-    for (ObjectInspector inspector : inspectors) {
-      delegated.add(DelegatedObjectInspectorFactory.wrap(inspector));
-    }
-    return children = delegated;
-  }
-
-  public byte getTag(Object o) {
-    return delegate.getTag(o);
-  }
-
-  public Object getField(Object o) {
-    return delegate.getField(o);
-  }
-
-  public String getTypeName() {
-    return delegate.getTypeName();
-  }
-
-  public Category getCategory() {
-    return delegate.getCategory();
-  }
-}
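Of the serde changes that close the patch, the ObjectInspectorUtils hunk below interacts most directly with constant propagation: getConstantObjectInspector now returns the inspector unchanged when it is already a ConstantObjectInspector, instead of rebuilding it through a writable copy. The short-circuit pattern, sketched with stand-in types (Inspector, ConstantInspector) rather than Hive's interfaces:

// Stand-in types; Hive's real interfaces are ObjectInspector and
// ConstantObjectInspector in org.apache.hadoop.hive.serde2.objectinspector.
interface Inspector {}
interface ConstantInspector extends Inspector {}

public class ShortCircuitSketch {
  static ConstantInspector asConstant(Inspector oi, Object value) {
    if (oi instanceof ConstantInspector c) {
      return c;                       // already constant: avoid a wasteful rewrap
    }
    // ... otherwise convert `value` to a writable and build a constant
    // inspector, as the original method body does (elided in this sketch).
    throw new UnsupportedOperationException("conversion path elided in sketch");
  }

  public static void main(String[] args) {
    ConstantInspector c = new ConstantInspector() {};
    System.out.println(asConstant(c, 42) == c);  // true
  }
}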
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
index 8a42577..2a47d97 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
@@ -209,7 +209,7 @@ public static ObjectInspector getConvertedOI(
    * can contain non-settable fields only if inputOI equals outputOI and equalsCheck is
    * true.
    */
-  private static ObjectInspector getConvertedOI(
+  public static ObjectInspector getConvertedOI(
       ObjectInspector inputOI,
       ObjectInspector outputOI,
       Map oiSettableProperties,
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index e844979..5e67a07 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -1000,6 +1000,9 @@ public static boolean compareTypes(ObjectInspector o1, ObjectInspector o2) {
   }
 
   public static ConstantObjectInspector getConstantObjectInspector(ObjectInspector oi, Object value) {
+    if (oi instanceof ConstantObjectInspector) {
+      return (ConstantObjectInspector) oi;
+    }
     ObjectInspector writableOI = getStandardObjectInspector(oi, ObjectInspectorCopyOption.WRITABLE);
     Object writableValue = ObjectInspectorConverters.getConverter(oi, writableOI).convert(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index 49884b8..cb996a8 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -478,6 +478,9 @@ public HiveVarcharConverter(PrimitiveObjectInspector inputOI,
 
   @Override
   public Object convert(Object input) {
+    if (input == null) {
+      return null;
+    }
     switch (inputOI.getPrimitiveCategory()) {
     case BOOLEAN:
       return outputOI.set(hc,
@@ -504,6 +507,9 @@ public HiveCharConverter(PrimitiveObjectInspector inputOI,
 
   @Override
   public Object convert(Object input) {
+    if (input == null) {
+      return null;
+    }
     switch (inputOI.getPrimitiveCategory()) {
     case BOOLEAN:
       return outputOI.set(hc,
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableVoidObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableVoidObjectInspector.java
index 02b672a..f3f4838 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableVoidObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableVoidObjectInspector.java
@@ -44,6 +44,6 @@ public Object getWritableConstantValue() {
 
   @Override
   public Object getPrimitiveJavaObject(Object o) {
-    throw new RuntimeException("Internal error: cannot create Void object.");
+    return null;
   }
 }
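Finally, the HiveVarcharConverter/HiveCharConverter hunks above add an explicit null guard: a folded NULL constant can now reach convert(), which must yield null rather than fall into the primitive-category switch and fail. The same guard, as a self-contained sketch (toy Converter interface, not Hive's ObjectInspectorConverters.Converter):

// Toy converter illustrating the added null guard.
interface Converter {
  Object convert(Object input);
}

public class NullSafeConverterSketch {
  public static void main(String[] args) {
    Converter toVarchar = input -> {
      if (input == null) {
        return null;            // the guard added by the patch
      }
      return input.toString();  // stand-in for the category switch
    };
    System.out.println(toVarchar.convert(null));   // null, no NPE
    System.out.println(toVarchar.convert(42));     // "42"
  }
}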