diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 44269f0..7409d3e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -337,10 +337,9 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
       // can be boolean column in which case return true count
       ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred;
       String colName = encd.getColumn();
-      String tabAlias = encd.getTabAlias();
       String colType = encd.getTypeString();
       if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
-        ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+        ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
         if (cs != null) {
           return cs.getNumTrues();
         }
@@ -393,10 +392,9 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
       // NOT on boolean columns is possible. in which case return false count.
       ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf;
       String colName = encd.getColumn();
-      String tabAlias = encd.getTabAlias();
       String colType = encd.getTypeString();
       if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
-        ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+        ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
         if (cs != null) {
           return cs.getNumFalses();
         }
@@ -423,8 +421,7 @@ private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) {
     if (leaf instanceof ExprNodeColumnDesc) {
       ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
       String colName = colDesc.getColumn();
-      String tabAlias = colDesc.getTabAlias();
-      ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+      ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
       if (cs != null) {
         return cs.getNumNulls();
       }
@@ -450,7 +447,6 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
       if (udf instanceof GenericUDFOPEqual ||
           udf instanceof GenericUDFOPEqualNS) {
         String colName = null;
-        String tabAlias = null;
         boolean isConst = false;
         Object prevConst = null;
@@ -483,7 +479,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
             return numRows;
           }

-          ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+          ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
           if (cs != null) {
             long dvs = cs.getCountDistint();
             numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
@@ -492,7 +488,6 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         } else if (leaf instanceof ExprNodeColumnDesc) {
           ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
           colName = colDesc.getColumn();
-          tabAlias = colDesc.getTabAlias();

           // if const is first argument then evaluate the result
           if (isConst) {
@@ -504,7 +499,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
             return numRows;
           }

-          ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+          ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
           if (cs != null) {
             long dvs = cs.getCountDistint();
             numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
@@ -826,9 +821,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           // for those newly added columns
           if (!colExprMap.containsKey(ci.getInternalName())) {
             String colName = ci.getInternalName();
-            String tabAlias = ci.getTabAlias();
             String colType = ci.getTypeName();
-            ColStatistics cs = new ColStatistics(tabAlias, colName, colType);
+            ColStatistics cs = new ColStatistics(colName, colType);
             cs.setCountDistint(stats.getNumRows());
             cs.setNumNulls(0);
             cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType));
@@ -1053,10 +1047,10 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // statistics object that is combination of statistics from all
         // relations involved in JOIN
         Statistics stats = new Statistics();
-        Map<String, Long> rowCountParents = new HashMap<String, Long>();
         List<Long> distinctVals = Lists.newArrayList();
         int numParent = parents.size();
-        Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
+        Map<Integer, Long> rowCountParents = Maps.newHashMap();
+        Map<Integer, Statistics> joinStats = Maps.newHashMap();
         Map<Integer, List<String>> joinKeys = Maps.newHashMap();
         List<Long> rowCounts = Lists.newArrayList();
@@ -1072,35 +1066,24 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // get the join keys from parent ReduceSink operators
         for (int pos = 0; pos < parents.size(); pos++) {
           ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
-
           Statistics parentStats = parent.getStatistics();
           keyExprs = parent.getConf().getOutputKeyColumnNames();

-          // Parent RS may have column statistics from multiple parents.
+          // Parent RS may have column statistics from multiple parents.
           // Populate table alias to row count map, this will be used later to
           // scale down/up column statistics based on new row count
           // NOTE: JOIN with UNION as parent of RS will not have table alias
           // propagated properly. UNION operator does not propagate the table
           // alias of subqueries properly to expression nodes. Hence union20.q
           // will have wrong number of rows.
-          Set<String> tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap());
-          for (String tabAlias : tableAliases) {
-            rowCountParents.put(tabAlias, parentStats.getNumRows());
-          }
+          rowCountParents.put(pos, parentStats.getNumRows());
           rowCounts.add(parentStats.getNumRows());

-          // compute fully qualified join key column names. this name will be
-          // used to quickly look-up for column statistics of join key.
-          // TODO: expressions in join condition will be ignored. assign
           // internal name for expressions and estimate column statistics for expression.
-          List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keyExprs,
-              parent.getColumnExprMap());
+          joinKeys.put(pos, keyExprs);

           // get column statistics for all output columns
-          for (ColStatistics cs : parentStats.getColumnStats()) {
-            joinedColStats.put(cs.getFullyQualifiedColName(), cs);
-          }
+          joinStats.put(pos, parentStats);

           // since new statistics is derived from all relations involved in
           // JOIN, we need to update the state information accordingly
@@ -1116,12 +1099,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           for (int idx = 0; idx < numAttr; idx++) {
             for (Integer i : joinKeys.keySet()) {
               String col = joinKeys.get(i).get(idx);
-              ColStatistics cs = joinedColStats.get(col);
+              ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col);
               if (cs != null) {
                 perAttrDVs.add(cs.getCountDistint());
               }
             }
-
             distinctVals.add(getDenominator(perAttrDVs));
             perAttrDVs.clear();
           }
@@ -1136,40 +1118,34 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
             }
           }
         } else {
-          for (List<String> jkeys : joinKeys.values()) {
-            for (String jk : jkeys) {
-              ColStatistics cs = joinedColStats.get(jk);
-              if (cs != null) {
-                distinctVals.add(cs.getCountDistint());
-              }
+          for (Integer i : joinKeys.keySet()) {
+            String col = joinKeys.get(i).get(0);
+            ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col);
+            if (cs != null) {
+              distinctVals.add(cs.getCountDistint());
             }
           }
           denom = getDenominator(distinctVals);
         }

         // Update NDV of joined columns to be min(V(R,y), V(S,y))
-        updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+        updateJoinColumnsNDV(joinKeys, joinStats, numAttr);

-        // column statistics from different sources are put together and rename
-        // fully qualified column names based on output schema of join operator
+        // column statistics from different sources are put together and
+        // rename based on output schema of join operator
         Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
         RowSchema rs = jop.getSchema();
         List<ColStatistics> outColStats = Lists.newArrayList();
-        Map<String, String> outInTabAlias = new HashMap<String, String>();
         for (ColumnInfo ci : rs.getSignature()) {
           String key = ci.getInternalName();
           ExprNodeDesc end = colExprMap.get(key);
           if (end instanceof ExprNodeColumnDesc) {
             String colName = ((ExprNodeColumnDesc) end).getColumn();
-            String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias();
-            String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
-            ColStatistics cs = joinedColStats.get(fqColName);
+            int pos = jop.getConf().getReversedExprs().get(key);
+            ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(colName);
             String outColName = key;
-            String outTabAlias = ci.getTabAlias();
-            outInTabAlias.put(outTabAlias, tabAlias);
             if (cs != null) {
               cs.setColumnName(outColName);
-              cs.setTableAlias(outTabAlias);
             }
             outColStats.add(cs);
           }
@@ -1178,7 +1154,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // update join statistics
         stats.setColumnStats(outColStats);
         long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
-        updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,outInTabAlias);
+        updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
         jop.setStatistics(stats);

         if (isDebugEnabled) {
@@ -1365,12 +1341,10 @@ private float getSelectivityComplexTree(Operator<? extends OperatorDesc> op) {
       if (op != null && op instanceof ReduceSinkOperator) {
         ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
         List<String> keys = rsOp.getConf().getOutputKeyColumnNames();
-        List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
-            rsOp.getColumnExprMap());
-        if (fqCols.size() == 1) {
-          String joinCol = fqCols.get(0);
+        if (keys.size() == 1) {
+          String joinCol = keys.get(0);
           if (rsOp.getStatistics() != null) {
-            ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
+            ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
             if (cs != null && !cs.isPrimaryKey()) {
               if (StatsUtils.inferForeignKey(csPK, cs)) {
                 result.add(i);
@@ -1396,12 +1370,10 @@ private float getSelectivityComplexTree(Operator<? extends OperatorDesc> op) {
       if (op instanceof ReduceSinkOperator) {
         ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
         List<String> keys = rsOp.getConf().getOutputKeyColumnNames();
-        List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
-            rsOp.getColumnExprMap());
-        if (fqCols.size() == 1) {
-          String joinCol = fqCols.get(0);
+        if (keys.size() == 1) {
+          String joinCol = keys.get(0);
           if (rsOp.getStatistics() != null) {
-            ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
+            ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
             if (cs != null && cs.isPrimaryKey()) {
               result.add(i);
             }
@@ -1429,8 +1401,7 @@ private Long getEasedOutDenominator(List<Long> distinctVals) {
     private void updateStatsForJoinType(Statistics stats, long newNumRows,
         CommonJoinOperator<? extends JoinDesc> jop,
-        Map<String, Long> rowCountParents,
-        Map<String, String> outInTabAlias) {
+        Map<Integer, Long> rowCountParents) {
       if (newNumRows < 0) {
         LOG.info("STATS-" + jop.toString() + ": Overflow in number of rows."
@@ -1447,7 +1418,8 @@ private void updateStatsForJoinType(Statistics stats, long newNumRows,
       // and stats for columns from 2nd parent should be scaled down by 200x
       List<ColStatistics> colStats = stats.getColumnStats();
       for (ColStatistics cs : colStats) {
-        long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias()));
+        int pos = jop.getConf().getReversedExprs().get(cs.getColumnName());
+        long oldRowCount = rowCountParents.get(pos);
         double ratio = (double) newNumRows / (double) oldRowCount;
         long oldDV = cs.getCountDistint();
         long newDV = oldDV;
@@ -1499,15 +1471,16 @@ private long computeNewRowCount(List<Long> rowCountParents, long denom) {
     }

     private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
-        Map<String, ColStatistics> joinedColStats, int numAttr) {
+        Map<Integer, Statistics> joinStats, int numAttr) {
       int joinColIdx = 0;
       while (numAttr > 0) {
        long minNDV = Long.MAX_VALUE;

        // find min NDV for joining columns
        for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+          int pos = entry.getKey();
          String key = entry.getValue().get(joinColIdx);
-          ColStatistics cs = joinedColStats.get(key);
+          ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key);
          if (cs != null && cs.getCountDistint() < minNDV) {
            minNDV = cs.getCountDistint();
          }
@@ -1516,8 +1489,9 @@ private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
        // set min NDV value to both columns involved in join
        if (minNDV != Long.MAX_VALUE) {
          for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+            int pos = entry.getKey();
            String key = entry.getValue().get(joinColIdx);
-            ColStatistics cs = joinedColStats.get(key);
+            ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key);
            if (cs != null) {
              cs.setCountDistint(minNDV);
            }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index c420190..2271e34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -23,10 +23,8 @@
 public class ColStatistics {

-  private String tabAlias;
   private String colName;
   private String colType;
-  private String fqColName;
   private long countDistint;
   private long numNulls;
   private double avgColLen;
@@ -35,16 +33,14 @@
   private Range range;
   private boolean isPrimaryKey;

-  public ColStatistics(String tabAlias, String colName, String colType) {
-    this.setTableAlias(tabAlias);
+  public ColStatistics(String colName, String colType) {
     this.setColumnName(colName);
     this.setColumnType(colType);
-    this.setFullyQualifiedColName(StatsUtils.getFullyQualifiedColumnName(tabAlias, colName));
     this.setPrimaryKey(false);
   }

   public ColStatistics() {
-    this(null, null, null);
+    this(null, null);
   }

   public String getColumnName() {
@@ -53,7 +49,6 @@ public String getColumnName() {
   public void setColumnName(String colName) {
     this.colName = colName;
-    this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
   }

   public String getColumnType() {
@@ -88,23 +83,6 @@ public void setAvgColLen(double avgColLen) {
     this.avgColLen = avgColLen;
   }

-  public String getFullyQualifiedColName() {
-    return fqColName;
-  }
-
-  public void setFullyQualifiedColName(String fqColName) {
-    this.fqColName = fqColName;
-  }
-
-  public String getTableAlias() {
-    return tabAlias;
-  }
-
-  public void setTableAlias(String tabName) {
-    this.tabAlias = tabName;
-    this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabName, colName);
-  }
-
   public long getNumTrues() {
     return numTrues;
   }
@@ -136,8 +114,6 @@ public void setRange(Range r) {
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    sb.append(" fqColName: ");
-    sb.append(fqColName);
     sb.append(" colName: ");
     sb.append(colName);
     sb.append(" colType: ");
@@ -163,8 +139,7 @@ public String toString() {
   @Override
   public ColStatistics clone() throws CloneNotSupportedException {
-    ColStatistics clone = new ColStatistics(tabAlias, colName, colType);
-    clone.setFullyQualifiedColName(fqColName);
+    ColStatistics clone = new ColStatistics(colName, colType);
     clone.setAvgColLen(avgColLen);
     clone.setCountDistint(countDistint);
     clone.setNumNulls(numNulls);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index f66279f..8d1547c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -176,7 +176,7 @@ public void addToColumnStats(List<ColStatistics> colStats) {
       ColStatistics updatedCS = null;
       if (cs != null) {
-        String key = cs.getFullyQualifiedColName();
+        String key = cs.getColumnName();
         // if column statistics for a column is already found then merge the statistics
         if (columnStats.containsKey(key) && columnStats.get(key) != null) {
           updatedCS = columnStats.get(key);
@@ -229,14 +229,7 @@ public long getAvgRowSize() {
     return dataSize;
   }
-
-  public ColStatistics getColumnStatisticsFromFQColName(String fqColName) {
-    if (columnStats != null) {
-      return columnStats.get(fqColName);
-    }
-    return null;
-  }
-
+
   public ColStatistics getColumnStatisticsFromColName(String colName) {
     if (columnStats == null) {
       return null;
@@ -249,16 +242,10 @@ public ColStatistics getColumnStatisticsFromColName(String colName) {
     return null;
   }

-  public ColStatistics getColumnStatisticsForColumn(String tabAlias, String colName) {
-    String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
-    return getColumnStatisticsFromFQColName(fqColName);
-  }
-
   public List<ColStatistics> getColumnStats() {
     if (columnStats != null) {
       return Lists.newArrayList(columnStats.values());
     }
     return null;
   }
-
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 508d880..985d31c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -240,8 +240,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
       for (Partition part : partList.getNotDeniedPartns()) {
         partNames.add(part.getName());
       }
-      Map<String, String> colToTabAlias = new HashMap<String, String>();
-      neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
+      neededColumns = processNeededColumns(schema, neededColumns);
       AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
           neededColumns, partNames);
       if (null == aggrStats) {
@@ -262,8 +261,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
        LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to"
            + " retrieve for " + colStats.size() + " columns");
      }
-      List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName(),
-          colToTabAlias);
+      List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName());
      addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList,
          columnStats);
@@ -355,8 +353,8 @@ private static void addParitionColumnStats(HiveConf conf, List<String> neededCol
          // currently metastore does not store column stats for
          // partition column, so we calculate the NDV from pruned
          // partition list
-          ColStatistics partCS = new ColStatistics(table.getTableName(),
-              ci.getInternalName(), ci.getType().getTypeName());
+          ColStatistics partCS = new ColStatistics(ci.getInternalName(), ci.getType()
+              .getTypeName());
          long numPartitions = getNDVPartitionColumn(partList.getPartitions(),
              ci.getInternalName());
          partCS.setCountDistint(numPartitions);
@@ -532,7 +530,7 @@ public static long getSumIgnoreNegatives(List<Long> vals) {
    */
   public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
       String colName) {
-    ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType());
+    ColStatistics cs = new ColStatistics(colName, cso.getColType());
     String colType = cso.getColType();
     ColumnStatisticsData csd = cso.getStatsData();
     if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
@@ -613,13 +611,12 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
       Table table, List<ColumnInfo> schema, List<String> neededColumns) {
     String dbName = table.getDbName();
     String tabName = table.getTableName();
-    Map<String, String> colToTabAlias = new HashMap<String, String>(schema.size());
-    List<String> neededColsInTable = processNeededColumns(schema, neededColumns, colToTabAlias);
+    List<String> neededColsInTable = processNeededColumns(schema, neededColumns);
     List<ColStatistics> stats = null;
     try {
       List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
           dbName, tabName, neededColsInTable);
-      stats = convertColStats(colStat, tabName, colToTabAlias);
+      stats = convertColStats(colStat, tabName);
     } catch (HiveException e) {
       LOG.error("Failed to retrieve table statistics: ", e);
       stats = null;
@@ -627,28 +624,21 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
     return stats;
   }

-  private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName,
-      Map<String, String> colToTabAlias) {
+  private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName) {
     List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
     for (ColumnStatisticsObj statObj : colStats) {
       ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
-      cs.setTableAlias(colToTabAlias.get(cs.getColumnName()));
       stats.add(cs);
     }
     return stats;
   }

   private static List<String> processNeededColumns(List<ColumnInfo> schema,
-      List<String> neededColumns, Map<String, String> colToTabAlias) {
-    for (ColumnInfo col : schema) {
-      if (col.isHiddenVirtualCol()) continue;
-      colToTabAlias.put(col.getInternalName(), col.getTabAlias());
-    }
+      List<String> neededColumns) {
     // Remove hidden virtual columns, as well as needed columns that are not
     // part of the table. TODO: the latter case should not really happen...
     List<String> neededColsInTable = null;
     int limit = neededColumns.size();
     for (int i = 0; i < limit; ++i) {
-      if (colToTabAlias.containsKey(neededColumns.get(i))) continue;
       if (neededColsInTable == null) {
         neededColsInTable = Lists.newArrayList(neededColumns);
       }
@@ -1013,12 +1003,10 @@ public static long getWritableSize(ObjectInspector oi, Object value) {
     if (colExprMap != null && rowSchema != null) {
       for (ColumnInfo ci : rowSchema.getSignature()) {
         String outColName = ci.getInternalName();
-        String outTabAlias = ci.getTabAlias();
         ExprNodeDesc end = colExprMap.get(outColName);
         ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
         if (colStat != null) {
           colStat.setColumnName(outColName);
-          colStat.setTableAlias(outTabAlias);
           cs.add(colStat);
         }
       }
@@ -1059,10 +1047,6 @@ public static long getWritableSize(ObjectInspector oi, Object value) {
           colStat = null;
         }
         if (colStat != null) {
-          ColumnInfo ci = rowSchema.getColumnInfo(colStat.getColumnName());
-          if (ci != null) {
-            colStat.setTableAlias(ci.getTabAlias());
-          }
           cs.add(colStat);
         }
       }
@@ -1094,13 +1078,11 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis
     long numNulls = 0;
     ObjectInspector oi = null;
     long numRows = parentStats.getNumRows();
-    String tabAlias = null;

     if (end instanceof ExprNodeColumnDesc) {
       // column projection
       ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
       colName = encd.getColumn();
-      tabAlias = encd.getTabAlias();

       if (encd.getIsPartitionColOrVirtualCol()) {
@@ -1117,7 +1099,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis
     } else {
       // clone the column stats and return
-      ColStatistics result = parentStats.getColumnStatisticsForColumn(tabAlias, colName);
+      ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
       if (result != null) {
         try {
           return result.clone();
@@ -1189,7 +1171,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis
       avgColSize = getAvgColLenOfFixedLengthTypes(colType);
     }

-    ColStatistics colStats = new ColStatistics(tabAlias, colName, colType);
+    ColStatistics colStats = new ColStatistics(colName, colType);
     colStats.setAvgColLen(avgColSize);
     colStats.setCountDistint(countDistincts);
     colStats.setNumNulls(numNulls);
@@ -1372,81 +1354,6 @@ private static String getFullyQualifiedName(String... names) {
     return Joiner.on(".").join(nonNullAndEmptyNames);
   }

-  /**
-   * Get fully qualified column name from output key column names and column expression map
-   * @param keyExprs
-   *          - output key names
-   * @param map
-   *          - column expression map
-   * @return list of fully qualified names
-   */
-  public static List<String> getFullyQualifedReducerKeyNames(List<String> keyExprs,
-      Map<String, ExprNodeDesc> map) {
-    List<String> result = Lists.newArrayList();
-    if (keyExprs != null) {
-      for (String key : keyExprs) {
-        String colName = key;
-        ExprNodeDesc end = map.get(colName);
-        // if we couldn't get expression try prepending "KEY." prefix to reducer key column names
-        if (end == null) {
-          colName = Utilities.ReduceField.KEY.toString() + "." + key;
-          end = map.get(colName);
-          if (end == null) {
-            continue;
-          }
-        }
-        if (end instanceof ExprNodeColumnDesc) {
-          ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
-          String tabAlias = encd.getTabAlias();
-          result.add(getFullyQualifiedColumnName(tabAlias, colName));
-        } else if (end instanceof ExprNodeGenericFuncDesc) {
-          ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;
-          String tabAlias = "";
-          for (ExprNodeDesc childEnd : enf.getChildren()) {
-            if (childEnd instanceof ExprNodeColumnDesc) {
-              tabAlias = ((ExprNodeColumnDesc) childEnd).getTabAlias();
-              break;
-            }
-          }
-          result.add(getFullyQualifiedColumnName(tabAlias, colName));
-        } else if (end instanceof ExprNodeConstantDesc) {
-          ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
-          result.add(encd.getValue().toString());
-        }
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Returns all table aliases from expression nodes
-   * @param columnExprMap - column expression map
-   * @return
-   */
-  public static Set<String> getAllTableAlias(
-      Map<String, ExprNodeDesc> columnExprMap) {
-    Set<String> result = new HashSet<String>();
-    if (columnExprMap != null) {
-      for (ExprNodeDesc end : columnExprMap.values()) {
-        getTableAliasFromExprNode(end, result);
-      }
-    }
-    return result;
-  }
-
-  private static void getTableAliasFromExprNode(ExprNodeDesc end,
-      Set<String> output) {
-
-    if (end instanceof ExprNodeColumnDesc) {
-      output.add(((ExprNodeColumnDesc) end).getTabAlias());
-    } else if (end instanceof ExprNodeGenericFuncDesc) {
-      for (ExprNodeDesc child : end.getChildren()) {
-        getTableAliasFromExprNode(child, output);
-      }
-    }
-
-  }
-
   public static long getAvailableMemory(Configuration conf) {
     int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE);
     if (memory <= 0) {