Index: conf/hive-default.xml =================================================================== --- conf/hive-default.xml (revision 1157970) +++ conf/hive-default.xml (working copy) @@ -1145,6 +1145,19 @@ + hive.autogen.columnalias.prefix.label + _c + String used as a prefix when auto generating a column alias. + By default, a column position number is appended to the prefix label to form the column alias. Auto generation happens when an aggregate function is used in a select clause without an explicit alias. + + + + hive.autogen.columnalias.prefix.includefuncname + false + Whether to include the function name in the column alias auto generated by hive. + + + hive.exec.perf.logger org.apache.hadoop.hive.ql.log.PerfLogger The class responsible logging client side performance metrics. Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1157970) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -461,6 +461,11 @@ HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false), HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true), + //prefix used for auto generated column aliases + HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c"), + HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME( + "hive.autogen.columnalias.prefix.includefuncname", false), + // The class responsible for logging client side performance metrics // Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"), Index: ql/src/test/results/clientpositive/autogen_colalias.q.out =================================================================== --- ql/src/test/results/clientpositive/autogen_colalias.q.out (revision 0) +++ 
ql/src/test/results/clientpositive/autogen_colalias.q.out (revision 0) @@ -0,0 +1,82 @@ +PREHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' +PREHOOK: type: CREATEFUNCTION +POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' +POSTHOOK: type: CREATEFUNCTION +PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, +1, +0) + from src group by src.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, +1, +0) + from src group by src.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_grouped_old1 +PREHOOK: query: describe dest_grouped_old1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe dest_grouped_old1 +POSTHOOK: type: DESCTABLE +_c0 int +zz int +key string +_c3 int +_c4 bigint +_c5 double +_c6 bigint +_c7 bigint +_c8 int +_c9 int +PREHOOK: query: create table dest_grouped_old2 as select distinct src.key from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table dest_grouped_old2 as select distinct src.key from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_grouped_old2 +PREHOOK: query: describe dest_grouped_old2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe dest_grouped_old2 +POSTHOOK: type: DESCTABLE +key string +PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), 
count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, + (src.key +5) % 2, +0) +from src group by src.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, + (src.key +5) % 2, +0) +from src group by src.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_grouped_new1 +PREHOOK: query: describe dest_grouped_new1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe dest_grouped_new1 +POSTHOOK: type: DESCTABLE +column_0 int +zz int +column_2 double +test_max_length_src__3 int +count_src_value_4 bigint +sin_count_src_value_5 double +count_sin_src_value_6 bigint +unix_timestamp_7 bigint +sum_if_value_10_valu_8 int +if_src_key_10_src_ke_9 double +PREHOOK: query: create table dest_grouped_new2 as select distinct src.key from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table dest_grouped_new2 as select distinct src.key from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_grouped_new2 +PREHOOK: query: describe dest_grouped_new2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe dest_grouped_new2 +POSTHOOK: type: DESCTABLE +key string Index: ql/src/test/queries/clientpositive/autogen_colalias.q =================================================================== --- ql/src/test/queries/clientpositive/autogen_colalias.q (revision 0) +++ ql/src/test/queries/clientpositive/autogen_colalias.q (revision 0) @@ -0,0 +1,22 @@ +CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'; + +create table dest_grouped_old1 as select 1+1, 
2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, +1, +0) + from src group by src.key; +describe dest_grouped_old1; + +create table dest_grouped_old2 as select distinct src.key from src; +describe dest_grouped_old2; + +set hive.autogen.columnalias.prefix.label=column_; +set hive.autogen.columnalias.prefix.includefuncname=true; + +create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, + (src.key +5) % 2, +0) +from src group by src.key; +describe dest_grouped_new1; + +create table dest_grouped_new2 as select distinct src.key from src; +describe dest_grouped_new2; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1157970) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -200,6 +200,13 @@ private final UnparseTranslator unparseTranslator; private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx(); + //prefix for column names auto generated by hive + private final String autogenColAliasPrfxLbl; + private final boolean autogenColAliasPrfxIncludeFuncName; + + //Max characters when auto generating the column name with func name + private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; + public static class GlobalLimitCtx { private boolean enable = false; private int globalLimit = -1; @@ -268,6 +275,10 @@ groupOpToInputTables = new HashMap>(); prunedPartitions = new HashMap(); unparseTranslator = new UnparseTranslator(); + autogenColAliasPrfxLbl = HiveConf.getVar(conf, + 
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); + autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); } @Override @@ -1936,7 +1947,7 @@ } private static String[] getColAlias(ASTNode selExpr, String defaultName, - RowResolver inputRR) { + RowResolver inputRR, boolean includeFuncName, int colNum) { String colAlias = null; String tabAlias = null; String[] colRef = new String[2]; @@ -1973,9 +1984,29 @@ } } + //if specified, generate alias using func name + if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){ + + String expr_flattened = root.toStringTree(); + + //remove all TOK tokens + String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", ""); + + //replace all non-word characters with spaces, then collapse whitespace spans into a single underscore + String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_"); + + //limit length to AUTOGEN_COLALIAS_PRFX_MAXLENGTH chars + if(expr_formatted.length()>AUTOGEN_COLALIAS_PRFX_MAXLENGTH) { + expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH); + } + + //append colnum to make it unique + colAlias = expr_formatted.concat("_" + colNum); + } + if (colAlias == null) { // Return defaultName if selExpr is not a simple xx.yy.zz - colAlias = defaultName; + colAlias = defaultName + colNum; } colRef[0] = tabAlias; @@ -2151,18 +2182,20 @@ if (isInTransform || isUDTF) { tabAlias = null; - colAlias = "_C" + i; + colAlias = autogenColAliasPrfxLbl + i; expr = child; } else { - String[] colRef = getColAlias(child, "_C" + i, inputRR); + // Get rid of TOK_SELEXPR + expr = (ASTNode) child.getChild(0); + String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, + autogenColAliasPrfxIncludeFuncName, i); tabAlias = colRef[0]; colAlias = colRef[1]; if (hasAsClause) { unparseTranslator.addIdentifierTranslation((ASTNode) child .getChild(1)); } - // Get rid of TOK_SELEXPR - expr = (ASTNode) child.getChild(0); + } if 
(expr.getType() == HiveParser.TOK_ALLCOLREF) {