Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml (revision 1157970)
+++ conf/hive-default.xml (working copy)
@@ -1145,6 +1145,19 @@
+ hive.autogen.columnalias.prefix.label
+ _c
+ String used as a prefix when auto-generating column aliases.
+ By default, the column position number is appended to the prefix label to form the column alias. Auto-generation happens when an aggregate function is used in a select clause without an explicit alias.
+
+
+
+ hive.autogen.columnalias.prefix.includefuncname
+ false
+ Whether to include the function name in the column aliases auto-generated by Hive.
+
+
+
hive.exec.perf.logger
org.apache.hadoop.hive.ql.log.PerfLogger
The class responsible logging client side performance metrics. Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1157970)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -461,6 +461,11 @@
HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false),
HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true),
+ //prefix used to auto-generate column aliases
+ HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c"),
+ HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME(
+ "hive.autogen.columnalias.prefix.includefuncname", false),
+
// The class responsible for logging client side performance metrics
// Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger"),
Index: ql/src/test/results/clientpositive/autogen_colalias.q.out
===================================================================
--- ql/src/test/results/clientpositive/autogen_colalias.q.out (revision 0)
+++ ql/src/test/results/clientpositive/autogen_colalias.q.out (revision 0)
@@ -0,0 +1,82 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+PREHOOK: type: CREATEFUNCTION
+POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'
+POSTHOOK: type: CREATEFUNCTION
+PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old1
+PREHOOK: query: describe dest_grouped_old1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old1
+POSTHOOK: type: DESCTABLE
+_c0 int
+zz int
+key string
+_c3 int
+_c4 bigint
+_c5 double
+_c6 bigint
+_c7 bigint
+_c8 int
+_c9 int
+PREHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_old2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_old2
+PREHOOK: query: describe dest_grouped_old2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_old2
+POSTHOOK: type: DESCTABLE
+key string
+PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new1
+PREHOOK: query: describe dest_grouped_new1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new1
+POSTHOOK: type: DESCTABLE
+column_0 int
+zz int
+column_2 double
+test_max_length_src__3 int
+count_src_value_4 bigint
+sin_count_src_value_5 double
+count_sin_src_value_6 bigint
+unix_timestamp_7 bigint
+sum_if_value_10_valu_8 int
+if_src_key_10_src_ke_9 double
+PREHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table dest_grouped_new2 as select distinct src.key from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_grouped_new2
+PREHOOK: query: describe dest_grouped_new2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe dest_grouped_new2
+POSTHOOK: type: DESCTABLE
+key string
Index: ql/src/test/queries/clientpositive/autogen_colalias.q
===================================================================
--- ql/src/test/queries/clientpositive/autogen_colalias.q (revision 0)
+++ ql/src/test/queries/clientpositive/autogen_colalias.q (revision 0)
@@ -0,0 +1,22 @@
+CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax';
+
+create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1,
+1,
+0)
+ from src group by src.key;
+describe dest_grouped_old1;
+
+create table dest_grouped_old2 as select distinct src.key from src;
+describe dest_grouped_old2;
+
+set hive.autogen.columnalias.prefix.label=column_;
+set hive.autogen.columnalias.prefix.includefuncname=true;
+
+create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10,
+ (src.key +5) % 2,
+0)
+from src group by src.key;
+describe dest_grouped_new1;
+
+create table dest_grouped_new2 as select distinct src.key from src;
+describe dest_grouped_new2;
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1157970)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -200,6 +200,13 @@
private final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx();
+ //prefix for column names auto generated by hive
+ private final String autogenColAliasPrfxLbl;
+ private final boolean autogenColAliasPrfxIncludeFuncName;
+
+ //Max characters when auto generating the column name with func name
+ private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+
public static class GlobalLimitCtx {
private boolean enable = false;
private int globalLimit = -1;
@@ -268,6 +275,10 @@
groupOpToInputTables = new HashMap>();
prunedPartitions = new HashMap();
unparseTranslator = new UnparseTranslator();
+ autogenColAliasPrfxLbl = HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
+ autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
}
@Override
@@ -1936,7 +1947,7 @@
}
private static String[] getColAlias(ASTNode selExpr, String defaultName,
- RowResolver inputRR) {
+ RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
@@ -1973,9 +1984,29 @@
}
}
+ //if specified generate alias using func name
+ if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){
+
+ String expr_flattened = root.toStringTree();
+
+ //remove all TOK tokens
+ String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", "");
+
+ //replace every non-word character (anything but letters, digits, underscore) with a space, trim, then collapse each whitespace run into one underscore
+ String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+
+ //limit length to 20 chars
+ if(expr_formatted.length()>AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
+ expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
+ }
+
+ //append colnum to make it unique
+ colAlias = expr_formatted.concat("_" + colNum);
+ }
+
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
- colAlias = defaultName;
+ colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
@@ -2151,18 +2182,20 @@
if (isInTransform || isUDTF) {
tabAlias = null;
- colAlias = "_C" + i;
+ colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
- String[] colRef = getColAlias(child, "_C" + i, inputRR);
+ // Get rid of TOK_SELEXPR
+ expr = (ASTNode) child.getChild(0);
+ String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
+ autogenColAliasPrfxIncludeFuncName, i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
- // Get rid of TOK_SELEXPR
- expr = (ASTNode) child.getChild(0);
+
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {