diff --git hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 index 8a351bd..eae45c5 100644 --- hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 +++ hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 @@ -91,6 +91,7 @@ stmt : | rollback_stmt | select_stmt | signal_stmt + | summary_stmt | update_stmt | use_stmt | truncate_stmt @@ -733,6 +734,10 @@ set_teradata_session_option : signal_stmt : // SIGNAL statement T_SIGNAL ident ; + +summary_stmt : // SUMMARY statement + T_SUMMARY (T_TOP expr)? T_FOR (select_stmt | table_name where_clause? (T_LIMIT expr)?) + ; truncate_stmt : T_TRUNCATE T_TABLE? table_name @@ -1506,6 +1511,7 @@ non_reserved_words : // Tokens that are not reserved words | T_SUBDIR | T_SUBSTRING | T_SUM + | T_SUMMARY | T_SYSDATE | T_SYS_REFCURSOR | T_TABLE @@ -1825,6 +1831,7 @@ T_STRING : S T R I N G ; T_SUBDIR : S U B D I R ; T_SUBSTRING : S U B S T R I N G ; T_SUM : S U M ; +T_SUMMARY : S U M M A R Y ; T_SYS_REFCURSOR : S Y S '_' R E F C U R S O R ; T_TABLE : T A B L E ; T_TABLESPACE : T A B L E S P A C E ; diff --git hplsql/src/main/java/org/apache/hive/hplsql/Exec.java hplsql/src/main/java/org/apache/hive/hplsql/Exec.java index 2ad3ea3..fdc75d5 100644 --- hplsql/src/main/java/org/apache/hive/hplsql/Exec.java +++ hplsql/src/main/java/org/apache/hive/hplsql/Exec.java @@ -1846,6 +1846,14 @@ public Integer visitQuit_stmt(HplsqlParser.Quit_stmtContext ctx) { @Override public Integer visitSignal_stmt(HplsqlParser.Signal_stmtContext ctx) { return exec.stmt.signal(ctx); + } + + /** + * SUMMARY statement - delegates to exec.stmt.summary(ctx) and returns its result + */ + @Override + public Integer visitSummary_stmt(HplsqlParser.Summary_stmtContext ctx) { + return exec.stmt.summary(ctx); } /** diff --git hplsql/src/main/java/org/apache/hive/hplsql/Meta.java hplsql/src/main/java/org/apache/hive/hplsql/Meta.java index 46bd55a..e9e02a2 100644 --- hplsql/src/main/java/org/apache/hive/hplsql/Meta.java +++ 
hplsql/src/main/java/org/apache/hive/hplsql/Meta.java
@@ -88,6 +88,82 @@ Row getRowDataType(ParserRuleContext ctx, String conn, String table) {
   }
 
   /**
+   * Get data types for all columns of the SELECT statement.
+   *
+   * For a Hive connection the statement is wrapped in a subquery and executed with LIMIT 1;
+   * for any other connection type it is only prepared and the statement metadata is read.
+   * A failed query and a metadata error are both reported through exec.signal().
+   *
+   * @param ctx parser context handed to the exec query helpers
+   * @param conn connection name handed to the exec query helpers
+   * @param select text of the SELECT statement to inspect
+   * @return column names and type names; null if nothing was read (for non-Hive
+   *         connections also when the metadata reports zero columns)
+   */
+  Row getRowDataTypeForSelect(ParserRuleContext ctx, String conn, String select) {
+    Row row = null;
+    Conn.Type connType = exec.getConnectionType(conn);
+    // Hive does not support ResultSetMetaData on PreparedStatement, and Hive DESCRIBE
+    // does not support queries, so we have to execute the query with LIMIT 1
+    if (connType == Conn.Type.HIVE) {
+      String sql = "SELECT * FROM (" + select + ") t LIMIT 1";
+      Query query = new Query(sql);
+      exec.executeQuery(ctx, query, conn);
+      if (!query.error()) {
+        ResultSet rs = query.getResultSet();
+        try {
+          ResultSetMetaData rm = rs.getMetaData();
+          int cols = rm.getColumnCount();
+          row = new Row();
+          for (int i = 1; i <= cols; i++) {
+            String name = rm.getColumnName(i);
+            // Drop the subquery alias when the column name comes back qualified with it
+            if (name.startsWith("t.")) {
+              name = name.substring(2);
+            }
+            row.addColumn(name, rm.getColumnTypeName(i));
+          }
+        }
+        catch (Exception e) {
+          exec.signal(e);
+        }
+      }
+      else {
+        exec.signal(query.getException());
+      }
+      exec.closeQuery(query, conn);
+    }
+    else {
+      Query query = exec.prepareQuery(ctx, select, conn);
+      if (!query.error()) {
+        try {
+          PreparedStatement stmt = query.getPreparedStatement();
+          ResultSetMetaData rm = stmt.getMetaData();
+          int cols = rm.getColumnCount();
+          for (int i = 1; i <= cols; i++) {
+            String col = rm.getColumnName(i);
+            String typ = rm.getColumnTypeName(i);
+            if (row == null) {
+              row = new Row();
+            }
+            // Locale.ROOT keeps the upper-casing independent of the JVM default locale
+            row.addColumn(col.toUpperCase(java.util.Locale.ROOT), typ);
+          }
+        }
+        catch (Exception e) {
+          exec.signal(e);
+        }
+      }
+      else {
+        // Report a failed prepare the same way the Hive branch reports a failed query
+        exec.signal(query.getException());
+      }
+      exec.closeQuery(query, conn);
+    }
+    return row;
+  }
+
+  /**
    * Read the column data from the database and cache it
    */
   Row readColumns(ParserRuleContext ctx, String conn, String table, HashMap map) {
@@ -106,11 +182,20 @@ Row readColumns(ParserRuleContext ctx, String conn, String table, HashMap maxColName) {
+          maxColName
= col.length(); + } + } + if (table != null) { + sql.append(" FROM (SELECT * FROM " + table); + if (ctx.where_clause() != null) { + sql.append(" " + evalPop(ctx.where_clause()).toString()); + } + if (ctx.T_LIMIT() != null) { + sql.append(" LIMIT "); + int limExp = 0; + if (ctx.T_TOP() != null) { + limExp = 1; + } + sql.append(evalPop(ctx.expr(limExp)).toString()); + } + sql.append(") t"); + } + else { + sql.append(" FROM (" + select + ") t"); + } + Query query = exec.executeQuery(ctx, sql.toString(), conn); + if (query.error()) { + exec.signal(query); + return 1; + } + exec.setSqlSuccess(); + try { + ResultSet rs = query.getResultSet(); + if (rs != null) { + System.out.print("\n"); + // The summary query returns only one row + if (rs.next()) { + int i = 0, cc = 11; + String cntRows = rs.getString(1); + // Pad output + String fmt = String.format("%%-%ds\t%%-11s\t%%-11s\t%%-11s\t%%-11s\t%%-11s\t%%-11s\t%%-11s\t%%-11s" + + "\t%%-11s\t%%-11s\t%%-11s\t%%-11s\t%%-11s\n", maxColName + 1); + System.out.print(String.format(fmt, "Column", "Type", "Rows", "NonNull", "Unique", "Avg", + "Min", "Max", "StdDev", "p05", "p25", "p50", "p75", "p95")); + for(Column c : row.getColumns()) { + String avg = String.format("%.2f", rs.getDouble(4 + i*cc)); + if (rs.wasNull()) + avg = "null"; + String stddev = String.format("%.2f", rs.getDouble(7 + i*cc)); + if (rs.wasNull()) + stddev = "null"; + String p05 = String.format("%.2f", rs.getDouble(8 + i*cc)); + if (rs.wasNull()) + p05 = "null"; + String p25 = String.format("%.2f", rs.getDouble(9 + i*cc)); + if (rs.wasNull()) + p25 = "null"; + String p50 = String.format("%.2f", rs.getDouble(10 + i*cc)); + if (rs.wasNull()) + p50 = "null"; + String p75 = String.format("%.2f", rs.getDouble(11 + i*cc)); + if (rs.wasNull()) + p75 = "null"; + String p95 = String.format("%.2f", rs.getDouble(12 + i*cc)); + if (rs.wasNull()) + p95 = "null"; + System.out.print(String.format(fmt, c.getName(), c.getType(), cntRows, rs.getString(2 + i*cc), + rs.getString(3 + 
i*cc), avg, rs.getString(5 + i*cc), rs.getString(6 + i*cc),
+              stddev, p05, p25, p50, p75, p95));
+            i++;
+          }
+        }
+      }
+    }
+    catch (SQLException e) {
+      exec.signal(e);
+      exec.closeQuery(query, conn);
+      return 1;
+    }
+    exec.closeQuery(query, conn);
+    return 0;
+  }
+
+  /**
+   * Print the most frequent values of every column (SUMMARY TOP n FOR ...).
+   *
+   * Builds one GROUPING SETS query that counts the values of each column separately, keeps
+   * the topNum most frequent values per column with ROW_NUMBER(), and prints them to standard
+   * output side by side: one "value count" cell per column and rank.
+   *
+   * @param ctx parsed SUMMARY statement; expr(0) is the TOP number and, when LIMIT is
+   *            present, expr(1) is the LIMIT value
+   * @param table table name, or null when the summary is computed for a SELECT statement
+   * @param select text of the SELECT statement, used only when table is null
+   * @param row columns to summarize
+   * @param conn connection name handed to the exec query helpers
+   * @param connType connection type; column names are back-quoted for Hive
+   * @return 0 on success, 1 when the query failed or the result could not be read
+   */
+  public Integer summaryTop(HplsqlParser.Summary_stmtContext ctx, String table, String select,
+      Row row, String conn, Conn.Type connType) {
+    StringBuilder sql = new StringBuilder("SELECT id, col, cnt FROM (" +
+        "SELECT id, col, cnt, ROW_NUMBER() OVER (PARTITION BY id ORDER BY cnt DESC) rn " +
+        "FROM (SELECT CAST(GROUPING__ID AS DECIMAL) id, COALESCE("); // CAST AS INT does not work as expected (ID is still considered as STRING in ORDER BY for some reason)
+    int topNum = evalPop(ctx.expr(0)).intValue();
+    int colCnt = row.size();
+    StringBuilder colsList = new StringBuilder();
+    StringBuilder colsGrList = new StringBuilder();
+    int i = 0;
+    for (Column c : row.getColumns()) {
+      String col = c.getName();
+      if (connType == Conn.Type.HIVE) {
+        col = '`' + col + '`';
+      }
+      if (i != 0) {
+        colsList.append(",");
+        colsGrList.append(",");
+      }
+      colsList.append(col);
+      colsGrList.append("(" + col + ")");
+      i++;
+    }
+    sql.append(colsList);
+    sql.append(") col, COUNT(*) cnt");
+    if (table != null) {
+      sql.append(" FROM (SELECT * FROM " + table);
+      if (ctx.where_clause() != null) {
+        sql.append(" " + evalPop(ctx.where_clause()).toString());
+      }
+      if (ctx.T_LIMIT() != null) {
+        sql.append(" LIMIT " + evalPop(ctx.expr(1)).toString());
+      }
+      sql.append(") t");
+    }
+    else {
+      sql.append(" FROM (" + select + ") t");
+    }
+    sql.append(" GROUP BY ");
+    sql.append(colsList);
+    sql.append(" GROUPING SETS (");
+    sql.append(colsGrList);
+    sql.append(")) t) t WHERE rn <= " + topNum + " ORDER BY id, cnt DESC");
+    // Add LIMIT as ORDER BY without LIMIT can be disabled for safety reasons
+    sql.append(" LIMIT " + topNum * row.size());
+    Query query = exec.executeQuery(ctx, sql.toString(), conn);
+    if (query.error()) {
+      exec.signal(query);
+      return 1;
+    }
+    exec.setSqlSuccess();
+    try {
+      ResultSet rs = query.getResultSet();
+      if (rs != null) {
+        // Fixed grid of colCnt x slots cells addressed as [column * slots + rank]. Cells that
+        // receive no row keep count 0 and are printed blank, so an empty result set or a
+        // column with fewer than topNum distinct values needs no explicit padding.
+        int slots = Math.max(topNum, 0);
+        String[] outCols = new String[colCnt * slots];
+        int[] outCnts = new int[colCnt * slots];
+        int[] outLens = new int[colCnt];
+        for (int k = 0; k < colCnt; k++) {
+          outLens[k] = row.getColumn(k).getName().length();
+        }
+        int prevId = -1;
+        int colNum = 0;
+        int grRow = 0;
+        // NOTE(review): rows are ordered by grouping id and assigned to columns in that
+        // order; assumes the id grows in column order - confirm for the Hive version in use
+        while (rs.next()) {
+          int id = rs.getInt(1);
+          String value = rs.getString(2);
+          int cnt = rs.getInt(3);
+          if (prevId == -1) {
+            prevId = id;
+          }
+          // A new grouping id starts the values of the next column
+          if (id != prevId) {
+            colNum++;
+            grRow = 0;
+            prevId = id;
+          }
+          // Ignore rows that do not fit the grid instead of failing on them
+          if (colNum >= colCnt || grRow >= slots) {
+            continue;
+          }
+          outCols[colNum * slots + grRow] = value;
+          outCnts[colNum * slots + grRow] = cnt;
+          // The value part of a cell is as wide as the longest value, capped at 300
+          if (value != null && value.length() > outLens[colNum]) {
+            outLens[colNum] = value.length() < 300 ? value.length() : 300;
+          }
+          grRow++;
+        }
+        System.out.print("\n");
+        // Output header; a cell is the value width plus 3 separator and 11 count characters
+        i = 0;
+        for (Column c : row.getColumns()) {
+          if (i != 0) {
+            System.out.print("\t");
+          }
+          String fmt = String.format("%%-%ds", outLens[i] + 11 + 3);
+          System.out.print(String.format(fmt, c.getName()));
+          i++;
+        }
+        System.out.print("\n");
+        // Output top values
+        for (int j = 0; j < slots; j++) {
+          for (int k = 0; k < colCnt; k++) {
+            if (k != 0) {
+              System.out.print("\t");
+            }
+            int cnt = outCnts[j + k * slots];
+            if (cnt != 0) { // skip padded values
+              // %3s with an empty argument yields the 3-character separator, so the cell
+              // is exactly as wide as the header and the blank cells
+              String fmt = String.format("%%-%ds%%3s%%-11d", outLens[k]);
+              System.out.print(String.format(fmt, outCols[j + k * slots], "", cnt));
+            }
+            else {
+              String fmt = String.format("%%-%ds", outLens[k] + 11 + 3);
+              System.out.print(String.format(fmt, ""));
+            }
+          }
+          System.out.print("\n");
+        }
+      }
+    }
+    catch (SQLException e) {
+      exec.signal(e);
+      return 1;
+    }
+    finally {
+      // Release the query on every exit path, including unexpected runtime exceptions
+      exec.closeQuery(query, conn);
+    }
+    return 0;
+  }
+
   /**
    * RESIGNAL statement
    */
diff --git hplsql/src/main/resources/hplsql-site.xml hplsql/src/main/resources/hplsql-site.xml
index 96843dc..aca54e2 100644
--- hplsql/src/main/resources/hplsql-site.xml
+++ hplsql/src/main/resources/hplsql-site.xml
@@ -35,8 +35,6 @@
 hplsql.conn.init.hive2conn
- set hive.execution.engine=mr;
- use default;
 Statements for execute after connection to the database