Index: src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
===================================================================
--- src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java (revision 122177)
+++ src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java (working copy)
@@ -85,27 +85,23 @@
     } else {
       ret = qp.run(cmd);
       Vector<Vector<String>> res = new Vector<Vector<String>>();
-      while (qp.getResults(res))
-      {
-        SessionState ss = SessionState.get();
-        OutputStream out = ss.out;
-        try
+      while (qp.getResults(res)) {
+        SessionState ss = SessionState.get();
+        PrintStream out = ss.out;
+
+        for (Vector<String> row:res)
         {
-
-          for (Vector<String> row:res)
+          boolean firstCol = true;
+          for (String col:row)
           {
-            for (String col:row)
-            {
-              out.write(col == null ? Utilities.nullStringOutput.getBytes() : col.getBytes());
+            if (!firstCol)
               out.write(Utilities.tabCode);
-            }
-            out.write(Utilities.newLineCode);
-          }
-          res.clear();
-
-        } catch (IOException e) {
-          e.printStackTrace();
+            out.print(col == null ? Utilities.nullStringOutput : col);
+            firstCol = false;
+          }
+          out.write(Utilities.newLineCode);
         }
+        res.clear();
       }
     }
     return ret;
@@ -151,16 +147,20 @@
     SessionState.initHiveLog4j();
 
     CliSessionState ss = new CliSessionState (new HiveConf(SessionState.class));
+    ss.in = System.in;
+    try {
+      ss.out = new PrintStream(System.out, true, "UTF-8");
+      ss.err = new PrintStream(System.err, true, "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      System.exit(3);
+    }
+
     SessionState.start(ss);
 
     if(! oproc.process_stage2(ss)) {
       System.exit(2);
     }
 
-    ss.in = System.in;
-    ss.out = System.out;
-    ss.err = System.err;
-
     sp = new SetProcessor();
     qp = new Driver();
Index: src/contrib/hive/data/files/kv4.txt
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream

Property changes on: src/contrib/hive/data/files/kv4.txt
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Index: src/contrib/hive/ql/src/test/results/clientpositive/inputddl4.q.out
===================================================================
--- src/contrib/hive/ql/src/test/results/clientpositive/inputddl4.q.out (revision 0)
+++ src/contrib/hive/ql/src/test/results/clientpositive/inputddl4.q.out (revision 0)
@@ -0,0 +1,9 @@
+viewtime datetime
+userid int
+page_url string
+referrer_url string
+friends array
+properties map
+ip string 'IP Address of the User'
+ds datetime
+country string
Index: src/contrib/hive/ql/src/test/results/clientpositive/inputddl5.q.out
===================================================================
--- src/contrib/hive/ql/src/test/results/clientpositive/inputddl5.q.out (revision 0)
+++ src/contrib/hive/ql/src/test/results/clientpositive/inputddl5.q.out (revision 0)
@@ -0,0 +1,3 @@
+name string
+邵铮
+1
Index: src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
===================================================================
--- src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (revision 122177)
+++ src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (working copy)
@@ -369,7 +369,7 @@
     outf = new File(logDir);
     outf = new File(outf, qf.getName().concat(".out"));
     FileOutputStream fo = new FileOutputStream(outf);
-    ss.out = new PrintStream(fo);
+    ss.out = new PrintStream(fo, true, "UTF-8");
     ss.setIsSilent(true);
     cliDriver = new CliDriver(ss);
     SessionState.start(ss);
Index: src/contrib/hive/ql/src/test/queries/clientpositive/inputddl4.q
===================================================================
--- src/contrib/hive/ql/src/test/queries/clientpositive/inputddl4.q (revision 0)
+++ src/contrib/hive/ql/src/test/queries/clientpositive/inputddl4.q (revision 0)
@@ -0,0 +1,10 @@
+-- a simple test to test sorted/clustered syntax
+CREATE TABLE INPUTDDL4(viewTime DATETIME, userid INT,
+                       page_url STRING, referrer_url STRING,
+                       friends ARRAY, properties MAP,
+                       ip STRING COMMENT 'IP Address of the User')
+    COMMENT 'This is the page view table'
+    PARTITIONED BY(ds DATETIME, country STRING)
+    CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS;
+DESCRIBE INPUTDDL4;
+DROP TABLE INPUTDDL4;
Index: src/contrib/hive/ql/src/test/queries/clientpositive/inputddl5.q
===================================================================
--- src/contrib/hive/ql/src/test/queries/clientpositive/inputddl5.q (revision 0)
+++ src/contrib/hive/ql/src/test/queries/clientpositive/inputddl5.q (revision 0)
@@ -0,0 +1,8 @@
+-- test for internationalization
+-- kv4.txt contains the utf-8 character 0xE982B5E993AE which we are verifying later on
+CREATE TABLE INPUTDDL5(name STRING);
+LOAD DATA LOCAL INPATH '../data/files/kv4.txt' INTO TABLE INPUTDDL5;
+DESCRIBE INPUTDDL5;
+SELECT INPUTDDL5.name from INPUTDDL5;
+SELECT count(1) FROM INPUTDDL5 WHERE INPUTDDL5.name = _UTF-8 0xE982B5E993AE;
+DROP TABLE INPUTDDL5;
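For reference, the byte sequence 0xE982B5E993AE called out in inputddl5.q is the UTF-8 encoding of the value that inputddl5.q.out expects back from the SELECT. A minimal standalone sketch of that decoding; the class name Utf8LiteralCheck is invented for illustration and is not part of the patch:

import java.io.UnsupportedEncodingException;

// Illustrative only: decodes the hex bytes used by inputddl5.q and checks that
// they round-trip to the string shown in inputddl5.q.out.
public class Utf8LiteralCheck {
  public static void main(String[] args) throws UnsupportedEncodingException {
    byte[] bytes = { (byte) 0xE9, (byte) 0x82, (byte) 0xB5,
                     (byte) 0xE9, (byte) 0x93, (byte) 0xAE };
    String decoded = new String(bytes, "UTF-8");
    System.out.println(decoded);                // 邵铮
    System.out.println(decoded.equals("邵铮"));  // expected: true
  }
}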
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (working copy)
@@ -347,11 +347,7 @@
     getTTable().getSd().setBucketCols(bucketCols);
   }
 
-  public void setSortCols(List<String> sortCols) throws HiveException {
-    List<Order> sortOrder = new ArrayList<Order>();
-    for (String col : sortCols) {
-      sortOrder.add(new Order(col, 1));
-    }
+  public void setSortCols(List<Order> sortOrder) throws HiveException {
     getTTable().getSd().setSortCols(sortOrder);
   }
 
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (working copy)
@@ -50,6 +50,8 @@
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
 
 @SuppressWarnings("nls")
 public class Utilities {
@@ -60,6 +62,7 @@
   public static enum ReduceField { KEY, VALUE, ALIAS };
 
   private static volatile mapredWork gWork = null;
+  static final private Log LOG = LogFactory.getLog("hive.ql.exec.Utilities");
 
   public static void clearMapRedWork (Configuration job) {
     try {
@@ -375,6 +378,31 @@
     return prefix+suffix;
   }
 
+  public final static String NSTR = "";
+
+  public static enum streamStatus {EOF, TERMINATED, NORMAL}
+
+  public static streamStatus readColumn(DataInput in, OutputStream out) throws IOException {
+    while (true) {
+      int b;
+      try {
+        b = (int)in.readByte();
+      } catch (EOFException e) {
+        return streamStatus.EOF;
+      }
+
+      if (b == Utilities.newLineCode) {
+        return streamStatus.TERMINATED;
+      }
+
+      if (b == Utilities.ctrlaCode) {
+        return streamStatus.NORMAL;
+      }
+
+      out.write(b);
+    }
+    // Unreachable
+  }
 
   public static OutputStream createCompressedStream(JobConf jc, OutputStream out) throws IOException {
 
@@ -405,5 +433,4 @@
                                           keyClass, valClass, compressionType, codec));
   }
 
-
 }
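A small usage sketch of the readColumn helper that now lives in Utilities, in the way Driver.getResults and NaiiveSerializer.deserialize consume it. Assumptions: the patched Utilities class is on the classpath and Utilities.ctrlaCode / Utilities.newLineCode keep their existing values (Ctrl-A and newline); the ReadColumnSketch class itself is invented for illustration:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.Utilities;

// Illustrative only: parse one Ctrl-A separated, newline terminated record.
// NORMAL = one column read, TERMINATED = end of row, EOF = end of stream.
public class ReadColumnSketch {
  public static void main(String[] args) throws IOException {
    byte[] record = "key1\001val1\n".getBytes("UTF-8");
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(record));
    ByteArrayOutputStream col = new ByteArrayOutputStream();

    Utilities.streamStatus ss;
    do {
      col.reset();
      ss = Utilities.readColumn(in, col);
      if (ss != Utilities.streamStatus.EOF || col.size() > 0) {
        System.out.println(new String(col.toByteArray(), "UTF-8"));
      }
    } while (ss == Utilities.streamStatus.NORMAL);
  }
}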
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy)
@@ -18,8 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.io.DataOutput;
 import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.Iterator;
@@ -27,6 +27,8 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -47,6 +49,7 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
+import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.util.StringUtils;
 
@@ -75,8 +78,10 @@
 
     // Create the db
     Hive db;
+    FileSystem fs;
     try {
       db = Hive.get(conf);
+      fs = FileSystem.get(conf);
 
       createTableDesc crtTbl = work.getCreateTblDesc();
       if (crtTbl != null) {
@@ -118,7 +123,42 @@
 
         if (crtTbl.isExternal())
           tbl.setProperty("EXTERNAL", "TRUE");
-
+
+        // If the sorted columns is a superset of bucketed columns, store this fact. It can be later used to
+        // optimize some group-by queries. Note that, the order does not matter as long as it is in the first
+        // 'n' columns where 'n' is the length of the bucketed columns.
+        if ((tbl.getBucketCols() != null) && (tbl.getSortCols() != null))
+        {
+          List<String> bucketCols = tbl.getBucketCols();
+          List<Order> sortCols = tbl.getSortCols();
+
+          if (sortCols.size() >= bucketCols.size())
+          {
+            boolean found = true;
+
+            Iterator<String> iterBucketCols = bucketCols.iterator();
+            while (iterBucketCols.hasNext())
+            {
+              String bucketCol = iterBucketCols.next();
+              boolean colFound = false;
+              for (int i = 0; i < bucketCols.size(); i++)
+              {
+                if (bucketCol.equals(sortCols.get(i).getCol())) {
+                  colFound = true;
+                  break;
+                }
+              }
+              if (colFound == false)
+              {
+                found = false;
+                break;
+              }
+            }
+            if (found)
+              tbl.setProperty("SORTBUCKETCOLSPREFIX", "TRUE");
+          }
+        }
+
         // create the table
         db.createTable(tbl);
         return 0;
@@ -164,21 +204,24 @@
         LOG.info("DDLTask: got data for " + tbl.getName());
 
         // write the results in the file
-        FileOutputStream outStream = new FileOutputStream(descTbl.getResFile());
+        DataOutput os = (DataOutput)fs.create(descTbl.getResFile());
         List<FieldSchema> cols = tbl.getCols();
         Iterator<FieldSchema> iterCols = cols.iterator();
+        boolean firstCol = true;
         while (iterCols.hasNext())
         {
+          if (!firstCol)
+            os.write(terminator);
           FieldSchema col = iterCols.next();
-          outStream.write(col.getName().getBytes());
-          outStream.write(separator);
-          outStream.write(col.getType().getBytes());
+          os.write(col.getName().getBytes("UTF-8"));
+          os.write(separator);
+          os.write(col.getType().getBytes("UTF-8"));
           if (col.getComment() != null)
           {
-            outStream.write(separator);
-            outStream.write(col.getComment().getBytes());
+            os.write(separator);
+            os.write(col.getComment().getBytes("UTF-8"));
          }
-          outStream.write(terminator);
+          firstCol = false;
         }
 
         // also return the partitioning columns
@@ -186,19 +229,20 @@
         Iterator<FieldSchema> iterPartCols = partCols.iterator();
         while (iterPartCols.hasNext())
         {
+          os.write(terminator);
           FieldSchema col = iterPartCols.next();
-          outStream.write(col.getName().getBytes());
-          outStream.write(separator);
-          outStream.write(col.getType().getBytes());
+          os.write(col.getName().getBytes("UTF-8"));
+          os.write(separator);
+          os.write(col.getType().getBytes("UTF-8"));
           if (col.getComment() != null)
           {
-            outStream.write(separator);
-            outStream.write(col.getComment().getBytes());
+            os.write(separator);
+            os.write(col.getComment().getBytes("UTF-8"));
           }
-          outStream.write(terminator);
         }
-        outStream.close();
         LOG.info("DDLTask: written data for " + tbl.getName());
+        ((FSDataOutputStream)os).close();
+
       } catch (FileNotFoundException e) {
         LOG.info("describe table: " + StringUtils.stringifyException(e));
         return 1;
@@ -214,10 +258,10 @@
 
       if (!found) {
         try {
-          FileOutputStream outStream = new FileOutputStream(descTbl.getResFile());
+          DataOutput outStream = (DataOutput)fs.create(descTbl.getResFile());
           String errMsg = "Table " + descTbl.getTableName() + " does not exist";
-          outStream.write(errMsg.getBytes());
-          outStream.close();
+          outStream.write(errMsg.getBytes("UTF-8"));
+          ((FSDataOutputStream)outStream).close();
         } catch (FileNotFoundException e) {
           LOG.info("describe table: " + StringUtils.stringifyException(e));
           return 1;
@@ -245,7 +289,7 @@
 
       // write the results in the file
       try {
-        FileOutputStream outStream = new FileOutputStream(showTbls.getResFile());
+        DataOutput outStream = (DataOutput)fs.create(showTbls.getResFile());
         SortedSet<String> sortedTbls = new TreeSet<String>(tbls);
         Iterator<String> iterTbls = sortedTbls.iterator();
         boolean firstCol = true;
         while (iterTbls.hasNext())
         {
           if (!firstCol)
             outStream.write(separator);
-          outStream.write(iterTbls.next().getBytes());
+          outStream.write(iterTbls.next().getBytes("UTF-8"));
           firstCol = false;
         }
-        outStream.write(terminator);
+        ((FSDataOutputStream)outStream).close();
       } catch (FileNotFoundException e) {
         LOG.info("show table: " + StringUtils.stringifyException(e));
         return 1;
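To make the intent of the SORTBUCKETCOLSPREFIX block concrete, here is a hedged, standalone restatement of the same check; SortBucketPrefixCheckSketch and sortColsCoverBucketCols are invented names, and the authoritative logic is the DDLTask code above:

import java.util.Arrays;
import java.util.List;

// Illustrative restatement: the property is set only when every bucketing column
// appears among the first bucketCols.size() sort columns (order within that
// prefix does not matter).
public class SortBucketPrefixCheckSketch {
  static boolean sortColsCoverBucketCols(List<String> bucketCols, List<String> sortColNames) {
    if (sortColNames.size() < bucketCols.size())
      return false;
    return sortColNames.subList(0, bucketCols.size()).containsAll(bucketCols);
  }

  public static void main(String[] args) {
    // CLUSTERED BY(userid) SORTED BY(viewTime), as in inputddl4.q: no prefix match.
    System.out.println(sortColsCoverBucketCols(Arrays.asList("userid"),
                                               Arrays.asList("viewTime")));            // false
    // CLUSTERED BY(userid) SORTED BY(userid, viewTime): prefix match.
    System.out.println(sortColsCoverBucketCols(Arrays.asList("userid"),
                                               Arrays.asList("userid", "viewTime")));  // true
  }
}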
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Context.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Context.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Context.java (working copy)
@@ -18,24 +18,20 @@
 
 package org.apache.hadoop.hive.ql;
 
-import java.io.File;
+import java.io.DataInput;
 import java.io.IOException;
 import java.io.FileNotFoundException;
-import java.io.InputStream;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.util.StringUtils;
 
 public class Context {
-  private File resFile;
+  private Path resFile;
   private Path resDir;
   private FileSystem fs;
   static final private Log LOG = LogFactory.getLog("hive.ql.Context");
@@ -57,14 +53,14 @@
   /**
    * @return the resFile
    */
-  public File getResFile() {
+  public Path getResFile() {
     return resFile;
   }
 
   /**
    * @param resFile the resFile to set
    */
-  public void setResFile(File resFile) {
+  public void setResFile(Path resFile) {
     this.resFile = resFile;
     resDir = null;
     resDirPaths = null;
@@ -105,10 +101,7 @@
   {
     try
     {
-      FileOutputStream outStream = new FileOutputStream(resFile);
-      outStream.close();
-    } catch (FileNotFoundException e) {
-      LOG.info("Context clear error: " + StringUtils.stringifyException(e));
+      fs.delete(resFile, false);
     } catch (IOException e) {
       LOG.info("Context clear error: " + StringUtils.stringifyException(e));
     }
@@ -120,7 +113,7 @@
     resDirPaths = null;
   }
 
-  public InputStream getStream() {
+  public DataInput getStream() {
     try {
       if (!initialized) {
@@ -128,7 +121,7 @@
         if ((resFile == null) && (resDir == null))
           return null;
         if (resFile != null)
-          return (InputStream)(new FileInputStream(resFile));
+          return (DataInput)fs.open(resFile);
 
         FileStatus status = fs.getFileStatus(resDir);
         assert status.isDir();
@@ -140,7 +133,7 @@
             resDirPaths[pos++] = resFS.getPath();
         if (pos == 0)
           return null;
-        return (InputStream)fs.open(resDirPaths[resDirFilesNum++]);
+        return (DataInput)fs.open(resDirPaths[resDirFilesNum++]);
       }
       else {
         return getNextStream();
@@ -154,12 +147,12 @@
     }
   }
 
-  private InputStream getNextStream() {
+  private DataInput getNextStream() {
     try {
       if (resDir != null && resDirFilesNum < resDirPaths.length
           && (resDirPaths[resDirFilesNum] != null))
-        return (InputStream)fs.open(resDirPaths[resDirFilesNum++]);
+        return (DataInput)fs.open(resDirPaths[resDirFilesNum++]);
     } catch (FileNotFoundException e) {
       LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
       return null;
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java (working copy)
@@ -149,8 +149,6 @@
     }
   }
 
-  private final static String NSTR = "";
-  private static enum streamStatus {EOF, TERMINATED, NORMAL}
 
   public HiveObject deserialize (DataInput in) throws IOException {
     boolean more = true;
     CompositeHiveObject nr = null;
@@ -164,9 +162,9 @@
 
     do {
       bos.reset();
-      streamStatus ss = readColumn(in, bos);
-      if((ss == streamStatus.EOF) ||
-         (ss == streamStatus.TERMINATED)) {
+      Utilities.streamStatus ss = Utilities.readColumn(in, bos);
+      if((ss == Utilities.streamStatus.EOF) ||
+         (ss == Utilities.streamStatus.TERMINATED)) {
         // read off entire row/file
         more = false;
       }
@@ -176,7 +174,7 @@
       if(bos.getCount() > 0) {
         col = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
       } else {
-        col = NSTR;
+        col = Utilities.NSTR;
       }
 
       if(width == -1) {
@@ -224,28 +222,6 @@
     return writeErrorCount;
   }
 
-  private streamStatus readColumn(DataInput in, OutputStream out) throws IOException {
-    while (true) {
-      int b;
-      try {
-        b = (int)in.readByte();
-      } catch (EOFException e) {
-        return streamStatus.EOF;
-      }
-
-      if (b == terminator) {
-        return streamStatus.TERMINATED;
-      }
-
-      if (b == separator) {
-        return streamStatus.NORMAL;
-      }
-
-      out.write(b);
-    }
-    // Unreachable
-  }
-
   public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) {
     // Since all data is strings - we just use lexicographic ordering
     return WritableComparator.compareBytes(b1, s1, l2, b2, s2, l2);
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy)
@@ -97,6 +97,9 @@
 TOK_TABLELOCATION;
 TOK_TABLESAMPLE;
 TOK_TMP_FILE;
+TOK_TABSORTCOLNAMEASC;
+TOK_TABSORTCOLNAMEDESC;
+TOK_CHARSETLITERAL;
 }
 
@@ -183,7 +186,7 @@
 
 tableBuckets
     :
-      KW_CLUSTERED KW_BY LPAREN bucketCols=columnNameList RPAREN (KW_SORTED KW_BY LPAREN sortCols=columnNameList RPAREN)? KW_INTO num=Number KW_BUCKETS
+      KW_CLUSTERED KW_BY LPAREN bucketCols=columnNameList RPAREN (KW_SORTED KW_BY LPAREN sortCols=columnNameOrderList RPAREN)? KW_INTO num=Number KW_BUCKETS
     -> ^(TOK_TABLEBUCKETS $bucketCols $sortCols? $num)
     ;
 
@@ -240,6 +243,16 @@
       Identifier
     ;
 
+columnNameOrderList
+    : columnNameOrder (COMMA columnNameOrder)* -> ^(TOK_TABCOLNAME columnNameOrder+)
+    ;
+
+columnNameOrder
+    : Identifier (asc=KW_ASC | desc=KW_DESC)?
+    -> {$desc == null}? ^(TOK_TABSORTCOLNAMEASC Identifier)
+    ->                  ^(TOK_TABSORTCOLNAMEDESC Identifier)
+    ;
+
 columnNameType
     : colName=Identifier colType (KW_COMMENT comment=StringLiteral)?
     -> {$comment == null}? ^(TOK_TABCOL $colName colType)
@@ -521,9 +534,14 @@
     :
     Number
     | StringLiteral
+    | charSetStringLiteral
     | booleanValue
-    ;
+    ;
 
+charSetStringLiteral
+    :
+    csName=CharSetName csLiteral=CharSetLiteral -> ^(TOK_CHARSETLITERAL $csName $csLiteral)
+    ;
 
 expression:
     precedenceOrExpression
@@ -804,6 +822,11 @@
     ;
 
 fragment
+HexDigit
+    : 'a'..'f' | 'A'..'F'
+    ;
+
+fragment
 Digit
     :
     '0'..'9'
@@ -820,6 +843,12 @@
     '\'' (~'\'')* '\'' ( '\'' (~'\'')* '\'' )*
     ;
 
+CharSetLiteral
+    :
+    StringLiteral
+    | '0' 'X' (HexDigit|Digit)+
+    ;
+
 Number
     :
     (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
@@ -827,9 +856,14 @@
 
 Identifier
     :
-    (Letter | Digit | '_')+
+    (Letter | Digit) (Letter | Digit | '_')*
     ;
 
+CharSetName
+    :
+    '_' (Letter | Digit | '_' | '-' | '.' | ':' )+
+    ;
+
 WS
     :  (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
     ;
@@ -837,3 +871,5 @@
     : '--' (~('\n'|'\r'))*  { $channel=HIDDEN; }
     ;
 
+
+
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy)
@@ -21,6 +21,7 @@
 import java.util.*;
 import java.io.File;
 import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -32,6 +33,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 
+
 public abstract class BaseSemanticAnalyzer {
   protected String scratchDir;
   protected int randomid;
@@ -44,6 +46,7 @@
   protected final LogHelper console;
 
   protected Context ctx;
+
 
   public BaseSemanticAnalyzer(HiveConf conf) throws SemanticException {
     try {
       this.conf = conf;
@@ -77,7 +80,39 @@
     }
     return val;
   }
-
+
+  public static String charSetString(String charSetName, String charSetString)
+    throws SemanticException {
+    try
+    {
+      // The character set name starts with a _, so strip that
+      charSetName = charSetName.substring(1);
+      if (charSetString.charAt(0) == '\'')
+        return new String(unescapeSQLString(charSetString).getBytes(), charSetName);
+      else  // hex input is also supported
+      {
+        assert charSetString.charAt(0) == '0';
+        assert charSetString.charAt(1) == 'x';
+        charSetString = charSetString.substring(2);
+
+        byte[] bArray = new byte[charSetString.length()/2];
+        int j = 0;
+        for (int i = 0; i < charSetString.length(); i += 2)
+        {
+          int val = Character.digit(charSetString.charAt(i), 16) * 16 + Character.digit(charSetString.charAt(i+1), 16);
+          if (val > 127)
+            val = val - 256;
+          bArray[j++] = new Integer(val).byteValue();
+        }
+
+        String res = new String(bArray, charSetName);
+        return res;
+      }
+    } catch (UnsupportedEncodingException e) {
+      throw new SemanticException(e);
+    }
+  }
+
   @SuppressWarnings("nls")
   public static String unescapeSQLString(String b) {
     assert(b.charAt(0) == '\'');
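A usage sketch for the new charSetString helper, assuming the patched BaseSemanticAnalyzer is on the classpath; the CharSetLiteralSketch class is invented for illustration. The helper strips the leading '_' from the charset name and expects hex input to start with "0x", matching the literal written in inputddl5.q:

import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Illustrative only: decode the hex form of a charset literal through the new helper.
public class CharSetLiteralSketch {
  public static void main(String[] args) throws SemanticException {
    String s = BaseSemanticAnalyzer.charSetString("_UTF-8", "0xE982B5E993AE");
    System.out.println(s);                // 邵铮
    System.out.println(s.equals("邵铮"));  // expected: true
  }
}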
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -560,6 +560,7 @@
 
       case HiveParser.Number:
       case HiveParser.StringLiteral:
+      case HiveParser.TOK_CHARSETLITERAL:
       case HiveParser.KW_TRUE:
       case HiveParser.KW_FALSE:
         break;
@@ -2897,7 +2898,10 @@
       // other operators or functions
       Class udf = UDFRegistry.getUDFClass(funcText);
       if (udf == null) {
-        throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg((CommonTree)expr.getChild(0)));
+        if (isFunction)
+          throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg((CommonTree)expr.getChild(0)));
+        else
+          throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg((CommonTree)expr));
       }
 
       desc = getFuncExprNodeDesc(funcText, children);
@@ -2947,6 +2951,9 @@
       case HiveParser.StringLiteral:
         desc = new exprNodeConstantDesc(String.class, BaseSemanticAnalyzer.unescapeSQLString(expr.getText()));
         break;
+      case HiveParser.TOK_CHARSETLITERAL:
+        desc = new exprNodeConstantDesc(String.class, BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), expr.getChild(1).getText()));
+        break;
       case HiveParser.KW_TRUE:
         desc = new exprNodeConstantDesc(Boolean.class, Boolean.TRUE);
         break;
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy)
@@ -18,8 +18,10 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
 
 import org.antlr.runtime.tree.CommonTree;
 
@@ -39,7 +41,6 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-import java.io.File;
 import java.util.*;
 
 public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
@@ -75,12 +76,12 @@
       analyzeDropTable(ast);
     else if (ast.getToken().getType() == HiveParser.TOK_DESCTABLE)
     {
-      ctx.setResFile(new File(getTmpFileName()));
+      ctx.setResFile(new Path(getTmpFileName()));
       analyzeDescribeTable(ast);
     }
     else if (ast.getToken().getType() == HiveParser.TOK_SHOWTABLES)
     {
-      ctx.setResFile(new File(getTmpFileName()));
+      ctx.setResFile(new Path(getTmpFileName()));
       analyzeShowTables(ast);
     }
     else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAME)
@@ -96,7 +97,7 @@
     List<FieldSchema> cols = getColumns(colList);
     List<FieldSchema> partCols = null;
     List<String> bucketCols = null;
-    List<String> sortCols = null;
+    List<Order> sortCols = null;
     int numBuckets = -1;
     String fieldDelim = null;
     String collItemDelim = null;
@@ -124,7 +125,7 @@
           numBuckets = (Integer.valueOf(child.getChild(1).getText())).intValue();
         else
         {
-          sortCols = getColumnNames((CommonTree)child.getChild(1));
+          sortCols = getColumnNamesOrder((CommonTree)child.getChild(1));
           numBuckets = (Integer.valueOf(child.getChild(2).getText())).intValue();
         }
         break;
@@ -211,9 +212,9 @@
     if (crtTblDesc.getSortCols() != null)
     {
       // all columns in cluster and sort are valid columns
-      Iterator<String> sortCols = crtTblDesc.getSortCols().iterator();
+      Iterator<Order> sortCols = crtTblDesc.getSortCols().iterator();
       while (sortCols.hasNext()) {
-        String sortCol = sortCols.next();
+        String sortCol = sortCols.next().getCol();
         boolean found = false;
         Iterator<String> colNamesIter = colNames.iterator();
         while (colNamesIter.hasNext()) {
@@ -287,10 +288,24 @@
     int numCh = ast.getChildCount();
     for (int i = 0; i < numCh; i++) {
       CommonTree child = (CommonTree)ast.getChild(i);
-      colList.add(child.getChild(0).getText());
+      colList.add(child.getText());
     }
     return colList;
   }
+
+  private List<Order> getColumnNamesOrder(CommonTree ast)
+  {
+    List<Order> colList = new ArrayList<Order>();
+    int numCh = ast.getChildCount();
+    for (int i = 0; i < numCh; i++) {
+      CommonTree child = (CommonTree)ast.getChild(i);
+      if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC)
+        colList.add(new Order(child.getChild(0).getText(), 1));
+      else
+        colList.add(new Order(child.getChild(0).getText(), 0));
+    }
+    return colList;
+  }
 
   private void analyzeDescribeTable(CommonTree ast) throws SemanticException {
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java (working copy)
@@ -22,6 +22,7 @@
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
 
 public class createTableDesc extends ddlDesc implements Serializable
 {
@@ -31,7 +32,7 @@
   List<FieldSchema> cols;
   List<FieldSchema> partCols;
   List<String> bucketCols;
-  List<String> sortCols;
+  List<Order> sortCols;
   int numBuckets;
   String fieldDelim;
   String collItemDelim;
@@ -43,7 +44,7 @@
 
   public createTableDesc(String tableName, boolean isExternal,
                          List<FieldSchema> cols, List<FieldSchema> partCols,
-                         List<String> bucketCols, List<String> sortCols,
+                         List<String> bucketCols, List<Order> sortCols,
                          int numBuckets, String fieldDelim,
                          String collItemDelim, String mapKeyDelim, String lineDelim,
@@ -172,14 +173,14 @@
   /**
    * @return the sortCols
    */
-  public List<String> getSortCols() {
+  public List<Order> getSortCols() {
     return sortCols;
   }
 
   /**
    * @param sortCols the sortCols to set
    */
-  public void setSortCols(List<String> sortCols) {
+  public void setSortCols(List<Order> sortCols) {
     this.sortCols = sortCols;
   }
 }
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java (working copy)
@@ -18,19 +18,19 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
-import java.io.File;
 import java.io.Serializable;
+import org.apache.hadoop.fs.Path;
 
 public class showTablesDesc extends ddlDesc implements Serializable
 {
   private static final long serialVersionUID = 1L;
   String pattern;
-  File resFile;
+  Path resFile;
 
   /**
    * @param resFile
    */
-  public showTablesDesc(File resFile) {
+  public showTablesDesc(Path resFile) {
     this.resFile = resFile;
     pattern = null;
   }
@@ -38,7 +38,7 @@
   /**
    * @param pattern names of tables to show
    */
-  public showTablesDesc(File resFile, String pattern) {
+  public showTablesDesc(Path resFile, String pattern) {
     this.resFile = resFile;
     this.pattern = pattern;
   }
@@ -60,14 +60,14 @@
   /**
    * @return the resFile
    */
-  public File getResFile() {
+  public Path getResFile() {
     return resFile;
   }
 
   /**
    * @param resFile the resFile to set
    */
-  public void setResFile(File resFile) {
+  public void setResFile(Path resFile) {
     this.resFile = resFile;
   }
 }
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java (working copy)
@@ -18,21 +18,21 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
-import java.io.File;
 import java.io.Serializable;
+import org.apache.hadoop.fs.Path;
 
 public class descTableDesc extends ddlDesc implements Serializable
 {
   private static final long serialVersionUID = 1L;
 
   String tableName;
-  File resFile;
+  Path resFile;
 
   /**
    * @param resFile
    * @param tableName
    */
-  public descTableDesc(File resFile, String tableName) {
+  public descTableDesc(Path resFile, String tableName) {
     this.resFile = resFile;
     this.tableName = tableName;
   }
@@ -48,14 +48,14 @@
   /**
    * @return the resFile
    */
-  public File getResFile() {
+  public Path getResFile() {
     return resFile;
   }
 
   /**
    * @param resFile the resFile to set
    */
-  public void setResFile(File resFile) {
+  public void setResFile(Path resFile) {
     this.resFile = resFile;
   }
 }
Index: src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
===================================================================
--- src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (revision 122177)
+++ src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (working copy)
@@ -18,9 +18,7 @@
 
 package org.apache.hadoop.hive.ql;
 
-import java.io.File;
-import java.io.InputStream;
-import java.io.FileNotFoundException;
+import java.io.DataInput;
 import java.io.IOException;
 import java.util.*;
 
@@ -39,6 +37,7 @@
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.serde.ByteStream;
 import org.apache.hadoop.hive.conf.HiveConf;
 
 import org.apache.commons.logging.Log;
@@ -47,24 +46,15 @@
 
 public class Driver implements CommandProcessor {
   static final private Log LOG = LogFactory.getLog("hive.ql.Driver");
-  static final private int separator = Utilities.ctrlaCode;
-  static final private int terminator = Utilities.newLineCode;
   static final private int MAX_ROWS = 100;
+  ByteStream.Output bos = new ByteStream.Output();
 
-  private ParseDriver pd;
-  private HiveConf conf;
-  private InputStream resStream;
-  private LogHelper console;
-  private Context ctx;
+  private ParseDriver pd;
+  private HiveConf conf;
+  private DataInput resStream;
+  private LogHelper console;
+  private Context ctx;
 
-  public static int getSeparator() {
-    return separator;
-  }
-
-  public static int getTerminator() {
-    return terminator;
-  }
-
   public int countJobs(Collection tasks) {
     if (tasks == null)
       return 0;
@@ -107,13 +97,10 @@
       BaseSemanticAnalyzer sem;
       LOG.info("Starting command: " + command);
 
+      ctx.clear();
       if (resStream != null)
-      {
-        resStream.close();
         resStream = null;
-      }
-      ctx.clear();
-
+
       pd = new ParseDriver();
       CommonTree tree = pd.parse(command);
@@ -199,95 +186,73 @@
           conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
         }
       }
 
-      if (jobs > 0) {
-        console.printInfo("OK");
-      }
+
+      console.printInfo("OK");
       return (0);
   }
 
-  public boolean getResults(Vector<Vector<String>> res) {
-
+  public boolean getResults(Vector<Vector<String>> res)
+  {
     if (resStream == null)
       resStream = ctx.getStream();
     if (resStream == null) return false;
 
-    int sizeArr = 128;
-    char[] tmpCharArr = new char[sizeArr];
-
-    for (int numRows = 0; numRows < MAX_ROWS; numRows++)
+    int numRows = 0;
+    Vector<String> row = new Vector<String>();
+
+    while (numRows < MAX_ROWS)
     {
-      if (resStream == null) {
-        if (numRows > 0) {
+      if (resStream == null)
+      {
+        if (numRows > 0)
           return true;
-        }
-        else {
+        else
           return false;
-        }
       }
 
-      boolean eof = false;
-      Vector<String> row = new Vector<String>();
-      String col;
-      int len = 0;
-      while (true) {
-        char c;
-        try {
-          int i = resStream.read();
-          if (i == -1)
-          {
-            eof = true;
-            break;
-          }
-
-          c = (char)i;
-
-          if (c == terminator) {
-            col = new String(tmpCharArr, 0, len);
-            len = 0;
-            row.add(col.equals(Utilities.nullStringStorage) ? null : col);
-            res.add(row);
-            break;
-          }
-          else if (c == separator) {
-            col = new String(tmpCharArr, 0, len);
-            len = 0;
-            row.add(col.equals(Utilities.nullStringStorage) ? null : col);
-          }
-          else
-          {
-            if (sizeArr == len)
-            {
-              char[] tmp = new char[2*sizeArr];
-              sizeArr *= 2;
-              for (int idx = 0; idx < len; idx++)
-                tmp[idx] = tmpCharArr[idx];
-              tmpCharArr = tmp;
-            }
-            tmpCharArr[len++] = c;
-          }
-
-        }
-        catch (java.io.IOException e) {
-          console.printError("FAILED: Unknown exception : " + e.getMessage(),
-            "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
-          return false;
-        }
+
+      String col = null;
+      bos.reset();
+      Utilities.streamStatus ss = Utilities.streamStatus.NORMAL;
+      try
+      {
+        ss = Utilities.readColumn(resStream, bos);
+        if (bos.getCount() > 0)
+          col = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
+        else if (ss == Utilities.streamStatus.NORMAL)
+          col = Utilities.NSTR;
+      } catch (IOException e) {
+        console.printError("FAILED: Unexpected IO exception : " + e.getMessage());
+        res = null;
+        return false;
       }
 
-      if (eof)
+      if (ss == Utilities.streamStatus.EOF)
       {
-        if (len > 0)
+        if (col != null)
         {
-          col = new String(tmpCharArr, 0, len);
-          len = 0;
+          numRows++;
           row.add(col.equals(Utilities.nullStringStorage) ? null : col);
           res.add(row);
+          row = new Vector<String>();
        }
-
         resStream = ctx.getStream();
       }
+      else if (ss == Utilities.streamStatus.TERMINATED)
+      {
+        numRows++;
+        row.add(col.equals(Utilities.nullStringStorage) ? null : col);
+        res.add(row);
+        row = new Vector<String>();
+        col = null;
+      }
+      else if (ss == Utilities.streamStatus.NORMAL)
+      {
+        row.add(col.equals(Utilities.nullStringStorage) ? null : col);
+      }
+      else
+        assert false;
     }
-
     return true;
   }
 }
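For completeness, a hedged sketch of how a caller drains the reworked Driver.getResults after run(), in the same style as the CliDriver change at the top of this patch; FetchResultsSketch, printAll, and the query argument are invented names:

import java.util.Vector;

import org.apache.hadoop.hive.ql.Driver;

// Illustrative consumer: getResults fills up to MAX_ROWS rows per call and
// returns false once the result stream from the Context is exhausted.
public class FetchResultsSketch {
  public static void printAll(Driver qp, String query) throws Exception {
    if (qp.run(query) != 0)
      return;
    Vector<Vector<String>> res = new Vector<Vector<String>>();
    while (qp.getResults(res)) {
      for (Vector<String> row : res)
        System.out.println(row);
      res.clear();
    }
  }
}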