diff --git a/data/files/multibytedata.txt b/data/files/multibytedata.txt new file mode 100644 index 0000000..a76826b --- /dev/null +++ b/data/files/multibytedata.txt @@ -0,0 +1,5 @@ +0さくら +1ケーキ +2焼き肉 +3chocolate +4カタカナ diff --git a/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java b/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java index 3215178..0a1f91a 100644 --- a/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java +++ b/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveQueryResultSet.java @@ -153,7 +153,7 @@ public boolean next() throws SQLException { StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector(); List fieldRefs = soi.getAllStructFieldRefs(); - Object data = serde.deserialize(new BytesWritable(rowStr.getBytes())); + Object data = serde.deserialize(new BytesWritable(rowStr.getBytes("UTF-8"))); assert row.size() == fieldRefs.size() : row.size() + ", " + fieldRefs.size(); for (int i = 0; i < fieldRefs.size(); i++) { diff --git a/jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java b/jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java index e1107dd..c258de9 100644 --- a/jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java +++ b/jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java @@ -59,9 +59,11 @@ private static final String partitionedTableComment = "Partitioned table"; private static final String dataTypeTableName = "testDataTypeTable"; private static final String dataTypeTableComment = "Table with many column data types"; + private static final String multibyteDataTableName = "testMultibyteDataTable"; private final HiveConf conf; private final Path dataFilePath; private final Path dataTypeDataFilePath; + private final Path multibyteDataFilePath; private Connection con; private boolean standAloneServer = false; @@ -72,6 +74,7 @@ public TestJdbcDriver(String name) { .replace("c:", ""); dataFilePath = new Path(dataFileDir, "kv1.txt"); 
dataTypeDataFilePath = new Path(dataFileDir, "datatypes.txt"); + multibyteDataFilePath = new Path(dataFileDir, "multibytedata.txt"); standAloneServer = "true".equals(System .getProperty("test.service.standalone.server")); } @@ -162,6 +165,22 @@ protected void setUp() throws Exception { + " PARTITION (dt='20090619')"); assertFalse(res.next()); + // drop table. any exception fails the test. + try { + stmt.executeQuery("drop table " + multibyteDataTableName); + } catch (Exception ex) { + fail(ex.toString()); + } + + res = stmt.executeQuery("create table " + multibyteDataTableName + + " (id int , value string)"); + assertFalse(res.next()); + + // load the multibyte fixture file into the table + res = stmt.executeQuery("load data local inpath '" + + multibyteDataFilePath.toString() + "' into table " + multibyteDataTableName); + assertFalse(res.next()); + // drop view. ignore error. try { stmt.executeQuery("drop view " + viewName); @@ -188,6 +207,8 @@ protected void tearDown() throws Exception { assertFalse(res.next()); res = stmt.executeQuery("drop table " + dataTypeTableName); assertFalse(res.next()); + res = stmt.executeQuery("drop table " + multibyteDataTableName); + assertFalse(res.next()); con.close(); assertTrue("Connection should be closed", con.isClosed()); @@ -478,6 +499,49 @@ public void testDataTypes() throws Exception { assertFalse(res.next()); } + public void testSelectMultibyteData() throws Exception { + Statement stmt = con.createStatement(); + + ResultSet res = stmt.executeQuery( + "select * from " + multibyteDataTableName); + ResultSetMetaData meta = res.getMetaData(); + + // row 1: hiragana + assertTrue(res.next()); + assertEquals(2, meta.getColumnCount()); + String expectedStrInUnicode = "\u3055\u304f\u3089"; + String factStr = res.getString(2); + assertEquals(expectedStrInUnicode, factStr); + + // row 2: katakana + assertTrue(res.next()); + assertEquals(2, meta.getColumnCount()); + expectedStrInUnicode = "\u30b1\u30fc\u30ad"; + factStr = res.getString(2); + assertEquals(expectedStrInUnicode, factStr); + + // row 3: kanji and hiragana + 
assertTrue(res.next()); + assertEquals(2, meta.getColumnCount()); + expectedStrInUnicode = "\u713c\u304d\u8089"; + factStr = res.getString(2); + assertEquals(expectedStrInUnicode, factStr); + + // row 4: plain ASCII ("chocolate") + assertTrue(res.next()); + assertEquals(2, meta.getColumnCount()); + expectedStrInUnicode = "\u0063\u0068\u006f\u0063\u006f\u006c\u0061\u0074\u0065"; + factStr = res.getString(2); + assertEquals(expectedStrInUnicode, factStr); + + // row 5: half-width katakana. NOTE(review): confirm the fixture's last line is stored in half-width form + assertTrue(res.next()); + assertEquals(2, meta.getColumnCount()); + expectedStrInUnicode = "\uff76\uff80\uff76\uff85"; + factStr = res.getString(2); + assertEquals(expectedStrInUnicode, factStr); + } + private void doTestSelectAll(String tableName, int maxRows, int fetchSize) throws Exception { boolean isPartitionTable = tableName.equals(partitionedTableName);