Index: src/test/org/apache/hcatalog/data/TestJsonSerDe.java
===================================================================
--- src/test/org/apache/hcatalog/data/TestJsonSerDe.java	(revision 1355850)
+++ src/test/org/apache/hcatalog/data/TestJsonSerDe.java	(working copy)
@@ -26,6 +26,7 @@
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.io.Writable;
 import org.slf4j.Logger;
@@ -146,4 +147,67 @@
 
   }
 
+  public void testRobustRead() throws Exception {
+    /**
+     * This test has been added to account for HCATALOG-436.
+     * We write out columns with "internal column names" such
+     * as "_col0", but try to read with regular column names.
+     */
+
+    Configuration conf = new Configuration();
+
+    for (Pair<Properties, HCatRecord> e : getData()){
+      Properties tblProps = e.first;
+      HCatRecord r = e.second;
+
+      Properties internalTblProps = new Properties();
+      for (Map.Entry pe : tblProps.entrySet()){
+        if (!pe.getKey().equals(Constants.LIST_COLUMNS)){
+          internalTblProps.put(pe.getKey(), pe.getValue());
+        } else {
+          internalTblProps.put(pe.getKey(),getInternalNames((String) pe.getValue()));
+        }
+      }
+
+      LOG.info("orig tbl props:{}",tblProps);
+      LOG.info("modif tbl props:{}",internalTblProps);
+
+      JsonSerDe wjsd = new JsonSerDe();
+      wjsd.initialize(conf, internalTblProps);
+
+      JsonSerDe rjsd = new JsonSerDe();
+      rjsd.initialize(conf, tblProps);
+
+      LOG.info("ORIG:{}",r);
+
+      Writable s = wjsd.serialize(r,wjsd.getObjectInspector());
+      LOG.info("ONE:{}",s);
+
+      Object o1 = wjsd.deserialize(s);
+      LOG.info("deserialized ONE : {} ", o1);
+
+      Object o2 = rjsd.deserialize(s);
+      LOG.info("deserialized TWO : {} ", o2);
+      assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2));
+    }
+
+  }
+
+  String getInternalNames(String columnNames){
+    if (columnNames == null) {
+      return null;
+    }
+    if (columnNames.isEmpty()) {
+      return "";
+    }
+
+    StringBuffer sb = new StringBuffer();
+    int numStrings = columnNames.split(",").length;
+    sb.append("_col0");
+    for (int i = 1; i < numStrings ; i++ ){
+      sb.append(",");
+      sb.append(HiveConf.getColumnInternalName(i));
+    }
+    return sb.toString();
+  }
 }
Index: src/java/org/apache/hcatalog/data/JsonSerDe.java
===================================================================
--- src/java/org/apache/hcatalog/data/JsonSerDe.java	(revision 1355850)
+++ src/java/org/apache/hcatalog/data/JsonSerDe.java	(working copy)
@@ -26,8 +26,11 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -167,12 +170,44 @@
       throw new IOException("Field name expected");
     }
     String fieldName = p.getText();
-    int fpos = s.getPosition(fieldName);
+    int fpos;
+    try {
+      fpos = s.getPosition(fieldName);
+    } catch (NullPointerException npe){
+      fpos = getPositionFromHiveInternalColumnName(fieldName);
+      LOG.debug("NPE finding position for field [{}] in schema [{}]",fieldName,s);
+      if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))){
+        LOG.error("Hive internal column name {} and position "
+            +"encoding {} for the column name are at odds",fieldName,fpos);
+        throw npe;
+      }
+      if (fpos == -1){
+        return; // unknown field, we return
+      }
+    }
     HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+    Object currField = extractCurrentField(p, null, hcatFieldSchema,false);
+    r.set(fpos,currField);
+  }
 
-    r.set(fpos,extractCurrentField(p, null, hcatFieldSchema,false));
+  public String getHiveInternalColumnName(int fpos) {
+    return HiveConf.getColumnInternalName(fpos);
   }
 
+  public int getPositionFromHiveInternalColumnName(String internalName) {
+//    return HiveConf.getPositionFromInternalName(internalName);
+    // The above line should have been all the implementation that
+    // we need, but due to a bug in that impl, which recognizes
+    // only single-digit columns, we need another impl here.
+    Pattern internalPattern = Pattern.compile("_col([0-9]+)");
+    Matcher m = internalPattern.matcher(internalName);
+    if (!m.matches()){
+      return -1;
+    } else {
+      return Integer.parseInt(m.group(1));
+    }
+  }
+
   /**
    * Utility method to extract current expected field from given JsonParser
   *
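Note (illustration, not part of the patch): the fix above decodes Hive internal column names of the form "_colN" back to field positions with a regex, since the patch comments describe HiveConf.getPositionFromInternalName as recognizing only single-digit columns. The standalone snippet below, with a class name and method name chosen here for illustration only, sketches that mapping under those assumptions and shows the multi-digit case the regex handles as well as the fallback to -1 for a regular column name.

// InternalColumnNameDemo.java -- illustrative sketch, not code from the patch.
// Demonstrates the "_colN" -> position mapping that the new
// getPositionFromHiveInternalColumnName performs with a regex, including a
// multi-digit name that a single-digit-only parser would not decode.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class InternalColumnNameDemo {

  // Same pattern as in the patch: "_col" followed by one or more digits.
  private static final Pattern INTERNAL_NAME = Pattern.compile("_col([0-9]+)");

  // Returns the encoded position, or -1 if the name is not an internal name.
  static int positionFromInternalName(String name) {
    Matcher m = INTERNAL_NAME.matcher(name);
    return m.matches() ? Integer.parseInt(m.group(1)) : -1;
  }

  public static void main(String[] args) {
    System.out.println(positionFromInternalName("_col0"));  // prints 0
    System.out.println(positionFromInternalName("_col12")); // prints 12 (multi-digit case)
    System.out.println(positionFromInternalName("name"));   // prints -1 (regular column name)
  }
}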