diff --git a/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java b/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java index ac7ef6b..f02b8e8 100644 --- a/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java +++ b/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java @@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.security.Credentials; import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatContext; import org.apache.hcatalog.common.HCatUtil; import org.apache.hcatalog.data.Pair; import org.apache.hcatalog.data.schema.HCatSchema; @@ -82,6 +83,7 @@ public class HCatLoader extends HCatBaseLoader { @Override public void setLocation(String location, Job job) throws IOException { + HCatContext.get(job.getConfiguration()); UDFContext udfContext = UDFContext.getUDFContext(); Properties udfProps = udfContext.getUDFProperties(this.getClass(), @@ -185,6 +187,8 @@ public class HCatLoader extends HCatBaseLoader { @Override public ResourceSchema getSchema(String location, Job job) throws IOException { + HCatContext.get(job.getConfiguration()); + Table table = phutil.getTable(location, hcatServerUri!=null?hcatServerUri:PigHCatUtil.getHCatServerUri(job), PigHCatUtil.getHCatServerPrincipal(job)); diff --git a/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java b/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java index 44aac3a..2a2eb25 100644 --- a/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java +++ b/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Properties; import junit.framework.TestCase; @@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapreduce.Job; import org.apache.hcatalog.HcatTestUtils; +import org.apache.hcatalog.common.HCatConstants; import org.apache.hcatalog.data.Pair; import org.apache.pig.ExecType; import org.apache.pig.PigServer; @@ -396,4 +398,44 @@ public class TestHCatLoader extends TestCase { ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); assertEquals(2048, (long) statistics.getmBytes()); } + + public void testConvertBooleanToInt() throws Exception { + String tbl = "test_convert_boolean_to_int"; + String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; + File inputDataDir = new File(inputFileName).getParentFile(); + inputDataDir.mkdir(); + + String[] lines = new String[] {"llama\t1", "alpaca\t0"}; + HcatTestUtils.createTestDataFile(inputFileName, lines); + + assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); + assertEquals(0, driver.run("create external table " + tbl + + " (a string, b boolean) row format delimited fields terminated by '\t'" + + " stored as textfile location 'file://" + + inputDataDir.getAbsolutePath() + "'").getResponseCode()); + + Properties properties = new Properties(); + properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); + PigServer server = new PigServer(ExecType.LOCAL, properties); + server.registerQuery( + "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();"); + Schema schema = server.dumpSchema("data"); + assertEquals(2, schema.getFields().size()); + + assertEquals("a", schema.getField(0).alias); + assertEquals(DataType.CHARARRAY, schema.getField(0).type); + assertEquals("b", schema.getField(1).alias); + assertEquals(DataType.INTEGER, schema.getField(1).type); + + Iterator iterator = server.openIterator("data"); + Tuple t = iterator.next(); + assertEquals("llama", t.get(0)); + // TODO: Figure out how to load a text file into Hive with boolean columns. This next assert + // passes because data was loaded as integers, not because it was converted. + assertEquals(1, t.get(1)); + t = iterator.next(); + assertEquals("alpaca", t.get(0)); + assertEquals(0, t.get(1)); + assertFalse(iterator.hasNext()); + } } diff --git a/src/java/org/apache/hcatalog/common/HCatConstants.java b/src/java/org/apache/hcatalog/common/HCatConstants.java index 618fb75..47d2706 100644 --- a/src/java/org/apache/hcatalog/common/HCatConstants.java +++ b/src/java/org/apache/hcatalog/common/HCatConstants.java @@ -113,4 +113,13 @@ public final class HCatConstants { // Hadoop Conf Var Names public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; + /**************************************************************************** + * Data-related configuration properties. + */ + + // Pig < 0.10.0 does not have boolean support. For integration the option is offered to + // convert boolean fields to integers. + public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = + "hcat.data.convert.boolean.to.integer"; + public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; } diff --git a/src/java/org/apache/hcatalog/common/HCatContext.java b/src/java/org/apache/hcatalog/common/HCatContext.java new file mode 100644 index 0000000..b2518e8 --- /dev/null +++ b/src/java/org/apache/hcatalog/common/HCatContext.java @@ -0,0 +1,38 @@ +package org.apache.hcatalog.common; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; + +/** + * HCatContext provides global access to configuration data. + */ +public class HCatContext { + + private static HCatContext hCatContext = null; + + private Configuration conf; + + private HCatContext(Configuration conf) { + this.conf = Preconditions.checkNotNull(conf); + } + + public static HCatContext get() { + if (hCatContext != null) { + return hCatContext; + } + throw new RuntimeException("HCatContext has not yet been configured."); + } + + public static synchronized HCatContext get(Configuration conf) { + if (hCatContext == null) { + hCatContext = new HCatContext(conf); + } else { + hCatContext.conf = conf; + } + return hCatContext; + } + + public Configuration getConf() { + return conf; + } +} diff --git a/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java b/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java index ec9b8c5..be36374 100644 --- a/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java +++ b/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java @@ -41,6 +41,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Writable; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatContext; import org.apache.hcatalog.data.schema.HCatSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -185,7 +187,15 @@ public class HCatRecordSerDe implements SerDe { ObjectInspector fieldObjectInspector) throws SerDeException { Object res = null; if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){ - res = ((PrimitiveObjectInspector)fieldObjectInspector).getPrimitiveJavaObject(field); + if (field != null && + HCatContext.get().getConf().getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) && + Boolean.class.isAssignableFrom(field.getClass())) { + res = ((Boolean) field) ? 1 : 0; + } else { + res = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); + } } else if (fieldObjectInspector.getCategory() == Category.STRUCT){ res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector); } else if (fieldObjectInspector.getCategory() == Category.LIST){ diff --git a/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java b/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java index 077bbf9..378cdc9 100644 --- a/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java +++ b/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java @@ -30,6 +30,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatContext; import org.apache.hcatalog.common.HCatException; import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; @@ -137,7 +139,10 @@ public class HCatSchemaUtils { private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { switch(((PrimitiveTypeInfo)basePrimitiveTypeInfo).getPrimitiveCategory()) { case BOOLEAN: - return Type.BOOLEAN; + return HCatContext.get().getConf().getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) ? + Type.INT : Type.BOOLEAN; case BYTE: return Type.TINYINT; case DOUBLE: