diff --git hcatalog-pig-adapter/ivy.xml hcatalog-pig-adapter/ivy.xml
index 6b6cb8d..c0f3254 100644
--- hcatalog-pig-adapter/ivy.xml
+++ hcatalog-pig-adapter/ivy.xml
@@ -38,7 +38,11 @@
+
+
diff --git hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
index 3ef5763..696081f 100644
--- hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
+++ hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
@@ -194,14 +194,27 @@ public class PigHCatUtil {
return rfSchema;
}
- private static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
+ protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
// there are two cases - array<Type> and array<struct<...>>
// in either case the element type of the array is represented in a
// tuple field schema in the bag's field schema - the second case (struct)
// more naturally translates to the tuple - in the first case (array)
// we simulate the tuple by putting the single field in a tuple
+
+ Properties props = UDFContext.getUDFContext().getClientSystemProps(); // client-side properties; null-checked below
+ String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; // used unless an override is configured
+ if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
+ innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
+ .replace("FIELDNAME", hfs.getName()); // literal substitution: '$' or '\' in the name is not treated as regex syntax
+ }
+ String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; // used unless an override is configured
+ if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
+ innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
+ .replace("FIELDNAME", hfs.getName()); // literal substitution, same reason as for the tuple name
+ }
+
ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
- bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
+ bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
.setDescription("The tuple in the bag")
.setType(DataType.TUPLE);
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
@@ -214,7 +227,7 @@ public class PigHCatUtil {
bagSubFieldSchemas[0].setSchema(s);
} else {
ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
- innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield")
+ innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
.setDescription("The inner field in the tuple in the bag")
.setType(getPigType(arrayElementFieldSchema))
.setSchema(null); // the element type is not a tuple - so no subschema
diff --git hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
new file mode 100644
index 0000000..6ad08eb
--- /dev/null
+++ hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
@@ -0,0 +1,75 @@
+package org.apache.hcatalog.pig;
+
+import com.google.common.collect.Lists;
+import junit.framework.Assert;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.util.UDFContext;
+import org.junit.Test;
+
+/**
+ * Checks the schema that {@link PigHCatUtil#getBagSubSchema} produces for an HCatalog
+ * array column, both with the default inner tuple/field names and with names
+ * overridden through client system properties.
+ */
+public class TestPigHCatUtil {
+
+  /** With no overrides configured, the default inner tuple and field names are used. */
+  @Test
+  public void testGetBagSubSchema() throws Exception {
+    ResourceSchema expected = expectedBagSubSchema("innertuple", "innerfield");
+    ResourceSchema actual = PigHCatUtil.getBagSubSchema(stringArrayField("llama"));
+
+    Assert.assertEquals(expected.toString(), actual.toString());
+  }
+
+  /** Configured names are used, with FIELDNAME replaced by the array column's name. */
+  @Test
+  public void testGetBagSubSchemaConfigured() throws Exception {
+    // NOTE: pig-0.8 sets client system properties by actually getting the client
+    // system properties. Starting in pig-0.9 you must pass the properties in.
+    // When updating our pig dependency this will need to be updated.
+    try {
+      System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
+      System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
+      UDFContext.getUDFContext().setClientSystemProps();
+
+      ResourceSchema expected = expectedBagSubSchema("t", "llama_tuple");
+      ResourceSchema actual = PigHCatUtil.getBagSubSchema(stringArrayField("llama"));
+
+      Assert.assertEquals(expected.toString(), actual.toString());
+    } finally {
+      // Clear the overrides and re-capture the properties so tests that run later in
+      // this JVM are not affected by the names configured here.
+      System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
+      System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
+      UDFContext.getUDFContext().setClientSystemProps();
+    }
+  }
+
+  /**
+   * Builds the expected bag sub-schema: a single tuple named {@code tupleName} that
+   * holds a single chararray field named {@code fieldName}.
+   */
+  private static ResourceSchema expectedBagSubSchema(String tupleName, String fieldName)
+      throws Exception {
+    ResourceFieldSchema innerField =
+        new ResourceFieldSchema().setName(fieldName).setType(DataType.CHARARRAY);
+    ResourceFieldSchema innerTuple = new ResourceFieldSchema().setName(tupleName)
+        .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+    innerTuple.setSchema(
+        new ResourceSchema().setFields(new ResourceFieldSchema[] {innerField}));
+    return new ResourceSchema().setFields(new ResourceFieldSchema[] {innerTuple});
+  }
+
+  /** Builds an HCat array column called {@code name} whose element is a single string. */
+  private static HCatFieldSchema stringArrayField(String name) throws Exception {
+    HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(
+        new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+    return new HCatFieldSchema(name, HCatFieldSchema.Type.ARRAY, elementSchema, null);
+  }
+}
diff --git src/java/org/apache/hcatalog/common/HCatConstants.java src/java/org/apache/hcatalog/common/HCatConstants.java
index aa2b762..618fb75 100644
--- src/java/org/apache/hcatalog/common/HCatConstants.java
+++ src/java/org/apache/hcatalog/common/HCatConstants.java
@@ -38,6 +38,10 @@ public final class HCatConstants {
public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter";
public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ",";
public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set" ;
+ public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; // property key: inner tuple name override (FIELDNAME expands to the column name)
+ public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; // inner tuple name when the property is unset
+ public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; // property key: inner field name override (FIELDNAME expands to the column name)
+ public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; // inner field name when the property is unset
//The keys used to store info into the job Configuration
public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat";