Index: src/test/org/apache/hcatalog/pig/MyPigStorage.java =================================================================== --- src/test/org/apache/hcatalog/pig/MyPigStorage.java (revision 1305484) +++ src/test/org/apache/hcatalog/pig/MyPigStorage.java (working copy) @@ -18,10 +18,7 @@ package org.apache.hcatalog.pig; import java.io.IOException; -import java.util.Properties; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hcatalog.pig.drivers.PigStorageInputDriver; import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.Tuple; Index: src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java =================================================================== --- src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java (revision 1305484) +++ src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java (working copy) @@ -53,13 +53,10 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.common.HCatConstants; import org.apache.hcatalog.data.DefaultHCatRecord; import org.apache.hcatalog.data.HCatRecord; import org.apache.hcatalog.data.schema.HCatFieldSchema; import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; /** * Test for HCatOutputFormat. Writes a partition using HCatOutputFormat and reads @@ -72,8 +69,6 @@ protected String inputFormat = RCFileInputFormat.class.getName(); protected String outputFormat = RCFileOutputFormat.class.getName(); - protected String inputSD = RCFileInputDriver.class.getName(); - protected String outputSD = RCFileOutputDriver.class.getName(); protected String serdeClass = ColumnarSerDe.class.getName(); private static List writeRecords = new ArrayList(); Index: src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java =================================================================== --- src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java (revision 1305484) +++ src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java (working copy) @@ -42,8 +42,6 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.util.StringUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; public class TestHCatOutputFormat extends TestCase { private HiveMetaStoreClient client; Index: src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java =================================================================== --- src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java (revision 1305484) +++ src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import junit.framework.TestCase; @@ -43,10 +42,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; import org.apache.hcatalog.listener.NotificationListener; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; import org.apache.thrift.TException; public class TestSemanticAnalysis extends TestCase{ Index: 
src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java =================================================================== --- src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java (revision 1305484) +++ src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java (working copy) @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hcatalog.utils; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; -import org.apache.thrift.TException; - -/** - * A utility program to annotate partitions of a pre-created table - * with input storage driver and output storage driver information - */ -public class PartitionStorageDriverAnnotator { - - /** - * @param args - * @throws MetaException - * @throws TException - * @throws NoSuchObjectException - * @throws InvalidOperationException - */ - public static void main(String[] args) throws MetaException, NoSuchObjectException, - TException, InvalidOperationException { - String thrifturi = null; - String database = "default"; - String table = null; - String isd = null; - String osd = null; - Map m = new HashMap(); - for(int i = 0; i < args.length; i++) { - if(args[i].equals("-u")) { - thrifturi = args[i+1]; - } else if(args[i].equals("-t")) { - table = args[i+1]; - } else if (args[i].equals("-i")) { - isd = args[i+1]; - } else if (args[i].equals("-o")) { - osd = args[i+1]; - } else if (args[i].equals("-p")) { - String[] kvps = args[i+1].split(";"); - for(String kvp: kvps) { - String[] kv = kvp.split("="); - if(kv.length != 2) { - System.err.println("ERROR: key value property pairs must be specified as key1=val1;key2=val2;..;keyn=valn"); - System.exit(1); - } - m.put(kv[0], kv[1]); - } - } else if(args[i].equals("-d")) { - database = args[i+1]; - } else { - System.err.println("ERROR: Unknown option: " + args[i]); - usage(); - } - i++; // to skip the value for an option - } - if(table == null || thrifturi == null) { - System.err.println("ERROR: thrift uri and table name are mandatory"); - usage(); - } - HiveConf hiveConf = new HiveConf(PartitionStorageDriverAnnotator.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.set("hive.metastore.uris", 
thrifturi); - - HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf,null); - List parts = hmsc.listPartitions(database, table, Short.MAX_VALUE); - - m.put("hcat.isd", isd != null ? isd : RCFileInputDriver.class.getName()); - m.put("hcat.osd", osd != null ? osd : RCFileOutputDriver.class.getName()); - - for(Partition p: parts) { - p.setParameters(m); - hmsc.alter_partition(database, table, p); - } - } - - /** - * - */ - private static void usage() { - System.err.println("Usage: java -cp testudf.jar: org.apache.hcat.utils.PartitionStorageDriverAnnotator -u -t " + - " [-i input driver classname (Default rcfiledriver)] [-o output driver classname (default rcfiledriver)] " + - " [-p key1=val1;key2=val2;..;keyn=valn (list of key=value property pairs to associate with each partition)]" + - " [-d database (if this not supplied the default database is used)]"); - System.exit(1); - } - -} Index: src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java =================================================================== --- src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java (revision 1305484) +++ src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java (working copy) @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.serde2.columnar.ColumnarStruct; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; - -public class RCFileInputDriver extends HCatInputStorageDriver{ - - - private SerDe serde; - private static final Log LOG = LogFactory.getLog(RCFileInputDriver.class); - private List colsInData; - private StructObjectInspector oi; - private Map partValues; - private List outCols; - private List structFields; - private Map namePosMapping; - - @Override - public InputFormat getInputFormat(Properties hcatProperties) { - return new RCFileMapReduceInputFormat(); - } - - @Override - public void setInputPath(JobContext jobContext, String location) throws IOException { - - super.setInputPath(jobContext, location); - } - - @Override - public void setOriginalSchema(JobContext jobContext, HCatSchema dataSchema) throws IOException { - - colsInData = dataSchema.getFields(); - namePosMapping = new HashMap(colsInData.size()); - int index =0; - for(HCatFieldSchema field : dataSchema.getFields()){ - namePosMapping.put(field.getName(), index++); - } - } - - @Override - public void setOutputSchema(JobContext jobContext, HCatSchema desiredSchema) throws IOException { - - // Finds out which column ids needs to be projected and set them up for RCFile. 
- outCols = desiredSchema.getFields(); - ArrayList prjColumns = new ArrayList(); - for(HCatFieldSchema prjCol : outCols){ - Integer pos = namePosMapping.get(prjCol.getName().toLowerCase()); - if(pos != null) { - prjColumns.add(pos); - } - } - - Collections.sort(prjColumns); - ColumnProjectionUtils.setReadColumnIDs(jobContext.getConfiguration(), prjColumns); - } - - @Override - public void setPartitionValues(JobContext jobContext, Map partitionValues) - throws IOException { - partValues = partitionValues; - } - - @Override - public HCatRecord convertToHCatRecord(WritableComparable ignored, Writable bytesRefArray) throws IOException { - - // Deserialize bytesRefArray into struct and then convert that struct to - // HCatRecord. - ColumnarStruct struct; - try { - struct = (ColumnarStruct)serde.deserialize(bytesRefArray); - } catch (SerDeException e) { - LOG.error(e.toString(), e); - throw new IOException(e); - } - - List outList = new ArrayList(outCols.size()); - - String colName; - Integer index; - - for(HCatFieldSchema col : outCols){ - - colName = col.getName().toLowerCase(); - index = namePosMapping.get(colName); - - if(index != null){ - StructField field = structFields.get(index); - outList.add( getTypedObj(oi.getStructFieldData(struct, field), field.getFieldObjectInspector())); - } - - else { - outList.add(partValues.get(colName)); - } - - } - return new DefaultHCatRecord(outList); - } - - private Object getTypedObj(Object data, ObjectInspector oi) throws IOException{ - - // The real work-horse method. We are gobbling up all the laziness benefits - // of Hive-RCFile by deserializing everything and creating crisp HCatRecord - // with crisp Java objects inside it. We have to do it because higher layer - // may not know how to do it. - - if (data == null) { - return null; - } - - switch(oi.getCategory()){ - - case PRIMITIVE: - return ((PrimitiveObjectInspector)oi).getPrimitiveJavaObject(data); - - case MAP: - MapObjectInspector moi = (MapObjectInspector)oi; - Map lazyMap = moi.getMap(data); - ObjectInspector keyOI = moi.getMapKeyObjectInspector(); - ObjectInspector valOI = moi.getMapValueObjectInspector(); - Map typedMap = new HashMap(lazyMap.size()); - for(Entry e : lazyMap.entrySet()){ - typedMap.put(getTypedObj(e.getKey(), keyOI), getTypedObj(e.getValue(), valOI)); - } - return typedMap; - - case LIST: - ListObjectInspector loi = (ListObjectInspector)oi; - List lazyList = loi.getList(data); - ObjectInspector elemOI = loi.getListElementObjectInspector(); - List typedList = new ArrayList(lazyList.size()); - Iterator itr = lazyList.listIterator(); - while(itr.hasNext()){ - typedList.add(getTypedObj(itr.next(),elemOI)); - } - return typedList; - - case STRUCT: - StructObjectInspector soi = (StructObjectInspector)oi; - List fields = soi.getAllStructFieldRefs(); - List typedStruct = new ArrayList(fields.size()); - for(StructField field : fields){ - typedStruct.add( getTypedObj(soi.getStructFieldData(data, field), field.getFieldObjectInspector())); - } - return typedStruct; - - - default: - throw new IOException("Don't know how to deserialize: "+oi.getCategory()); - - } - } - - @Override - public void initialize(JobContext context,Properties hcatProperties) - throws IOException { - - super.initialize(context, hcatProperties); - - // Columnar Serde needs to know names and types of columns it needs to read. - List fields = HCatUtil.getFieldSchemaList(colsInData); - hcatProperties.setProperty(Constants.LIST_COLUMNS,MetaStoreUtils. 
- getColumnNamesFromFieldSchema(fields)); - hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES, MetaStoreUtils. - getColumnTypesFromFieldSchema(fields)); - - // It seems RCFIle reads and writes nulls differently as compared to default hive. - // setting these props to match LazySimpleSerde - hcatProperties.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); - hcatProperties.setProperty(Constants.SERIALIZATION_FORMAT, "1"); - - try { - serde = new ColumnarSerDe(); - serde.initialize(context.getConfiguration(), hcatProperties); - oi = (StructObjectInspector) serde.getObjectInspector(); - structFields = oi.getAllStructFieldRefs(); - - } catch (SerDeException e) { - throw new IOException(e); - } - } -} Index: src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java =================================================================== --- src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java (revision 1305484) +++ src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java (working copy) @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.FileOutputStorageDriver; - -/** - * The storage driver for writing RCFile data through HCatOutputFormat. - */ - public class RCFileOutputDriver extends FileOutputStorageDriver { - - /** The serde for serializing the HCatRecord to bytes writable */ - private SerDe serde; - - /** The object inspector for the given schema */ - private StructObjectInspector objectInspector; - - /** The schema for the output data */ - private HCatSchema outputSchema; - - /** The cached RCFile output format instance */ - private OutputFormat outputFormat = null; - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#convertValue(org.apache.hcatalog.data.HCatRecord) - */ - @Override - public Writable convertValue(HCatRecord value) throws IOException { - try { - - return serde.serialize(value.getAll(), objectInspector); - } catch(SerDeException e) { - throw new IOException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#generateKey(org.apache.hcatalog.data.HCatRecord) - */ - @Override - public WritableComparable generateKey(HCatRecord value) throws IOException { - //key is not used for RCFile output - return null; - } - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#getOutputFormat(java.util.Properties) - */ - @SuppressWarnings("unchecked") - @Override - public OutputFormat, ? 
extends Writable> getOutputFormat() throws IOException { - if( outputFormat == null ) { - outputFormat = new RCFileMapReduceOutputFormat(); - } - - return outputFormat; - } - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setOutputPath(org.apache.hadoop.mapreduce.JobContext, java.lang.String) - */ - @Override - public void setOutputPath(JobContext jobContext, String location) throws IOException { - //Not calling FileOutputFormat.setOutputPath since that requires a Job instead of JobContext - jobContext.getConfiguration().set("mapred.output.dir", location); - } - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setPartitionValues(org.apache.hadoop.mapreduce.JobContext, java.util.Map) - */ - @Override - public void setPartitionValues(JobContext jobContext, Map partitionValues) - throws IOException { - //default implementation of HCatOutputStorageDriver.getPartitionLocation will use the partition - //values to generate the data location, so partition values not used here - } - - /* (non-Javadoc) - * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setSchema(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.hive.metastore.api.Schema) - */ - @Override - public void setSchema(JobContext jobContext, HCatSchema schema) throws IOException { - outputSchema = schema; - RCFileMapReduceOutputFormat.setColumnNumber( - jobContext.getConfiguration(), schema.getFields().size()); - } - - @Override - public void initialize(JobContext context,Properties hcatProperties) throws IOException { - - super.initialize(context, hcatProperties); - - List fields = HCatUtil.getFieldSchemaList(outputSchema.getFields()); - hcatProperties.setProperty(Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); - - // setting these props to match LazySimpleSerde - hcatProperties.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); - hcatProperties.setProperty(Constants.SERIALIZATION_FORMAT, "1"); - - try { - serde = new ColumnarSerDe(); - serde.initialize(context.getConfiguration(), hcatProperties); - objectInspector = createStructObjectInspector(); - - } catch (SerDeException e) { - throw new IOException(e); - } - } - - public StructObjectInspector createStructObjectInspector() throws IOException { - - if( outputSchema == null ) { - throw new IOException("Invalid output schema specified"); - } - - List fieldInspectors = new ArrayList(); - List fieldNames = new ArrayList(); - - for(HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { - TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); - - fieldNames.add(hcatFieldSchema.getName()); - fieldInspectors.add(getObjectInspector(type)); - } - - StructObjectInspector structInspector = ObjectInspectorFactory. - getStandardStructObjectInspector(fieldNames, fieldInspectors); - return structInspector; - } - - public ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - - switch(type.getCategory()) { - - case PRIMITIVE : - PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; - return PrimitiveObjectInspectorFactory. 
- getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); - - case MAP : - MapTypeInfo mapType = (MapTypeInfo) type; - MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( - getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); - return mapInspector; - - case LIST : - ListTypeInfo listType = (ListTypeInfo) type; - ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( - getObjectInspector(listType.getListElementTypeInfo())); - return listInspector; - - case STRUCT : - StructTypeInfo structType = (StructTypeInfo) type; - List fieldTypes = structType.getAllStructFieldTypeInfos(); - - List fieldInspectors = new ArrayList(); - for(TypeInfo fieldType : fieldTypes) { - fieldInspectors.add(getObjectInspector(fieldType)); - } - - StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - structType.getAllStructFieldNames(), fieldInspectors); - return structInspector; - - default : - throw new IOException("Unknown field schema type"); - } - } - -} Index: src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java (working copy) @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.util.StringUtils; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; - -/** The abstract class to be implemented by underlying storage drivers to enable data access from HCat through - * HCatInputFormat. - */ -public abstract class HCatInputStorageDriver { - - public void initialize(JobContext context, Properties storageDriverArgs) throws IOException { - // trivial do nothing - } - - /** - * Returns the InputFormat to use with this Storage Driver. 
- * @param hcatProperties the properties containing parameters required for initialization of InputFormat - * @return the InputFormat instance - */ - public abstract InputFormat getInputFormat(Properties hcatProperties); - - - /** - * Converts to HCatRecord format usable by HCatInputFormat to convert to required valuetype. - * Implementers of StorageDriver should look to overwriting this function so as to convert their - * value type to HCatRecord. Default implementation is provided for StorageDriver implementations - * on top of an underlying InputFormat that already uses HCatRecord as a tuple - * @param baseValue the underlying value to convert to HCatRecord - */ - public abstract HCatRecord convertToHCatRecord(WritableComparable baseKey, Writable baseValue) throws IOException; - - /** - * Set the data location for the input. - * @param jobContext the job context object - * @param location the data location - * @throws IOException Signals that an I/O exception has occurred. - * - * Default implementation for FileInputFormat based Input Formats. Override - * this for other input formats. - */ - public void setInputPath(JobContext jobContext, String location) throws IOException{ - - // ideally we should just call FileInputFormat.setInputPaths() here - but - // that won't work since FileInputFormat.setInputPaths() needs - // a Job object instead of a JobContext which we are handed here - - int length = location.length(); - int curlyOpen = 0; - int pathStart = 0; - boolean globPattern = false; - List pathStrings = new ArrayList(); - - for (int i=0; i partitionValues) throws IOException; - -} Index: src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java (working copy) @@ -23,17 +23,8 @@ import java.util.LinkedList; import java.util.Map; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Properties; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.conf.Configuration; @@ -41,9 +32,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -54,9 +43,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; -import org.apache.hcatalog.common.ErrorType; import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; import org.apache.hcatalog.common.HCatUtil; import org.apache.hcatalog.data.HCatRecord; import org.apache.hcatalog.data.schema.HCatFieldSchema; @@ -109,7 +96,7 @@ * underlying InputFormat's splits * @param jobContext the job context object * @return the splits, an HCatInputSplit wrapper over the storage - * driver 
InputSplits + * handler InputSplits * @throws IOException or InterruptedException */ @Override @@ -183,7 +170,7 @@ * @param split the split * @param taskContext the task attempt context * @return the record reader instance, either an HCatRecordReader(later) or - * the underlying storage driver's RecordReader + * the underlying storage handler's RecordReader * @throws IOException or InterruptedException */ @Override Index: src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java (working copy) @@ -20,9 +20,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils; import java.io.Serializable; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Properties; /** The class used to serialize and store the information read from the metadata server */ @@ -47,9 +45,6 @@ /** implementation specific job properties */ private Properties properties; - /** job properties */ - private Map jobProperties; - /** * Initializes a new InputJobInfo * for reading data from a table. @@ -132,8 +127,8 @@ } /** - * Set/Get Property information to be passed down to *StorageDriver implementation - * put implementation specific storage driver configurations here + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here * @return the implementation specific job properties */ public Properties getProperties() { Index: src/java/org/apache/hcatalog/mapreduce/InternalUtil.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/InternalUtil.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/InternalUtil.java (working copy) @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.HCatConstants; import org.apache.hcatalog.common.HCatUtil; import org.apache.hcatalog.data.schema.HCatFieldSchema; import org.apache.hcatalog.data.schema.HCatSchema; @@ -64,7 +63,7 @@ } - return new StorerInfo(null, null, + return new StorerInfo( sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), properties.get(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE), hcatProperties); Index: src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java (working copy) @@ -28,7 +28,7 @@ * behavior necessary to work with HCatalog (ie metastore updates, hcatalog delegation tokens, etc). * Containers are also used to provide storage specific implementations of some HCatalog features (ie dynamic partitioning). * Hence users wishing to create storage specific implementations of HCatalog features should implement this class and override - * HCatOutputStorageDriver.getOutputFormatContainer() to return the implementation. + * HCatStorageHandler.getOutputFormatContainer(OutputFormat outputFormat) to return the implementation. 
 * By default DefaultOutputFormatContainer is used, which only implements the bare minimum features HCatalog features
 * such as partitioning isn't supported.
 */
Index: src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java (working copy)
@@ -123,7 +123,7 @@
 }
 /**
- * @return the storageDriver
+ * @return the storage handler
 */
 public HCatStorageHandler getStorageHandler() {
 return storageHandler;
 }
Index: src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java (working copy)
@@ -55,7 +55,7 @@
 }
 /**
- * Get the record writer for the job. Uses the Table's default OutputStorageDriver
+ * Get the record writer for the job. Uses the storage handler's OutputFormat
 * to get the record writer.
 * @param context the information about the current task.
 * @return a RecordWriter to write the output for the job.
Index: src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java (working copy)
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package org.apache.hcatalog.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.JobStatus.State;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatSchema;
-
-
-/** The abstract class to be implemented by underlying storage drivers to enable data access from HCat through
- * HCatOutputFormat.
- */
-public abstract class HCatOutputStorageDriver {
-
-
- /**
- * Initialize the storage driver with specified properties, default implementation does nothing.
- * @param context the job context object
- * @param hcatProperties the properties for the storage driver
- * @throws IOException Signals that an I/O exception has occurred.
- */ - public void initialize(JobContext context, Properties hcatProperties) throws IOException { - } - - /** - * Returns the OutputFormat to use with this Storage Driver. - * @return the OutputFormat instance - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract OutputFormat, ? extends Writable> getOutputFormat() throws IOException; - - /** - * Set the data location for the output. - * @param jobContext the job context object - * @param location the data location - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract void setOutputPath(JobContext jobContext, String location) throws IOException; - - /** - * Set the schema for the data being written out. - * @param jobContext the job context object - * @param schema the data schema - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract void setSchema(JobContext jobContext, HCatSchema schema) throws IOException; - - /** - * Sets the partition key values for the partition being written. - * @param jobContext the job context object - * @param partitionValues the partition values - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract void setPartitionValues(JobContext jobContext, Map partitionValues) throws IOException; - - /** - * Generate the key for the underlying outputformat. The value given to HCatOutputFormat is passed as the - * argument. The key given to HCatOutputFormat is ignored.. - * @param value the value given to HCatOutputFormat - * @return a key instance - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract WritableComparable generateKey(HCatRecord value) throws IOException; - - /** - * Convert the given HCatRecord value to the actual value type. - * @param value the HCatRecord value to convert - * @return a value instance - * @throws IOException Signals that an I/O exception has occurred. - */ - public abstract Writable convertValue(HCatRecord value) throws IOException; - - /** - * Gets the location to use for the specified partition values. - * The storage driver can override as required. - * @param jobContext the job context object - * @param tableLocation the location of the table - * @param partitionValues the partition values - * @param dynHash A unique hash value that represents the dynamic partitioning job used - * @return the location String. - * @throws IOException Signals that an I/O exception has occurred. - */ - public String getOutputLocation(JobContext jobContext, - String tableLocation, List partitionCols, Map partitionValues, String dynHash) throws IOException { - return null; - } - - /** Storage drivers wrapping other OutputFormats should override this method. - */ - public Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException{ - return null; - } - - /** - * Implementation that calls the underlying output committer's setupJob, - * used in lieu of underlying committer's setupJob when using dynamic partitioning - * The default implementation should be overriden by underlying implementations - * that do not use FileOutputCommitter. - * The reason this function exists is so as to allow a storage driver implementor to - * override underlying OutputCommitter's setupJob implementation to allow for - * being called multiple times in a job, to make it idempotent. 
- * This should be written in a manner that is callable multiple times - * from individual tasks without stepping on each others' toes - * - * @param context - * @throws InterruptedException - * @throws IOException - */ - public void setupOutputCommitterJob(TaskAttemptContext context) - throws IOException, InterruptedException{ - getOutputFormat().getOutputCommitter(context).setupJob(context); - } - - /** - * Implementation that calls the underlying output committer's cleanupJob, - * used in lieu of underlying committer's cleanupJob when using dynamic partitioning - * This should be written in a manner that is okay to call after having had - * multiple underlying outputcommitters write to task dirs inside it. - * While the base MR cleanupJob should have sufficed normally, this is provided - * in order to let people implementing setupOutputCommitterJob to cleanup properly - * - * @param context - * @throws IOException - */ - public void cleanupOutputCommitterJob(TaskAttemptContext context) - throws IOException, InterruptedException{ - getOutputFormat().getOutputCommitter(context).cleanupJob(context); - } - - /** - * Implementation that calls the underlying output committer's abortJob, - * used in lieu of underlying committer's abortJob when using dynamic partitioning - * This should be written in a manner that is okay to call after having had - * multiple underlying outputcommitters write to task dirs inside it. - * While the base MR cleanupJob should have sufficed normally, this is provided - * in order to let people implementing setupOutputCommitterJob to abort properly - * - * @param context - * @param state - * @throws IOException - */ - public void abortOutputCommitterJob(TaskAttemptContext context, State state) - throws IOException, InterruptedException{ - getOutputFormat().getOutputCommitter(context).abortJob(context,state); - } - - /** - * return an instance of OutputFormatContainer containing the passed outputFormat. DefaultOutputFormatContainer is returned by default. - * @param outputFormat format the returned container will contain - * @return - */ - - //TODO broken this entire class will disappear anyway - OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { - return new DefaultOutputFormatContainer(null); - } - -} Index: src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java (working copy) @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.apache.hadoop.security.AccessControlException; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -/** - * Base class for File-based OutputStorageDrivers to extend. Provides subclasses - * the convenience of not having to rewrite mechanisms such as, dynamic - * partitioning, partition registration, success file, etc. - */ -public abstract class FileOutputStorageDriver extends HCatOutputStorageDriver { - - /** The directory under which data is initially written for a partitioned table */ - protected static final String DYNTEMP_DIR_NAME = "_DYN"; - - /** The directory under which data is initially written for a non partitioned table */ - protected static final String TEMP_DIR_NAME = "_TEMP"; - private OutputFormat, ? super Writable> outputFormat; - - - @Override - public void initialize(JobContext jobContext, Properties hcatProperties) throws IOException { - super.initialize(jobContext, hcatProperties); - } - - @Override - public final String getOutputLocation(JobContext jobContext, - String tableLocation, List partitionCols, Map partitionValues, String dynHash) throws IOException { - String parentPath = tableLocation; - // For dynamic partitioned writes without all keyvalues specified, - // we create a temp dir for the associated write job - if (dynHash != null){ - parentPath = new Path(tableLocation, DYNTEMP_DIR_NAME+dynHash).toString(); - } - - // For non-partitioned tables, we send them to the temp dir - if((dynHash == null) && ( partitionValues == null || partitionValues.size() == 0 )) { - return new Path(tableLocation, TEMP_DIR_NAME).toString(); - } - - List values = new ArrayList(); - for(String partitionCol : partitionCols) { - values.add(partitionValues.get(partitionCol)); - } - - String partitionLocation = FileUtils.makePartName(partitionCols, values); - - Path path = new Path(parentPath, partitionLocation); - return path.toString(); - } - - @Override - public final Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException{ - return new Path(new FileOutputCommitter(new Path(outputLoc), context).getWorkPath(), org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getUniqueFile(context, "part", "")); - } - - /** - * Any initialization of file paths, set permissions and group on freshly created files - * This is called at RecordWriter instantiation time which can be at write-time for - * a dynamic partitioning usecase - * @param context - * @throws IOException - */ - static void prepareOutputLocation(HCatOutputStorageDriver osd, TaskAttemptContext context) throws IOException { - OutputJobInfo info = HCatBaseOutputFormat.getJobInfo(context); -// Path workFile = osd.getWorkFilePath(context,info.getLocation()); - Path workFile = osd.getWorkFilePath(context,context.getConfiguration().get("mapred.output.dir")); - Path tblPath = new Path(info.getTableInfo().getTable().getSd().getLocation()); - FileSystem fs = 
tblPath.getFileSystem(context.getConfiguration());
- FileStatus tblPathStat = fs.getFileStatus(tblPath);
-
-// LOG.info("Attempting to set permission ["+tblPathStat.getPermission()+"] on ["+
-// workFile+"], location=["+info.getLocation()+"] , mapred.locn =["+
-// context.getConfiguration().get("mapred.output.dir")+"]");
-//
-// FileStatus wFileStatus = fs.getFileStatus(workFile);
-// LOG.info("Table : "+tblPathStat.getPath());
-// LOG.info("Working File : "+wFileStatus.getPath());
-
- fs.setPermission(workFile, tblPathStat.getPermission());
- try{
- fs.setOwner(workFile, null, tblPathStat.getGroup());
- } catch(AccessControlException ace){
- // log the messages before ignoring. Currently, logging is not built in HCat.
- }
- }
-
- @Override
- OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) {
- //broken
- return new FileOutputFormatContainer(null);
- }
-}
-
Index: src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java (working copy)
@@ -23,9 +23,6 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.io.WritableComparable;
@@ -98,9 +95,8 @@
 }
 
 /**
- * Gets the output storage driver instance.
+ * Configure the output storage handler
 * @param jobContext the job context
- * @return the output driver instance
 * @throws IOException
 */
 @SuppressWarnings("unchecked")
@@ -110,9 +106,9 @@
 }
 
 /**
- * Gets the output storage driver instance, with allowing specification of missing dynamic partvals
+ * Configure the output storage handler, allowing specification of missing dynamic partvals
 * @param jobContext the job context
- * @return the output driver instance
+ * @param dynamicPartVals
 * @throws IOException
 */
 @SuppressWarnings("unchecked")
@@ -130,7 +126,7 @@
 List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
 if (dynamicPartVals.size() != dynamicPartKeys.size()){
 throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
- "Unable to instantiate dynamic partitioning storage driver, mismatch between"
+ "Unable to configure dynamic partitioning for storage handler, mismatch between"
 + " number of partition values obtained["+dynamicPartVals.size()
 + "] and number of partition values required["+dynamicPartKeys.size()+"]");
 }
@@ -153,16 +149,17 @@
 if (e instanceof HCatException){
 throw (HCatException)e;
 }else{
- throw new HCatException(ErrorType.ERROR_INIT_STORAGE_DRIVER, e);
+ throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
 }
 }
 }
 
 /**
- * Gets the output storage driver instance, with allowing specification
+ * Configure the output storage handler, allowing specification
 * of partvals from which it picks the dynamic partvals
 * @param context the job context
 * @param jobInfo the output job info
+ * @param fullPartSpec
 * @throws IOException
 */
@@ -191,7 +188,7 @@
 // So, find out positions of partition columns in schema provided by user.
 // We also need to update the output Schema with these deletions.
- // Note that, output storage drivers never sees partition columns in data
+ // Note that output storage handlers never see partition columns in data
 // or schema.
HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); Index: src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java (working copy) @@ -205,8 +205,8 @@ } /** - * Set/Get Property information to be passed down to *StorageDriver implementation - * put implementation specific storage driver configurations here + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here * @return the implementation specific job properties */ public Properties getProperties() { Index: src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java =================================================================== --- src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (revision 1305484) +++ src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (working copy) @@ -62,7 +62,7 @@ /** * Set the info about the output to write for the Job. This queries the metadata server - * to find the StorageDriver to use for the table. Throws error if partition is already published. + * to find the StorageHandler to use for the table. Throws error if partition is already published. * @param job the job object * @param outputJobInfo the table output info * @throws IOException the exception in communicating with the metadata server @@ -221,7 +221,7 @@ } /** - * Get the record writer for the job. Uses the Table's default OutputStorageDriver + * Get the record writer for the job. Uses the StorageHandler's default OutputFormat * to get the record writer. * @param context the information about the current task. * @return a RecordWriter to write the output for the job. 
Index: src/java/org/apache/hcatalog/mapreduce/InitializeInput.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/InitializeInput.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/InitializeInput.java (working copy)
@@ -67,7 +67,7 @@
   private static final Log LOG = LogFactory.getLog(InitializeInput.class);
-  /** The prefix for keys used for storage driver arguments */
+  /** The prefix for keys used for storage handler arguments */
   static final String HCAT_KEY_PREFIX = "hcat.";
   private static HiveConf hiveConf;
Index: src/java/org/apache/hcatalog/mapreduce/StorerInfo.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/StorerInfo.java (revision 1305484)
+++ src/java/org/apache/hcatalog/mapreduce/StorerInfo.java (working copy)
@@ -25,15 +25,7 @@
     /** The serialization version */
     private static final long serialVersionUID = 1L;
-    //TODO remove this
-    /** The name of the input storage driver class */
-    private String inputSDClass;
-
-    //TODO remove this
-    /** The name of the output storage driver class */
-    private String outputSDClass;
-
-    /** The properties for the storage driver */
+    /** The properties for the storage handler */
    private Properties properties;
    private String ofClass;
@@ -44,31 +36,16 @@
    private String storageHandlerClass;
-
-    //TODO remove this
    /**
-     * Initialize the storage driver
-     * @param inputSDClass
-     * @param outputSDClass
+     * Initialize the storer info
+     * @param ifClass
+     * @param ofClass
+     * @param serdeClass
+     * @param storageHandlerClass
      * @param properties
      */
-    public StorerInfo(String inputSDClass, String outputSDClass, Properties properties) {
+    public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) {
       super();
-      this.inputSDClass = inputSDClass;
-      this.outputSDClass = outputSDClass;
-      this.properties = properties;
-    }
-
-    /**
-     * Initialize the storage driver
-     * @param inputSDClass
-     * @param outputSDClass
-     * @param properties
-     */
-    public StorerInfo(String inputSDClass, String outputSDClass, String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) {
-      super();
-      this.inputSDClass = inputSDClass;
-      this.outputSDClass = outputSDClass;
       this.ifClass =ifClass;
       this.ofClass = ofClass;
       this.serdeClass = serdeClass;
@@ -76,21 +53,7 @@
       this.properties = properties;
    }
-    /**
-     * @return the inputSDClass
-     */
-    public String getInputSDClass() {
-      return inputSDClass;
-    }
-
-    /**
-     * @return the outputSDClass
-     */
-    public String getOutputSDClass() {
-      return outputSDClass;
-    }
-
-public String getIfClass() {
+    public String getIfClass() {
       return ifClass;
    }
Index: src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
===================================================================
--- src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java (revision 1305484)
+++ src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java (working copy)
@@ -24,36 +24,26 @@
 import java.util.Map;
 import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.ql.exec.DDLTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
-import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
 import org.apache.hadoop.hive.ql.security.authorization.Privilege;
 import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.common.HCatException;
 import org.apache.hcatalog.common.HCatUtil;
 import org.apache.hcatalog.mapreduce.HCatStorageHandler;
-import org.apache.hcatalog.rcfile.RCFileInputDriver;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
 final class CreateTableHook extends HCatSemanticAnalyzerBase {
Index: src/java/org/apache/hcatalog/common/ErrorType.java
===================================================================
--- src/java/org/apache/hcatalog/common/ErrorType.java (revision 1305484)
+++ src/java/org/apache/hcatalog/common/ErrorType.java (working copy)
@@ -34,7 +34,7 @@
   ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"),
   ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"),
   ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"),
-  ERROR_INIT_STORAGE_DRIVER (2005, "Error initializing output storage driver instance"),
+  ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"),
   ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"),
   ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"),
   ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"),
Index: src/java/org/apache/hcatalog/common/HCatConstants.java
===================================================================
--- src/java/org/apache/hcatalog/common/HCatConstants.java (revision 1305484)
+++ src/java/org/apache/hcatalog/common/HCatConstants.java (working copy)
@@ -25,8 +25,6 @@
   public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
   public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat";
-  public static final String HCAT_RCFILE_ISD_CLASS = "org.apache.hcatalog.rcfile.RCFileInputDriver";
-  public static final String HCAT_RCFILE_OSD_CLASS = "org.apache.hcatalog.rcfile.RCFileOutputDriver";
   public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName();
   public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName();
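
For context (not part of the patch): with the two- and seven-argument constructors removed, the surviving StorerInfo constructor takes the input format, output format, SerDe, and storage handler class names plus the handler properties. A minimal sketch for an RCFile-backed table follows; the Hive class names are standard, but passing null for storageHandlerClass when a table has no explicit handler, and the property key used, are assumptions made purely for illustration.

import java.util.Properties;

import org.apache.hcatalog.mapreduce.StorerInfo;

public class StorerInfoSketch {
    public static void main(String[] args) {
        Properties storerProperties = new Properties();
        // Hypothetical handler-specific property.
        storerProperties.setProperty("my.handler.option", "value");

        // New five-argument constructor from the hunk above.
        StorerInfo storerInfo = new StorerInfo(
                "org.apache.hadoop.hive.ql.io.RCFileInputFormat",        // ifClass
                "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",       // ofClass
                "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",  // serdeClass
                null,                            // storageHandlerClass (assumed optional here)
                storerProperties);

        System.out.println(storerInfo.getIfClass());
    }
}
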
Index: storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java
===================================================================
--- storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java (revision 1305484)
+++ storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java (working copy)
@@ -352,7 +352,7 @@
         String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
                               "(key int, english string, spanish string) STORED BY " +
                               "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
-                              "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY+"'='true',"+
+                              "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY+"'='true',"+
                               "'hbase.columns.mapping'=':key,"+familyName+":english,"+familyName+":spanish')" ;
         assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
@@ -446,7 +446,7 @@
         String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
                               "(key int, english string, spanish string) STORED BY " +
                               "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
-                              "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY+"'='true',"+
+                              "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY+"'='true',"+
                               "'hbase.columns.mapping'=':key,"+familyName+":english,"+familyName+":spanish')" ;
         assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
@@ -525,7 +525,7 @@
         String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
                               "(key int, english string, spanish string) STORED BY " +
                               "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
-                              "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY + "'='true'," +
+                              "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," +
                               "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')";
@@ -579,7 +579,7 @@
         ResultScanner scanner = table.getScanner(scan);
         assertFalse(scanner.iterator().hasNext());
-        // verify that the input storage driver returns empty results.
+        // verify that the storage handler input format returns empty results.
         Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableBulkIgnoreAbortedTransactions");
         FileSystem fs = getFileSystem();
Index: storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java
===================================================================
--- storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java (revision 1305484)
+++ storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.hcatalog.common.HCatConstants;
 /**
- * Constants class for constants used in HBase storage driver.
+ * Constants class for constants used in HBase storage handler.
  */
 class HBaseConstants {
@@ -35,8 +35,8 @@
     /** key used to define the column mapping of hbase to hcatalog schema */
     public static final String PROPERTY_COLUMN_MAPPING_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+"."+ HBaseSerDe.HBASE_COLUMNS_MAPPING;
-    /** key used to define wether bulk storage driver will be used or not */
-    public static final String PROPERTY_OSD_BULK_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.output.bulkMode";
+    /** key used to define whether bulk storage output format will be used or not */
+    public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.output.bulkMode";
     /** key used to define the hbase table snapshot. */
     public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot";
Index: storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java
===================================================================
--- storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java (revision 1305484)
+++ storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java (working copy)
@@ -556,7 +556,7 @@
     public static boolean isBulkMode(OutputJobInfo outputJobInfo) {
         //Default is false
         String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties()
-                .getProperty(HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY,
+                .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY,
                         "false");
         return "true".equals(bulkMode);
     }
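
For context (not part of the patch): assuming HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX is "hcat", the renamed key resolves to "hcat.hbase.output.bulkMode", which is what the CREATE TABLE ... TBLPROPERTIES clauses in the tests above set and what isBulkMode() reads back. Since HBaseConstants is package-private, the sketch below spells the key out literally and mirrors the defaulted lookup with plain java.util.Properties.

import java.util.Properties;

public class BulkModeCheckSketch {
    // Mirrors the isBulkMode() lookup above for a plain Properties object;
    // the literal key assumes HCAT_DEFAULT_TOPIC_PREFIX == "hcat".
    static boolean isBulkMode(Properties storerProperties) {
        String bulkMode = storerProperties.getProperty("hcat.hbase.output.bulkMode", "false");
        return "true".equals(bulkMode);
    }

    public static void main(String[] args) {
        Properties tableProperties = new Properties();
        // As set via TBLPROPERTIES in the tests above.
        tableProperties.setProperty("hcat.hbase.output.bulkMode", "true");

        System.out.println(isBulkMode(tableProperties));   // true: bulk (HFile) output path
        System.out.println(isBulkMode(new Properties()));  // false: defaults to direct writes
    }
}
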