diff --git data/files/people.txt data/files/people.txt
new file mode 100644
index 0000000..44408a8
--- /dev/null
+++ data/files/people.txt
@@ -0,0 +1,100 @@
+Dalton Meyers
+Keane Y. Abbott
+Tarik L. Gibbs
+Bert I. Gilliam
+Jayme Hurst
+Lesley F. Robles
+Evangeline Espinoza
+Georgia Cantrell
+Kiayada K. Carey
+McKenzie Valenzuela
+Lavinia M. Maddox
+Wanda Garcia
+Paul E. Cooper
+Graham W. Rojas
+Melvin A. Ortega
+Devin Rice
+Dorian P. Barron
+Kenneth C. Stewart
+Yuri Mullins
+Joelle Miles
+Abdul Keith
+Charity R. Castaneda
+Chanda Cherry
+Keely Z. Jacobs
+Samuel E. Mitchell
+Darryl W. Huber
+Carly V. Ward
+Kalia Burns
+Kristen Price
+Aquila X. Cooper
+Rachel I. Gill
+Shay Goodwin
+Camden E. Ellison
+Cole F. Rice
+Quincy G. Sharpe
+Jacqueline G. Justice
+Athena D. Kent
+Genevieve G. Boyd
+Larissa Strong
+Branden Byrd
+Madeson B. Guzman
+Ignacia Ellis
+Josephine Oneal
+Jakeem Coffey
+Scarlet Greene
+Francis A. Bradshaw
+Jerry Keith
+Kuame F. Schultz
+Ferdinand F. Adkins
+Lamar L. Dale
+Todd Graham
+Connor Calhoun
+Emily N. Stein
+Geoffrey L. Witt
+Nayda Hale
+Dexter X. Benton
+Sandra Contreras
+Xavier Montgomery
+Hector F. Craig
+Pascale Winters
+Drake Wiley
+Barclay Mckay
+Briar Orr
+Keith H. Hendricks
+Beau Le
+Hanna Becker
+Cadman Briggs
+Lewis F. Carrillo
+Kaseem Austin
+Madeline Rodriquez
+Dakota Calhoun
+Odette X. Delgado
+Jonah H. Ellis
+Laurel W. Morse
+Hunter P. Johns
+Dillon F. Washington
+Karleigh K. Conway
+Nyssa Dennis
+Kennedy Church
+August Kent
+Keegan Q. Winters
+Chester Whitley
+Drake T. Burt
+Marvin Rivas
+Deanna Holman
+Lillian Sargent
+Ella R. Hudson
+Tanya B. Gay
+Judith M. Beach
+Wallace Mueller
+Olga Matthews
+Raphael Shaffer
+Jena P. Morin
+Gray V. Woods
+Breanna Nash
+Miranda Randolph
+Nichole Mccormick
+September J. Dominguez
+Slade Pearson
+Caryn Barrera
\ No newline at end of file
diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote
old mode 100644
new mode 100755
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 6daf199..aeefdbb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -26,6 +26,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -34,10 +35,8 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
-import org.apache.hadoop.hive.ql.exec.FooterBuffer;
 import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveRecordReader;
@@ -48,7 +47,6 @@
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory;
@@ -59,11 +57,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -81,6 +76,11 @@
   static Log LOG = LogFactory.getLog(FetchOperator.class.getName());
   static LogHelper console = new LogHelper(LOG);
 
+  public static final String FETCH_OPERATOR_TABLE_CALL_ID =
+      "hive.fetchoperator.table.call.id";
+  public static final String FETCH_OPERATOR_DIRECTORY_LIST =
+      "hive.complete.dir.list";
+
   private boolean isNativeTable;
   private FetchWork work;
   protected Operator<?> operator; // operator tree for processing row further (option)
@@ -352,6 +352,7 @@ private void getNextPath() throws Exception {
         }
         return;
       } else {
+        setFetchOperatorContext(job, work.getPartDir());
         iterPath = work.getPartDir().iterator();
         iterPartDesc = work.getPartDesc().iterator();
       }
@@ -380,6 +381,31 @@
   }
 
   /**
+   * Set context for this fetch operator into the jobconf.
+   * This helps InputFormats make decisions based on the scope of the complete
+   * operation.
+   * @param conf the configuration to modify
+   * @param partDirs the list of partition directories
+   */
+  static void setFetchOperatorContext(JobConf conf,
+      ArrayList<Path> partDirs) {
+    conf.set(FETCH_OPERATOR_TABLE_CALL_ID, UUID.randomUUID().toString());
+    if (partDirs != null) {
+      StringBuilder buff = new StringBuilder();
+      boolean first = true;
+      for (Path p : partDirs) {
+        if (first) {
+          first = false;
+        } else {
+          buff.append('\t');
+        }
+        buff.append(p.toString());
+      }
+      conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString());
+    }
+  }
+
+  /**
    * A cache of Object Inspector Settable Properties.
   */
   private static Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
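
The two configuration keys introduced above give an InputFormat enough context to reason about the whole fetch rather than a single directory: hive.fetchoperator.table.call.id carries one UUID shared by every getSplits() call made for the same fetch, and hive.complete.dir.list carries a tab-separated list of every partition directory involved. A minimal sketch of a consumer, assuming a hypothetical class name DirAwareInputFormat (only the two key names come from this patch):

    import java.io.IOException;

    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;

    public class DirAwareInputFormat extends TextInputFormat {
      @Override
      public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
        // Same UUID for every getSplits() call belonging to one fetch.
        String callId = job.get("hive.fetchoperator.table.call.id");
        // Tab-separated list of all partition directories in the fetch.
        String dirList = job.get("hive.complete.dir.list");
        if (callId != null && dirList != null) {
          String[] allDirs = dirList.split("\t");
          // e.g. key per-call state on callId and size it by allDirs.length
        }
        return super.getSplits(job, numSplits);
      }
    }
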
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
index 487bb33..2c9e81f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
@@ -697,6 +697,7 @@ public MergeQueue(String alias, FetchWork fetchWork, JobConf jobConf,
     // But if hive supports assigning bucket number for each partition, this can be vary
     public void setupContext(List<Path> paths) throws HiveException {
       int segmentLen = paths.size();
+      FetchOperator.setFetchOperatorContext(jobConf, fetchWork.getPartDir());
       FetchOperator[] segments = segmentsForSize(segmentLen);
       for (int i = 0 ; i < segmentLen; i++) {
         Path path = paths.get(i);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index c52a093..bc78c03 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2107,13 +2107,11 @@ public static int getDefaultNotificationInterval(Configuration hconf) {
    * configuration which receives configured properties
    */
   public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) {
-    String bucketString = tbl.getProperties()
-        .getProperty(hive_metastoreConstants.BUCKET_COUNT);
-    // copy the bucket count
-    if (bucketString != null) {
-      job.set(hive_metastoreConstants.BUCKET_COUNT, bucketString);
+    Properties tblProperties = tbl.getProperties();
+    for (String name : tblProperties.stringPropertyNames()) {
+      job.set(serdeConstants.TABLE_PROP_PREFIX + name,
+          (String) tblProperties.get(name));
     }
-
     Map<String, String> jobProperties = tbl.getJobProperties();
     if (jobProperties == null) {
       return;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 7edb3c2..82237bf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -54,6 +54,7 @@
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -376,7 +377,8 @@ public boolean validateInput(FileSystem fs, HiveConf conf,
       footerInSplits = HiveConf.getBoolVar(conf,
           ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS);
       numBuckets =
-          Math.max(conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0), 0);
+          Math.max(conf.getInt(serdeConstants.TABLE_PROP_PREFIX +
+              hive_metastoreConstants.BUCKET_COUNT, 0), 0);
       int cacheStripeDetailsSize = HiveConf.getIntVar(conf,
           ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);
       int numThreads = HiveConf.getIntVar(conf,
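
With the Utilities change above, copyTableJobPropertiesToConf() no longer copies just the bucket count; it copies every table property into the JobConf under the serdeConstants.TABLE_PROP_PREFIX ("table.") namespace, so readers must switch to the prefixed key, as the OrcInputFormat hunk does. A minimal sketch of that read-side migration (the helper class is hypothetical; "bucket_count" is the value of hive_metastoreConstants.BUCKET_COUNT):

    import org.apache.hadoop.mapred.JobConf;

    final class TablePropLookup {
      // Mirrors serdeConstants.TABLE_PROP_PREFIX introduced by this patch.
      private static final String TABLE_PROP_PREFIX = "table.";
      // Value of hive_metastoreConstants.BUCKET_COUNT.
      private static final String BUCKET_COUNT = "bucket_count";

      static int readBucketCount(JobConf conf) {
        // Before the patch: conf.getInt(BUCKET_COUNT, 0).
        // After the patch the key is prefixed, as OrcInputFormat now does:
        return Math.max(conf.getInt(TABLE_PROP_PREFIX + BUCKET_COUNT, 0), 0);
      }
    }
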
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
index bbde09c..780322f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
@@ -18,17 +18,24 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.io.IOContext;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -42,6 +49,10 @@
 import org.apache.hadoop.hive.ql.plan.ScriptDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.processors.CommandProcessor;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -49,8 +60,14 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.junit.Test;
 
 /**
  * TestOperators.
@@ -274,7 +291,7 @@ public void testScriptOperator() throws Throwable {
           cd, sop);
 
       op.initialize(new JobConf(TestOperators.class),
-          new ObjectInspector[] {r[0].oi});
+          new ObjectInspector[]{r[0].oi});
 
       // evaluate on row
       for (int i = 0; i < 5; i++) {
@@ -379,4 +396,68 @@ public void testMapOperator() throws Throwable {
       throw (e);
     }
   }
+
+  /**
+   * A custom input format that checks to make sure that the fetch operator
+   * sets the required attributes.
+   */
+  public static class CustomInFmt extends TextInputFormat {
+
+    @Override
+    public InputSplit[] getSplits(JobConf job, int splits) throws IOException {
+
+      // ensure that the table properties were copied
+      assertEquals("val1", job.get("table.myprop1"));
+      assertEquals("val2", job.get("table.myprop2"));
+
+      // ensure the fetch id was set.
+      assertNotNull(job.get("hive.fetchoperator.table.call.id"));
+
+      // ensure that both of the partitions are in the complete list.
+      String[] dirs = job.get("hive.complete.dir.list").split("\t");
+      assertEquals(2, dirs.length);
+      assertEquals(true, dirs[0].endsWith("/state=CA"));
+      assertEquals(true, dirs[1].endsWith("/state=OR"));
+      return super.getSplits(job, splits);
+    }
+  }
+
+  @Test
+  public void testFetchOperatorContext() throws Exception {
+    HiveConf conf = new HiveConf();
+    conf.set("hive.support.concurrency", "false");
+    SessionState.start(conf);
+    String cmd = "create table fetchOp (name string) " +
+        "partitioned by (state string) " +
+        "stored as " +
+        "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " +
+        "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " +
+        "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')";
+    Driver driver = new Driver();
+    driver.init();
+    CommandProcessorResponse response = driver.run(cmd);
+    assertEquals(0, response.getResponseCode());
+    List<String> result = new ArrayList<String>();
+
+    cmd = "load data local inpath '../data/files/people.txt' " +
+        "overwrite into table fetchOp partition (state='CA')";
+    driver.init();
+    response = driver.run(cmd);
+    assertEquals(0, response.getResponseCode());
+
+    cmd = "load data local inpath '../data/files/people.txt' " +
+        "overwrite into table fetchOp partition (state='OR')";
+    driver.init();
+    response = driver.run(cmd);
+    assertEquals(0, response.getResponseCode());
+
+    cmd = "select * from fetchOp";
+    driver.init();
+    driver.setMaxRows(500);
+    response = driver.run(cmd);
+    assertEquals(0, response.getResponseCode());
+    driver.getResults(result);
+    assertEquals(200, result.size());
+    driver.close();
+  }
 }
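
The row-count assertion in testFetchOperatorContext follows directly from the fixture: people.txt holds 100 names and is loaded into both the state=CA and state=OR partitions, so a full-table select returns 2 x 100 = 200 rows, and setMaxRows(500) keeps the driver from truncating the result below that. Sketched as arithmetic (values taken from the test above):

    final class ExpectedRows {
      static int expected() {
        int rowsPerPartition = 100; // lines in data/files/people.txt
        int partitions = 2;         // state=CA and state=OR
        return rowsPerPartition * partitions; // 200, under the 500-row fetch cap
      }
    }
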
+ String[] dirs = job.get("hive.complete.dir.list").split("\t"); + assertEquals(2, dirs.length); + assertEquals(true, dirs[0].endsWith("/state=CA")); + assertEquals(true, dirs[1].endsWith("/state=OR")); + return super.getSplits(job, splits); + } + } + + @Test + public void testFetchOperatorContext() throws Exception { + HiveConf conf = new HiveConf(); + conf.set("hive.support.concurrency", "false"); + SessionState.start(conf); + String cmd = "create table fetchOp (name string) " + + "partitioned by (state string) " + + "stored as " + + "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " + + "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " + + "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')"; + Driver driver = new Driver(); + driver.init(); + CommandProcessorResponse response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + List result = new ArrayList(); + + cmd = "load data local inpath '../data/files/people.txt' " + + "overwrite into table fetchOp partition (state='CA')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "load data local inpath '../data/files/people.txt' " + + "overwrite into table fetchOp partition (state='OR')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "select * from fetchOp"; + driver.init(); + driver.setMaxRows(500); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + driver.getResults(result); + assertEquals(200, result.size()); + driver.close(); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 5664f3f..da2405a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -1346,7 +1347,8 @@ public void testVectorizationWithBuckets() throws Exception { .setBlocks(new MockBlock("host0", "host1")); // call getsplits - conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3); + conf.setInt(serdeConstants.TABLE_PROP_PREFIX + + hive_metastoreConstants.BUCKET_COUNT, 3); HiveInputFormat inputFormat = new HiveInputFormat(); InputSplit[] splits = inputFormat.getSplits(conf, 10); diff --git serde/if/serde.thrift serde/if/serde.thrift index 31c87ee..47cc34e 100644 --- serde/if/serde.thrift +++ serde/if/serde.thrift @@ -68,6 +68,8 @@ const string UNION_TYPE_NAME = "uniontype"; const string LIST_COLUMNS = "columns"; const string LIST_COLUMN_TYPES = "columns.types"; +const string TABLE_PROP_PREFIX = "table."; + const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME VARCHAR_TYPE_NAME CHAR_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME DECIMAL_TYPE_NAME BINARY_TYPE_NAME], const set CollectionTypes = [ LIST_TYPE_NAME MAP_TYPE_NAME ], const set IntegralTypes = [ TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME ], 
diff --git serde/src/gen/thrift/gen-cpp/serde_constants.cpp serde/src/gen/thrift/gen-cpp/serde_constants.cpp
index 54503e3..6fadc2f 100644
--- serde/src/gen/thrift/gen-cpp/serde_constants.cpp
+++ serde/src/gen/thrift/gen-cpp/serde_constants.cpp
@@ -87,6 +87,8 @@ serdeConstants::serdeConstants() {
 
   LIST_COLUMN_TYPES = "columns.types";
 
+  TABLE_PROP_PREFIX = "table.";
+
   PrimitiveTypes.insert("void");
   PrimitiveTypes.insert("boolean");
   PrimitiveTypes.insert("tinyint");
diff --git serde/src/gen/thrift/gen-cpp/serde_constants.h serde/src/gen/thrift/gen-cpp/serde_constants.h
index d56c917..6ca9207 100644
--- serde/src/gen/thrift/gen-cpp/serde_constants.h
+++ serde/src/gen/thrift/gen-cpp/serde_constants.h
@@ -53,6 +53,7 @@ class serdeConstants {
   std::string UNION_TYPE_NAME;
   std::string LIST_COLUMNS;
   std::string LIST_COLUMN_TYPES;
+  std::string TABLE_PROP_PREFIX;
   std::set<std::string> PrimitiveTypes;
   std::set<std::string> CollectionTypes;
   std::set<std::string> IntegralTypes;
diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
index 515cf25..202e84f 100644
--- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
+++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
@@ -109,6 +109,8 @@
 
   public static final String LIST_COLUMN_TYPES = "columns.types";
 
+  public static final String TABLE_PROP_PREFIX = "table.";
+
   public static final Set<String> PrimitiveTypes = new HashSet<String>();
   static {
     PrimitiveTypes.add("void");
diff --git serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
index 837dd11..534ee0f 100644
--- serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
+++ serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
@@ -92,6 +92,8 @@
 $GLOBALS['serde_CONSTANTS']['LIST_COLUMNS'] = "columns";
 
 $GLOBALS['serde_CONSTANTS']['LIST_COLUMN_TYPES'] = "columns.types";
+$GLOBALS['serde_CONSTANTS']['TABLE_PROP_PREFIX'] = "table.";
+
 $GLOBALS['serde_CONSTANTS']['PrimitiveTypes'] = array(
   "void" => true,
   "boolean" => true,
diff --git serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
index 8eac87d..fdc40ca 100644
--- serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
+++ serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
@@ -47,6 +47,7 @@
 UNION_TYPE_NAME = "uniontype"
 LIST_COLUMNS = "columns"
 LIST_COLUMN_TYPES = "columns.types"
+TABLE_PROP_PREFIX = "table."
 PrimitiveTypes = set([
   "void",
   "boolean",
diff --git serde/src/gen/thrift/gen-rb/serde_constants.rb serde/src/gen/thrift/gen-rb/serde_constants.rb
index ed86522..b9da90f 100644
--- serde/src/gen/thrift/gen-rb/serde_constants.rb
+++ serde/src/gen/thrift/gen-rb/serde_constants.rb
@@ -83,6 +83,8 @@
 LIST_COLUMNS = %q"columns"
 
 LIST_COLUMN_TYPES = %q"columns.types"
+TABLE_PROP_PREFIX = %q"table."
+
 PrimitiveTypes = Set.new([
   %q"void",
   %q"boolean",
diff --git service/src/gen/thrift/gen-py/TCLIService/TCLIService-remote service/src/gen/thrift/gen-py/TCLIService/TCLIService-remote
old mode 100644
new mode 100755
diff --git service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote
old mode 100644
new mode 100755