diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 32ab3d8..e48298f 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -223,6 +223,9 @@
// ignore the mapjoin hint
HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true),
+ // max number of footer lines a user can set for a table file
+ HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100),
+
// Hadoop Configuration Properties
// Properties with null values are ignored and exist only for the purpose of giving us
// a symbolic name to reference in the Hive source code. Properties with non-null
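Note: the parse-with-fallback-and-cap logic for "skip.header.number" / "skip.footer.number" recurs three times below (FetchOperator, HiveContextAwareRecordReader, HiveInputFormat). A minimal sketch of a shared helper that captures it; the class and method names here are hypothetical, not part of this patch:

import java.util.Properties;

public final class HeaderFooterUtils {
  private HeaderFooterUtils() {
  }

  // Reads an integer table property such as "skip.header.number" or
  // "skip.footer.number"; falls back to 0 when the value is missing or
  // not a valid integer, and caps the result at maxAllowed.
  public static int getLineCount(Properties tblProps, String property, int maxAllowed) {
    int count;
    try {
      count = Integer.parseInt(tblProps.getProperty(property, "0"));
    } catch (NumberFormatException nfe) {
      count = 0; // invalid value: skip nothing
    }
    return Math.min(count, maxAllowed);
  }
}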
diff --git data/files/header_footer_table_1/0001.txt data/files/header_footer_table_1/0001.txt
new file mode 100644
index 0000000..c242b42
--- /dev/null
+++ data/files/header_footer_table_1/0001.txt
@@ -0,0 +1,8 @@
+name message 0
+steven hive 1
+dave oozie 2
+xifa phd 3
+chuan hadoop 4
+shanyu senior 5
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git data/files/header_footer_table_1/0002.txt data/files/header_footer_table_1/0002.txt
new file mode 100644
index 0000000..d5db38d
--- /dev/null
+++ data/files/header_footer_table_1/0002.txt
@@ -0,0 +1,8 @@
+name message 0
+steven2 hive 11
+dave2 oozie 12
+xifa2 phd 13
+chuan2 hadoop 14
+shanyu2 senior 15
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git data/files/header_footer_table_1/0003.txt data/files/header_footer_table_1/0003.txt
new file mode 100644
index 0000000..f7a763d
--- /dev/null
+++ data/files/header_footer_table_1/0003.txt
@@ -0,0 +1,4 @@
+name message 0
+david3 oozie 22
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git data/files/header_footer_table_2/2012/01/01/0001.txt data/files/header_footer_table_2/2012/01/01/0001.txt
new file mode 100644
index 0000000..c242b42
--- /dev/null
+++ data/files/header_footer_table_2/2012/01/01/0001.txt
@@ -0,0 +1,8 @@
+name message 0
+steven hive 1
+dave oozie 2
+xifa phd 3
+chuan hadoop 4
+shanyu senior 5
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git data/files/header_footer_table_2/2012/01/02/0002.txt data/files/header_footer_table_2/2012/01/02/0002.txt
new file mode 100644
index 0000000..d5db38d
--- /dev/null
+++ data/files/header_footer_table_2/2012/01/02/0002.txt
@@ -0,0 +1,8 @@
+name message 0
+steven2 hive 11
+dave2 oozie 12
+xifa2 phd 13
+chuan2 hadoop 14
+shanyu2 senior 15
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git data/files/header_footer_table_2/2012/01/03/0003.txt data/files/header_footer_table_2/2012/01/03/0003.txt
new file mode 100644
index 0000000..f7a763d
--- /dev/null
+++ data/files/header_footer_table_2/2012/01/03/0003.txt
@@ -0,0 +1,4 @@
+name message 0
+david3 oozie 22
+footer1 footer1 0
+footer2 0
\ No newline at end of file
diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index a453d8a..c8e8c18 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -36,7 +36,7 @@
false
false
- list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q
+ list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,file_with_header_footer.q
cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q
add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 5abcfc1..a7354a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -37,6 +37,7 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
+import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.KVPair;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveRecordReader;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -80,6 +81,10 @@
private PartitionDesc currPart;
private TableDesc currTbl;
private boolean tblDataDone;
+ private ArrayList<KVPair> footerbuf = null;
+ private int footercur = 0;
+ private int headerCount = 0;
+ private int footerCount = 0;
private boolean hasVC;
private boolean isPartitioned;
@@ -527,6 +532,7 @@ protected void pushRow(InspectableObject row) throws HiveException {
public InspectableObject getNextRow() throws IOException {
try {
while (true) {
+ boolean ret = true;
if (context != null) {
context.resetRow();
}
@@ -535,9 +541,81 @@ public InspectableObject getNextRow() throws IOException {
if (currRecReader == null) {
return null;
}
+ //start reading a new file
+ headerCount = 0;
+ footerCount = 0;
+ TableDesc table = null;
+ if (currTbl != null) {
+ table = currTbl;
+ } else if (currPart != null) {
+ table = currPart.getTableDesc();
+ }
+ if (table != null) {
+ try {
+ headerCount = Integer.parseInt(table.getProperties()
+ .getProperty("skip.header.number", "0"));
+ } catch (NumberFormatException nfe) {
+ //set header count to 0 if an invalid property is passed
+ headerCount = 0;
+ }
+ try {
+ footerCount = Integer.parseInt(table.getProperties()
+ .getProperty("skip.footer.number", "0"));
+ if (footerCount > HiveConf.getIntVar(job, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER)) {
+ LOG.info("Footer for table " + table.getTableName() +
+ " exceed max footer length, setting footer rows to " +
+ HiveConf.getIntVar(job, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER));
+ footerCount = HiveConf.getIntVar(job, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER);
+ }
+ } catch (NumberFormatException nfe) {
+ //set footer count to 0 if an invalid property is passed
+ footerCount = 0;
+ }
+ }
+ //skip header lines
+ while (headerCount > 0) {
+ if (!currRecReader.next(key, value)) {
+ ret = false;
+ break;
+ }
+ headerCount--;
+ }
+ if (ret && footerCount > 0) {
+ //if table format has footer row, initialize the buffer
+ footerbuf = new ArrayList<KVPair>();
+ //fill the buffer
+ while (footerbuf.size() < footerCount) {
+ ret = currRecReader.next(key, value);
+ if (!ret) break;
+ KVPair tmp = new KVPair();
+ tmp.key = ReflectionUtils.copy(job, key, tmp.key);
+ tmp.value = ReflectionUtils.copy(job, value, tmp.value);
+ footerbuf.add(tmp);
+ }
+ footercur = 0;
+ } else {
+ this.footerbuf = null;
+ }
+ }
+ if (ret && footerbuf == null) {
+ //when the file doesn't end after skipping the header lines
+ //and there are no footer lines, read normally
+ ret = currRecReader.next(key, value);
+ }
+ if (ret && footerbuf != null) {
+ //when table files have footer rows
+ key = ReflectionUtils.copy(job, (WritableComparable)footerbuf.get(footercur).getKey(), key);
+ value = ReflectionUtils.copy(job, (Writable)footerbuf.get(footercur).getValue(), value);
+ ret = currRecReader.next((WritableComparable)footerbuf.get(footercur).getKey(),
+ (Writable)footerbuf.get(footercur).getValue());
+ if (ret) {
+ footercur = (footercur + 1) % footerbuf.size();
+ } else {
+ //file reached the end; the remaining buffered rows are footer rows and are not returned
+ key = ReflectionUtils.copy(job, (WritableComparable)footerbuf.get(footercur).getKey(), key);
+ value = ReflectionUtils.copy(job, (Writable)footerbuf.get(footercur).getValue(), value);
+ }
}
-
- boolean ret = currRecReader.next(key, value);
if (ret) {
if (operator != null && context != null && context.inputFileChanged()) {
// The child operators cleanup if input file has changed
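The footer handling above is a lag-N read: each buffered row is emitted only after N newer rows have been read behind it, so the final N rows (the footer) never surface. A standalone sketch of the technique, using plain strings in place of the Writable key/value pairs; names and structure are illustrative only:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;

public class FooterSkipSketch {
  public static List<String> skipFooter(Iterator<String> rows, int footerCount) {
    Queue<String> buffer = new ArrayDeque<String>();
    List<String> out = new ArrayList<String>();
    while (rows.hasNext()) {
      String row = rows.next();
      if (footerCount == 0) {
        out.add(row);
        continue;
      }
      buffer.add(row);
      if (buffer.size() > footerCount) {
        out.add(buffer.poll()); // safe to emit: footerCount newer rows exist behind it
      }
    }
    // whatever remains in the buffer is the footer; drop it
    return out;
  }

  public static void main(String[] args) {
    List<String> file = Arrays.asList("header", "row1", "row2", "footer1", "footer2");
    // skip the one header row up front, then lag-2 to drop the two footer rows
    System.out.println(skipFooter(file.subList(1, file.size()).iterator(), 2)); // [row1, row2]
  }
}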
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index dd5cb6b..db8de4d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -20,24 +20,34 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
+import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.IOContext.Comparison;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.util.ReflectionUtils;
/** This class prepares an IOContext, and provides the ability to perform a binary search on the
* data. The binary search can be used by setting the value of inputFormatSorted in the
@@ -217,6 +227,37 @@ public float getProgress() throws IOException {
}
}
+ public static class KVPair<K, V> {
+ public K key;
+ public V value;
+ public KVPair(K key, V value) {
+ this.key = key;
+ this.value = value;
+ }
+ public KVPair(){
+ this.key = null;
+ this.value = null;
+ }
+ public K getKey() {
+ return this.key;
+ }
+ public V getValue() {
+ return this.value;
+ }
+ public void setKey(K key) {
+ this.key = key;
+ }
+ public void setValue(V value) {
+ this.value = value;
+ }
+ }
+
+ private ArrayList<KVPair<K, V>> footerbuf = null;
+ private int footercur = 0;
+ private int headerCount = 0;
+ private int footerCount = 0;
+
public boolean doNext(K key, V value) throws IOException {
if (this.isSorted) {
if (this.getIOContext().shouldEndBinarySearch() ||
@@ -271,7 +312,85 @@ public boolean doNext(K key, V value) throws IOException {
}
try {
- return recordReader.next(key, value);
+ //when start reading new file, check header, footer rows
+ if (this.ioCxtRef.getCurrentBlockStart() == 0) {
+ //check if the table file has header to skip
+ Path filePath = this.ioCxtRef.getInputPath();
+ PartitionDesc part = null;
+ try {
+ Map<String, PartitionDesc> pathToPartitionInfo = Utilities
+ .getMapWork(jobConf).getPathToPartitionInfo();
+ part = HiveFileFormatUtils
+ .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+ filePath, IOPrepareCache.get().getPartitionDescMap());
+ } catch (Exception e) {
+ LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath()
+ + "because " + e.getMessage());
+ part = null;
+ }
+ TableDesc table = (part == null) ? null : part.getTableDesc();
+ if (table != null) {
+ try {
+ headerCount = Integer.parseInt(table.getProperties()
+ .getProperty("skip.header.number", "0"));
+ } catch (NumberFormatException nfe) {
+ //set header count to 0 if an invalid property is passed
+ headerCount = 0;
+ }
+ try {
+ footerCount = Integer.parseInt(table.getProperties()
+ .getProperty("skip.footer.number", "0"));
+ if (footerCount > HiveConf.getIntVar(jobConf, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER)) {
+ LOG.info("Footer for table " + table.getTableName() +
+ " exceed max footer length, setting footer rows to " +
+ HiveConf.getIntVar(jobConf, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER));
+ footerCount = HiveConf.getIntVar(jobConf, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER);
+ }
+ } catch (NumberFormatException nfe) {
+ //set footer count to 0 if an invalid property is passed
+ footerCount = 0;
+ }
+ }
+ //if input contains header, skip header
+ while (headerCount > 0) {
+ if (!recordReader.next(key, value)) {
+ return false;
+ }
+ headerCount--;
+ }
+ if (footerCount > 0) {
+ //if table format has footer row, initialize the buffer
+ footerbuf = new ArrayList<KVPair<K, V>>();
+ //fill the buffer
+ while (footerbuf.size() < footerCount) {
+ boolean ret = recordReader.next(key, value);
+ if (!ret) {
+ //reached the end of the file before the footer buffer filled; nothing left to return
+ return ret;
+ }
+ KVPair<K, V> tmp = new KVPair<K, V>();
+ tmp.key = ReflectionUtils.copy(jobConf, key, tmp.key);
+ tmp.value = ReflectionUtils.copy(jobConf, value, tmp.value);
+ footerbuf.add(tmp);
+ }
+ footercur = 0;
+ }
+ }
+ if (footerbuf == null) {
+ //table files don't have footer rows
+ return recordReader.next(key, value);
+ } else {
+ //table files have footer rows
+ key = ReflectionUtils.copy(jobConf, footerbuf.get(footercur).getKey(), key);
+ value = ReflectionUtils.copy(jobConf, footerbuf.get(footercur).getValue(), value);
+ boolean ret = recordReader.next(footerbuf.get(footercur).getKey(), footerbuf.get(footercur).getValue());
+ if (ret) {
+ footercur = (footercur + 1) % footerbuf.size();
+ } else {
+ //file reached the end; the remaining buffered rows are footer rows and are not returned
+ key = ReflectionUtils.copy(jobConf, footerbuf.get(footercur).getKey(), key);
+ value = ReflectionUtils.copy(jobConf, footerbuf.get(footercur).getValue(), value);
+ }
+ return ret;
+ }
} catch (Exception e) {
return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jobConf);
}
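Both readers buffer deep copies via ReflectionUtils.copy rather than holding references, because Hadoop RecordReaders reuse the same key/value objects across next() calls; a buffered reference would end up aliasing the latest row. A small sketch illustrating why the copy matters (class name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;

public class CopySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Text reused = new Text("row1");
    Text buffered = ReflectionUtils.copy(conf, reused, null); // deep copy of row1
    reused.set("row2"); // the reader overwrites its reused object on the next call
    System.out.println(buffered); // still prints "row1" thanks to the copy
  }
}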
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 0ec6e63..4ca454a 100755
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -33,6 +33,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -286,6 +287,29 @@ protected void init(JobConf job) {
FileInputFormat.setInputPaths(newjob, dir);
newjob.setInputFormat(inputFormat.getClass());
+ TableDesc tableDesc = part.getTableDesc();
+ int headerCount = 0;
+ int footerCount = 0;
+ if (tableDesc != null) {
+ try {
+ headerCount = Integer.parseInt(tableDesc.getProperties()
+ .getProperty("skip.header.number", "0"));
+ } catch (NumberFormatException nfe) {
+ //set header count to 0 if an invalid property is passed
+ headerCount = 0;
+ }
+ try {
+ footerCount = Integer.parseInt(tableDesc.getProperties()
+ .getProperty("skip.footer.number", "0"));
+ } catch (NumberFormatException nfe) {
+ //set footer count to 0 if an invalid property is passed
+ footerCount = 0;
+ }
+ if (headerCount != 0 || footerCount != 0) {
+ //input files have header or footer rows and cannot be split
+ newjob.setLong("mapred.min.split.size", Long.MAX_VALUE);
+ }
+ }
InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
for (InputSplit is : iss) {
result.add(new HiveInputSplit(is, inputFormatClass.getName()));
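Raising mapred.min.split.size to Long.MAX_VALUE works because the old mapred-API FileInputFormat computes each split as max(minSize, min(goalSize, blockSize)), so an effectively infinite minimum yields one split per file, and the header/footer counting above always sees whole files. A sketch of that arithmetic (the helper mirrors the FileInputFormat formula; the class itself is illustrative):

public class SplitSizeSketch {
  static long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize)); // mirrors FileInputFormat
  }

  public static void main(String[] args) {
    long blockSize = 128L * 1024 * 1024;
    long goalSize = 64L * 1024 * 1024;
    System.out.println(computeSplitSize(goalSize, 1, blockSize));              // 64 MB: the file gets split
    System.out.println(computeSplitSize(goalSize, Long.MAX_VALUE, blockSize)); // Long.MAX_VALUE: one split per file
  }
}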
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
index 85dd975..1047a8d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
@@ -24,13 +24,19 @@
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.LinkedHashMap;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
@@ -127,6 +133,15 @@ private void init() throws IOException {
when(rcfReader.getPos()).thenReturn(50L);
conf = new JobConf();
conf.setBoolean("hive.input.format.sorted", true);
+
+ TableDesc tblDesc = Utilities.defaultTd;
+ PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
+ LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
+ pt.put("/tmp/testfolder", partDesc);
+ MapredWork mrwork = new MapredWork();
+ mrwork.getMapWork().setPathToPartitionInfo(pt);
+ Utilities.setMapRedWork(conf, mrwork,"/tmp/" + System.getProperty("user.name") + "/hive");
+
hiveSplit = new TestHiveInputSplit();
hbsReader = new TestHiveRecordReader(rcfReader, conf);
hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
index 0686d9b..94061af 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
import junit.framework.TestCase;
@@ -38,6 +39,9 @@
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -70,6 +74,15 @@
protected void setUp() throws IOException {
conf = new Configuration();
job = new JobConf(conf);
+
+ TableDesc tblDesc = Utilities.defaultTd;
+ PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
+ LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
+ pt.put("/tmp/testfolder", partDesc);
+ MapredWork mrwork = new MapredWork();
+ mrwork.getMapWork().setPathToPartitionInfo(pt);
+ Utilities.setMapRedWork(job, mrwork,"/tmp/" + System.getProperty("user.name") + "/hive");
+
fileSystem = FileSystem.getLocal(conf);
testDir = new Path(System.getProperty("test.tmp.dir", System.getProperty(
"user.dir", new File(".").getAbsolutePath()))
diff --git ql/src/test/queries/clientpositive/file_with_header_footer.q ql/src/test/queries/clientpositive/file_with_header_footer.q
new file mode 100644
index 0000000..57394a7
--- /dev/null
+++ ql/src/test/queries/clientpositive/file_with_header_footer.q
@@ -0,0 +1,29 @@
+dfs -mkdir hdfs:///tmp/test/;
+
+dfs -copyFromLocal ../data/files/header_footer_table_1 hdfs:///tmp/test/header_footer_table_1;
+
+dfs -copyFromLocal ../data/files/header_footer_table_2 hdfs:///tmp/test/header_footer_table_2;
+
+CREATE EXTERNAL TABLE header_footer_table_1 (name string, message string, id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION 'hdfs:///tmp/test/header_footer_table_1' tblproperties ("skip.header.number"="1", "skip.footer.number"="2");
+
+SELECT * FROM header_footer_table_1;
+
+SELECT * FROM header_footer_table_1 WHERE id < 50;
+
+CREATE EXTERNAL TABLE header_footer_table_2 (name string, message string, id int) PARTITIONED BY (year int, month int, day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' tblproperties ("skip.header.number"="1", "skip.footer.number"="2");
+
+ALTER TABLE header_footer_table_2 ADD PARTITION (year=2012, month=1, day=1) location 'hdfs:///tmp/test/header_footer_table_2/2012/01/01';
+
+ALTER TABLE header_footer_table_2 ADD PARTITION (year=2012, month=1, day=2) location 'hdfs:///tmp/test/header_footer_table_2/2012/01/02';
+
+ALTER TABLE header_footer_table_2 ADD PARTITION (year=2012, month=1, day=3) location 'hdfs:///tmp/test/header_footer_table_2/2012/01/03';
+
+SELECT * FROM header_footer_table_2;
+
+SELECT * FROM header_footer_table_2 WHERE id < 50;
+
+DROP TABLE header_footer_table_1;
+
+DROP TABLE header_footer_table_2;
+
+dfs -rmr hdfs:///tmp/test;
\ No newline at end of file
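The q file exercises the feature through the CLI; the same DDL also works over HiveServer2's JDBC driver. A minimal sketch, assuming a HiveServer2 instance on localhost:10000 and that the HDFS directory from the test already exists; the class and table names are hypothetical:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class HeaderFooterDdl {
  public static void main(String[] args) throws Exception {
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "");
    Statement stmt = conn.createStatement();
    // same table properties as the q test: skip one header row and two footer rows
    stmt.execute("CREATE EXTERNAL TABLE header_footer_demo (name string, message string, id int) "
        + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' "
        + "LOCATION 'hdfs:///tmp/test/header_footer_table_1' "
        + "TBLPROPERTIES (\"skip.header.number\"=\"1\", \"skip.footer.number\"=\"2\")");
    stmt.close();
    conn.close();
  }
}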
diff --git ql/src/test/results/clientpositive/file_with_header_footer.q.out ql/src/test/results/clientpositive/file_with_header_footer.q.out
new file mode 100644
index 0000000..11e5349
--- /dev/null
+++ ql/src/test/results/clientpositive/file_with_header_footer.q.out
@@ -0,0 +1,136 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@header_footer_table_1
+PREHOOK: query: SELECT * FROM header_footer_table_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@header_footer_table_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM header_footer_table_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@header_footer_table_1
+#### A masked pattern was here ####
+steven hive 1
+dave oozie 2
+xifa phd 3
+chuan hadoop 4
+shanyu senior 5
+steven2 hive 11
+dave2 oozie 12
+xifa2 phd 13
+chuan2 hadoop 14
+shanyu2 senior 15
+david3 oozie 22
+PREHOOK: query: SELECT * FROM header_footer_table_1 WHERE id < 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@header_footer_table_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM header_footer_table_1 WHERE id < 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@header_footer_table_1
+#### A masked pattern was here ####
+steven hive 1
+dave oozie 2
+xifa phd 3
+chuan hadoop 4
+shanyu senior 5
+steven2 hive 11
+dave2 oozie 12
+xifa2 phd 13
+chuan2 hadoop 14
+shanyu2 senior 15
+david3 oozie 22
+PREHOOK: query: CREATE EXTERNAL TABLE header_footer_table_2 (name string, message string, id int) PARTITIONED BY (year int, month int, day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' tblproperties ("skip.header.number"="1", "skip.footer.number"="2")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE EXTERNAL TABLE header_footer_table_2 (name string, message string, id int) PARTITIONED BY (year int, month int, day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' tblproperties ("skip.header.number"="1", "skip.footer.number"="2")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@header_footer_table_2
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@header_footer_table_2
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=1
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@header_footer_table_2
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=2
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@header_footer_table_2
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=3
+PREHOOK: query: SELECT * FROM header_footer_table_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@header_footer_table_2
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM header_footer_table_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3
+#### A masked pattern was here ####
+steven hive 1 2012 1 1
+dave oozie 2 2012 1 1
+xifa phd 3 2012 1 1
+chuan hadoop 4 2012 1 1
+shanyu senior 5 2012 1 1
+steven2 hive 11 2012 1 2
+dave2 oozie 12 2012 1 2
+xifa2 phd 13 2012 1 2
+chuan2 hadoop 14 2012 1 2
+shanyu2 senior 15 2012 1 2
+david3 oozie 22 2012 1 3
+PREHOOK: query: SELECT * FROM header_footer_table_2 WHERE id < 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@header_footer_table_2
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2
+PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM header_footer_table_2 WHERE id < 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2
+POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3
+#### A masked pattern was here ####
+steven hive 1 2012 1 1
+dave oozie 2 2012 1 1
+xifa phd 3 2012 1 1
+chuan hadoop 4 2012 1 1
+shanyu senior 5 2012 1 1
+steven2 hive 11 2012 1 2
+dave2 oozie 12 2012 1 2
+xifa2 phd 13 2012 1 2
+chuan2 hadoop 14 2012 1 2
+shanyu2 senior 15 2012 1 2
+david3 oozie 22 2012 1 3
+PREHOOK: query: DROP TABLE header_footer_table_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@header_footer_table_1
+PREHOOK: Output: default@header_footer_table_1
+POSTHOOK: query: DROP TABLE header_footer_table_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@header_footer_table_1
+POSTHOOK: Output: default@header_footer_table_1
+PREHOOK: query: DROP TABLE header_footer_table_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@header_footer_table_2
+PREHOOK: Output: default@header_footer_table_2
+POSTHOOK: query: DROP TABLE header_footer_table_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@header_footer_table_2
+POSTHOOK: Output: default@header_footer_table_2
+#### A masked pattern was here ####