Index: contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java =================================================================== --- contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java (revision 1145469) +++ contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java (working copy) @@ -180,7 +180,11 @@ // Report the row LOG.warn("" + unmatchedRows + " unmatched rows are found: " + rowText); } - return null; + + for (int c = 0; c < numColumns; c++) { + row.set(c, null); + } + return row; } // Otherwise, return the row. Index: contrib/src/test/queries/clientpositive/serde_regex_withwrongregex.q =================================================================== --- contrib/src/test/queries/clientpositive/serde_regex_withwrongregex.q (revision 0) +++ contrib/src/test/queries/clientpositive/serde_regex_withwrongregex.q (revision 0) @@ -0,0 +1,27 @@ +add jar ${system:build.dir}/hive-contrib-${system:hive.version}.jar; + +DROP TABLE sampleregex; + +CREATE TABLE sampleregex( + host STRING, + identity STRING, + user STRING, + time STRING, + request STRING, + status STRING, + size STRING, + referer STRING, + agent STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' +WITH SERDEPROPERTIES ( + "input.regex" = "([^ ]*) ([^ ]*) (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?", + "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s" +) +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH "../data/files/apache.access.log" INTO TABLE sampleregex; +LOAD DATA LOCAL INPATH "../data/files/apache.access.2.log" INTO TABLE sampleregex; + +SELECT * FROM sampleregex; + +DROP TABLE sampleregex; Index: contrib/src/test/results/clientpositive/serde_regex_withwrongregex.q.out =================================================================== --- contrib/src/test/results/clientpositive/serde_regex_withwrongregex.q.out (revision 0) +++ contrib/src/test/results/clientpositive/serde_regex_withwrongregex.q.out (revision 0) @@ -0,0 +1,69 @@ +PREHOOK: query: DROP TABLE sampleregex +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE sampleregex +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sampleregex( + host STRING, + identity STRING, + user STRING, + time STRING, + request STRING, + status STRING, + size STRING, + referer STRING, + agent STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' +WITH SERDEPROPERTIES ( + "input.regex" = "([^ ]*) ([^ ]*) (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?", + "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s" +) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE sampleregex( + host STRING, + identity STRING, + user STRING, + time STRING, + request STRING, + status STRING, + size STRING, + referer STRING, + agent STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' +WITH SERDEPROPERTIES ( + "input.regex" = "([^ ]*) ([^ ]*) (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?", + "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s" +) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@sampleregex +PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/apache.access.log" INTO TABLE sampleregex +PREHOOK: type: LOAD +PREHOOK: Output: default@sampleregex +POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/apache.access.log" INTO TABLE sampleregex +POSTHOOK: type: LOAD +POSTHOOK: Output: default@sampleregex +PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/apache.access.2.log" INTO TABLE sampleregex +PREHOOK: type: LOAD +PREHOOK: Output: default@sampleregex +POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/apache.access.2.log" INTO TABLE sampleregex +POSTHOOK: type: LOAD +POSTHOOK: Output: default@sampleregex +PREHOOK: query: SELECT * FROM sampleregex +PREHOOK: type: QUERY +PREHOOK: Input: default@sampleregex +PREHOOK: Output: file:/tmp/root/hive_2011-07-13_08-52-37_085_1201010411073926742/-mr-10000 +POSTHOOK: query: SELECT * FROM sampleregex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sampleregex +POSTHOOK: Output: file:/tmp/root/hive_2011-07-13_08-52-37_085_1201010411073926742/-mr-10000 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: DROP TABLE sampleregex +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sampleregex +PREHOOK: Output: default@sampleregex +POSTHOOK: query: DROP TABLE sampleregex +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@sampleregex +POSTHOOK: Output: default@sampleregex