diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java index 0e4a494..e296b42 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -93,6 +93,8 @@ public class ImportTsv extends Configured implements Tool { private final byte[][] families; private final byte[][] qualifiers; + private final boolean[] skips; + private final byte separatorByte; private int rowKeyColumnIndex; @@ -126,9 +128,14 @@ public class ImportTsv extends Configured implements Tool { maxColumnCount = columnStrings.size(); families = new byte[maxColumnCount][]; qualifiers = new byte[maxColumnCount][]; + skips = new boolean[maxColumnCount]; for (int i = 0; i < columnStrings.size(); i++) { String str = columnStrings.get(i); + if (str.length() == 0) { + skips[i] = true; + continue; + } if (ROWKEY_COLUMN_SPEC.equals(str)) { rowKeyColumnIndex = i; continue; @@ -167,6 +174,9 @@ public class ImportTsv extends Configured implements Tool { public byte[] getQualifier(int idx) { return qualifiers[idx]; } + public boolean isSkipped(int idx) { + return skips[idx]; + } public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { @@ -400,6 +410,8 @@ public class ImportTsv extends Configured implements Tool { "You must specify at most one column as timestamp key for each imported record.\n" + "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n" + "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n" + + "You can also omit loading columns by providing no value for their column specification.\n" + + "For example, ',HBASE_ROW_KEY,cf1:q1,,cf2:q1,' would omit the first, fourth and last columns of the input data.\n" + "\n" + "By default importtsv will load data directly into HBase. 
To instead generate\n" + "HFiles of data to prepare for a bulk data load, pass the option:\n" + diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java index d503079..2c5c299 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java @@ -142,7 +142,9 @@ public class TextSortReducer extends ts = parsed.getTimestamp(ts); for (int i = 0; i < parsed.getColumnCount(); i++) { - if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()) { + if (i == parser.getRowKeyColumnIndex() + || i == parser.getTimestampKeyColumnIndex() + || parser.isSkipped(i)) { continue; } KeyValue kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java index 6360b2e..f1d2a2f 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java @@ -136,7 +136,8 @@ extends Mapper Put put = new Put(rowKey.copyBytes()); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() - || i == parser.getTimestampKeyColumnIndex()) { + || i == parser.getTimestampKeyColumnIndex() + || parser.isSkipped(i)) { continue; } KeyValue kv = new KeyValue( diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java index 2d93371..53a034e 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java @@ -219,11 +219,11 @@ 
public class TestImportTsv implements Configurable { "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper", "-D" + ImportTsv.COLUMNS_CONF_KEY - + "=HBASE_ROW_KEY,FAM:A,FAM:B", + + "=HBASE_ROW_KEY,FAM:A,FAM:B,", "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,", "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(), table }; - String data = "KEY\u001bVALUE4\u001bVALUE8\n"; + String data = "KEY\u001bVALUE4\u001bVALUE8\u001bVALUEIGNORED\n"; doMROnTableTest(util, FAMILY, data, args, 4); } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java index edc927b..61c4d49 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java @@ -80,14 +80,19 @@ public class TestImportTsvParser { assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); - parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t"); - assertNull(parser.getFamily(0)); - assertNull(parser.getQualifier(0)); - assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); - assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); + parser = new TsvParser(",HBASE_ROW_KEY,col1:scol1,,col1:scol2,", "\t"); + assertTrue(parser.isSkipped(0)); + assertTrue(parser.isSkipped(3)); + assertTrue(parser.isSkipped(5)); + assertNull(parser.getFamily(1)); + assertNull(parser.getQualifier(1)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2)); - assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2)); - assertEquals(0, parser.getRowKeyColumnIndex()); + assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(2)); + assertFalse(parser.isSkipped(2)); + assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(4)); + assertBytesEquals(Bytes.toBytes("scol2"), 
parser.getQualifier(4)); + assertFalse(parser.isSkipped(4)); + assertEquals(1, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");