diff --git a/src/docs/src/documentation/content/xdocs/readerwriter.xml b/src/docs/src/documentation/content/xdocs/readerwriter.xml index 67c74d7..9f57752 100644 --- a/src/docs/src/documentation/content/xdocs/readerwriter.xml +++ b/src/docs/src/documentation/content/xdocs/readerwriter.xml @@ -144,6 +144,15 @@ writer.write(hCatRecordItr);

The writer then calls getNext() on this iterator in a loop and writes out all the records attached to the iterator.

+ + +

Once all slave writers are done, the master commits to persist the +data.

+ + +writer.commit(info); // on master. + + diff --git a/src/java/org/apache/hcatalog/data/transfer/HCatReader.java b/src/java/org/apache/hcatalog/data/transfer/HCatReader.java index 84465f9..b00def7 100644 --- a/src/java/org/apache/hcatalog/data/transfer/HCatReader.java +++ b/src/java/org/apache/hcatalog/data/transfer/HCatReader.java @@ -28,8 +28,8 @@ import org.apache.hcatalog.data.HCatRecord; import org.apache.hcatalog.data.transfer.state.StateProvider; /** - * This abstract class is internal to HCatalog and abstracts away the notion of - * underlying system from which reads will be done. + * This abstract class facilitates external systems to read from HCatalog. + * Use {@link DataTransferFactory} to get an instance of this. */ public abstract class HCatReader { @@ -55,8 +55,8 @@ public abstract class HCatReader { * This constructor will be invoked by {@link DataTransferFactory} at master * node. Don't use this constructor. Instead, use {@link DataTransferFactory} * - * @param re - * @param config + * @param re A Read Entity + * @param config Configuration parameters */ protected HCatReader(final ReadEntity re, final Map config) { this(config); @@ -65,10 +65,10 @@ public abstract class HCatReader { /** * This constructor will be invoked by {@link DataTransferFactory} at slave - * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} + * nodes. Don't use this constructor directly. 
Instead, use {@link DataTransferFactory} * - * @param config - * @param sp + * @param config Configuration parameters + * @param sp The State provider */ protected HCatReader(final Configuration config, StateProvider sp) { @@ -91,6 +91,10 @@ public abstract class HCatReader { this.conf = conf; } + /** + * Returns the configuration parameters used by the reader + */ + public Configuration getConf() { if (null == conf) { throw new IllegalStateException( diff --git a/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java b/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java index 07c33c3..1b1a549 100644 --- a/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java +++ b/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java @@ -28,9 +28,8 @@ import org.apache.hcatalog.data.HCatRecord; import org.apache.hcatalog.data.transfer.state.StateProvider; /** - * This abstraction is internal to HCatalog. This is to facilitate writing to - * HCatalog from external systems. Don't try to instantiate this directly. - * Instead, use {@link DataTransferFactory} + * This class facilitates writing to HCatalog from external systems. + * Use {@link DataTransferFactory} to get an instance of this. */ public abstract class HCatWriter { @@ -92,7 +91,9 @@ public abstract class HCatWriter { /** * This constructor will be used at slave nodes. 
* - * @param config + * @param config Configuration used to communicate with HCatalog + * @param sp A state provider + * */ protected HCatWriter(final Configuration config, final StateProvider sp) { this.conf = config; diff --git a/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java b/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java index 6787b10..8649355 100644 --- a/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java +++ b/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java @@ -20,6 +20,11 @@ package org.apache.hcatalog.data.transfer; import java.util.Map; +/** + * Represents an HCatalog entity such as a table or partition + * from which data can be read. + */ + public class ReadEntity extends EntityBase.Entity { private String filterString; @@ -56,31 +61,66 @@ public class ReadEntity extends EntityBase.Entity { private String filterString; + /** + * Specifies the region + * + * @param region Region name + * @return A self reference to {@link Builder} instance + */ public Builder withRegion(final String region) { this.region = region; return this; } + /** + * Specifies the database name + * + * @param dbName Database name + * @return A self reference to {@link Builder} instance + */ public Builder withDatabase(final String dbName) { this.dbName = dbName; return this; } + /** + * Specifies the table name + * + * @param tblName Table name + * @return A self reference to {@link Builder} instance + */ public Builder withTable(final String tblName) { this.tableName = tblName; return this; } + /** + * Specifies the partition as key-value pairs + * + * @param partKVs Map of partition key-value pairs + * @return A self reference to {@link Builder} instance + */ public Builder withPartition(final Map partKVs) { this.partitionKVs = partKVs; return this; } + /** + * Specifies the filter + * + * @param filterString The filter + * @return A self reference to {@link Builder} instance + */ public Builder withFilter(String filterString) { this.filterString = filterString; return 
this; } + /** + * Create an instance of {@link ReadEntity} + * + * @return A {@link ReadEntity} instance + */ public ReadEntity build() { return new ReadEntity(this); } diff --git a/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java b/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java index 68d653a..f653052 100644 --- a/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java +++ b/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java @@ -31,10 +31,10 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hcatalog.mapreduce.HCatSplit; /** - * This class will contain information of different {@link InputSplit} obtained - * at master node and configuration. This class implements - * {@link Externalizable} so it can be serialized using standard java - * mechanisms. + * This contains information obtained at master node to help prepare slave nodes + * for reading. This class implements {@link Externalizable} so it can be + * serialized using standard java mechanisms. Master can serialize it to + * make it available to slaves. */ public class ReaderContext implements Externalizable, Configurable { diff --git a/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java b/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java index 3917a18..266d363 100644 --- a/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java +++ b/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java @@ -20,6 +20,11 @@ package org.apache.hcatalog.data.transfer; import java.util.Map; +/** + * Represents an HCatalog entity such as a table or partition + * to which data can be written. 
+ */ + public class WriteEntity extends EntityBase.Entity { /** @@ -46,26 +51,54 @@ public class WriteEntity extends EntityBase.Entity { */ public static class Builder extends EntityBase { + /** + * Specifies the region + * + * @param region Region name + * @return A self reference to {@link Builder} instance + */ public Builder withRegion(final String region) { this.region = region; return this; } - + /** + * Specifies the database name + * + * @param dbName Database name + * @return A self reference to {@link Builder} instance + */ public Builder withDatabase(final String dbName) { this.dbName = dbName; return this; } + /** + * Specifies the table name + * + * @param tblName Table name + * @return A self reference to {@link Builder} instance + */ public Builder withTable(final String tblName) { this.tableName = tblName; return this; } + /** + * Specifies the partition as key-value pairs + * + * @param partKVs Map of partition key-value pairs + * @return A self reference to {@link Builder} instance + */ public Builder withPartition(final Map partKVs) { this.partitionKVs = partKVs; return this; } + /** + * Create an instance of {@link WriteEntity} + * + * @return A {@link WriteEntity} instance + */ public WriteEntity build() { return new WriteEntity(this); } diff --git a/src/java/org/apache/hcatalog/data/transfer/WriterContext.java b/src/java/org/apache/hcatalog/data/transfer/WriterContext.java index 002ca07..bcd4fc9 100644 --- a/src/java/org/apache/hcatalog/data/transfer/WriterContext.java +++ b/src/java/org/apache/hcatalog/data/transfer/WriterContext.java @@ -28,9 +28,9 @@ import org.apache.hadoop.conf.Configuration; /** * This contains information obtained at master node to help prepare slave nodes - * for writer. This class implements {@link Externalizable} so it can be - * serialized using standard java mechanisms. Master should serialize it and - * make it available to slaves to prepare for writes. + * for writing. 
This class implements {@link Externalizable} so it can be + * serialized using standard java mechanisms. Master can serialize it to + * make it available to slaves. */ public class WriterContext implements Externalizable, Configurable {