The writer then calls getNext() on this iterator in a loop
and writes out all the records obtained from the iterator.
+
+
+
Once all slave writers are done, the master commits to persist the
+data.
+
+
+writer.commit(info); // on master.
+
+
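For reference, the slave-side counterpart of the commit above might look as
follows; the names cntxt, sp and recordItr are illustrative, and the factory
and write methods are assumed here rather than shown in this section:

HCatWriter writer = DataTransferFactory.getHCatWriter(cntxt, sp); // on a slave node, assumed factory method
writer.write(recordItr); // the writer pulls each record via getNext() and writes it out
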
diff --git a/src/java/org/apache/hcatalog/data/transfer/HCatReader.java b/src/java/org/apache/hcatalog/data/transfer/HCatReader.java
index 84465f9..b00def7 100644
--- a/src/java/org/apache/hcatalog/data/transfer/HCatReader.java
+++ b/src/java/org/apache/hcatalog/data/transfer/HCatReader.java
@@ -28,8 +28,8 @@ import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.transfer.state.StateProvider;
/**
- * This abstract class is internal to HCatalog and abstracts away the notion of
- * underlying system from which reads will be done.
+ * This abstract class facilitates reading from HCatalog by external systems.
+ * Use {@link DataTransferFactory} to get an instance of this class.
*/
public abstract class HCatReader {
@@ -55,8 +55,8 @@ public abstract class HCatReader {
* This constructor will be invoked by {@link DataTransferFactory} at master
* node. Don't use this constructor. Instead, use {@link DataTransferFactory}
*
- * @param re
- * @param config
+ * @param re The {@link ReadEntity} to read from
+ * @param config Configuration parameters
*/
protected HCatReader(final ReadEntity re, final Map config) {
this(config);
@@ -65,10 +65,10 @@ public abstract class HCatReader {
/**
* This constructor will be invoked by {@link DataTransferFactory} at slave
- * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory}
+ * nodes. Don't use this constructor directly. Instead, use {@link DataTransferFactory}
*
- * @param config
- * @param sp
+ * @param config Configuration parameters
+ * @param sp The state provider
*/
protected HCatReader(final Configuration config, StateProvider sp) {
@@ -91,6 +91,10 @@ public abstract class HCatReader {
this.conf = conf;
}
+ /**
+ * Returns the configuration parameters used by this reader.
+ */
+
public Configuration getConf() {
if (null == conf) {
throw new IllegalStateException(
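The javadoc above routes all construction through DataTransferFactory: the
ReadEntity/Map constructor is meant for the master node and the
Configuration/StateProvider constructor for the slaves. A minimal sketch of
that read flow, assuming factory methods named getHCatReader and reader
methods named prepareRead() and read(), none of which appear in this diff;
readEntity, config and slaveNumber are placeholders:

// on the master: describe what to read and prepare a context for the slaves
HCatReader reader = DataTransferFactory.getHCatReader(readEntity, config); // assumed factory method
ReaderContext cntxt = reader.prepareRead();                                // assumed method name

// on each slave: obtain a reader for its share of the work and iterate
HCatReader slaveReader = DataTransferFactory.getHCatReader(cntxt, slaveNumber); // assumed overload
Iterator<HCatRecord> itr = slaveReader.read(); // assumed method name
while (itr.hasNext()) {
  HCatRecord record = itr.next();
  // process the record
}
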
diff --git a/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java b/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java
index 07c33c3..1b1a549 100644
--- a/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java
+++ b/src/java/org/apache/hcatalog/data/transfer/HCatWriter.java
@@ -28,9 +28,8 @@ import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.transfer.state.StateProvider;
/**
- * This abstraction is internal to HCatalog. This is to facilitate writing to
- * HCatalog from external systems. Don't try to instantiate this directly.
- * Instead, use {@link DataTransferFactory}
+ * This class facilitates writing to HCatalog from external systems.
+ * Use {@link DataTransferFactory} to get an instance of this.
*/
public abstract class HCatWriter {
@@ -92,7 +91,9 @@ public abstract class HCatWriter {
/**
* This constructor will be used at slave nodes.
*
- * @param config
+ * @param config Configuration used to communicate with HCatalog
+ * @param sp A state provider
+ *
*/
protected HCatWriter(final Configuration config, final StateProvider sp) {
this.conf = config;
diff --git a/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java b/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java
index 6787b10..8649355 100644
--- a/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java
+++ b/src/java/org/apache/hcatalog/data/transfer/ReadEntity.java
@@ -20,6 +20,11 @@ package org.apache.hcatalog.data.transfer;
import java.util.Map;
+/**
+ * Represents an HCatalog entity such as a table or partition
+ * from which data can be read.
+ */
+
public class ReadEntity extends EntityBase.Entity {
private String filterString;
@@ -56,31 +61,66 @@ public class ReadEntity extends EntityBase.Entity {
private String filterString;
+ /**
+ * Specifies the region
+ *
+ * @param region Region name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withRegion(final String region) {
this.region = region;
return this;
}
+ /**
+ * Specifies the database name
+ *
+ * @param dbName Database name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withDatabase(final String dbName) {
this.dbName = dbName;
return this;
}
+ /**
+ * Specifies the table name
+ *
+ * @param tblName Table name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withTable(final String tblName) {
this.tableName = tblName;
return this;
}
+ /**
+ * Specifies the partition key/value pairs
+ *
+ * @param partKVs Partition key/value pairs
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withPartition(final Map partKVs) {
this.partitionKVs = partKVs;
return this;
}
+ /**
+ * Specifies the partition filter
+ *
+ * @param filterString The partition filter string
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withFilter(String filterString) {
this.filterString = filterString;
return this;
}
+ /**
+ * Creates an instance of {@link ReadEntity}
+ *
+ * @return A {@link ReadEntity} instance
+ */
public ReadEntity build() {
return new ReadEntity(this);
}
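The Builder methods documented above chain into a ReadEntity. A small sketch,
assuming the Builder exposes an accessible no-argument constructor; the
database, table and filter values are made up:

ReadEntity entity = new ReadEntity.Builder()
  .withDatabase("default")
  .withTable("web_logs")          // illustrative table name
  .withFilter("ds=\"20110924\"")  // illustrative partition filter
  .build();
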
diff --git a/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java b/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java
index 68d653a..f653052 100644
--- a/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java
+++ b/src/java/org/apache/hcatalog/data/transfer/ReaderContext.java
@@ -31,10 +31,10 @@ import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hcatalog.mapreduce.HCatSplit;
/**
- * This class will contain information of different {@link InputSplit} obtained
- * at master node and configuration. This class implements
- * {@link Externalizable} so it can be serialized using standard java
- * mechanisms.
+ * This contains information obtained at master node to help prepare slave nodes
+ * for reading. This class implements {@link Externalizable} so it can be
+ * serialized using standard Java mechanisms. The master can serialize it to
+ * make it available to the slaves.
*/
public class ReaderContext implements Externalizable, Configurable {
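Because ReaderContext implements Externalizable, the hand-off from master to
slaves can use plain java.io serialization. A minimal sketch with exception
handling omitted; cntxt stands for a previously prepared ReaderContext, and
the file-based exchange is only for illustration:

// on the master, once the context has been prepared
ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream("reader.context"));
out.writeObject(cntxt);
out.close();

// on a slave, before asking DataTransferFactory for a reader
ObjectInputStream in = new ObjectInputStream(new FileInputStream("reader.context"));
ReaderContext slaveCntxt = (ReaderContext) in.readObject();
in.close();
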
diff --git a/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java b/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java
index 3917a18..266d363 100644
--- a/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java
+++ b/src/java/org/apache/hcatalog/data/transfer/WriteEntity.java
@@ -20,6 +20,11 @@ package org.apache.hcatalog.data.transfer;
import java.util.Map;
+/**
+ * Represents an HCatalog entity such as a table or partition
+ * to which data can be written.
+ */
+
public class WriteEntity extends EntityBase.Entity {
/**
@@ -46,26 +51,54 @@ public class WriteEntity extends EntityBase.Entity {
*/
public static class Builder extends EntityBase {
+ /**
+ * Specifies the region
+ *
+ * @param region Region name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withRegion(final String region) {
this.region = region;
return this;
}
-
+ /**
+ * Specifies the database name
+ *
+ * @param dbName Database name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withDatabase(final String dbName) {
this.dbName = dbName;
return this;
}
+ /**
+ * Specifies the table name
+ *
+ * @param tblName Table name
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withTable(final String tblName) {
this.tableName = tblName;
return this;
}
+ /**
+ * Specifies the partition key/value pairs
+ *
+ * @param partKVs Partition key/value pairs
+ * @return A self reference to this {@link Builder} instance
+ */
public Builder withPartition(final Map partKVs) {
this.partitionKVs = partKVs;
return this;
}
+ /**
+ * Creates an instance of {@link WriteEntity}
+ *
+ * @return A {@link WriteEntity} instance
+ */
public WriteEntity build() {
return new WriteEntity(this);
}
diff --git a/src/java/org/apache/hcatalog/data/transfer/WriterContext.java b/src/java/org/apache/hcatalog/data/transfer/WriterContext.java
index 002ca07..bcd4fc9 100644
--- a/src/java/org/apache/hcatalog/data/transfer/WriterContext.java
+++ b/src/java/org/apache/hcatalog/data/transfer/WriterContext.java
@@ -28,9 +28,9 @@ import org.apache.hadoop.conf.Configuration;
/**
* This contains information obtained at master node to help prepare slave nodes
- * for writer. This class implements {@link Externalizable} so it can be
- * serialized using standard java mechanisms. Master should serialize it and
- * make it available to slaves to prepare for writes.
+ * for writing. This class implements {@link Externalizable} so it can be
+ * serialized using standard Java mechanisms. The master can serialize it to
+ * make it available to the slaves.
*/
public class WriterContext implements Externalizable, Configurable {
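On the write side the WriterContext plays the same role. Below is a sketch of
the master-side sequence leading up to the commit shown at the top of this
section; getHCatWriter, prepareWrite and commit are assumed method names, and
the entity values and config are placeholders:

// on the master
WriteEntity entity = new WriteEntity.Builder()
  .withDatabase("default")
  .withTable("web_logs")
  .build();
HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); // assumed factory method
WriterContext info = writer.prepareWrite();                            // assumed method name
// serialize 'info' for the slaves in the same way as ReaderContext above;
// once every slave writer has finished, persist the data:
writer.commit(info);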