Index: ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java	(revision 901511)
+++ ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java	(working copy)
@@ -25,32 +25,34 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 
 public class DriverContext {
-  
+
   Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
-  
-  public DriverContext( Queue<Task<? extends Serializable>> runnable) {
+
+  public DriverContext(Queue<Task<? extends Serializable>> runnable) {
     this.runnable = runnable;
   }
-  
+
   public Queue<Task<? extends Serializable>> getRunnable() {
-    return this.runnable;
+    return runnable;
   }
-  
+
   /**
    * Checks if a task can be launched
    * 
-   * @param tsk the task to be checked
-   * @return true if the task is launchable, false otherwise
+   * @param tsk
+   *          the task to be checked
+   * @return true if the task is launchable, false otherwise
    */
   public static boolean isLaunchable(Task<? extends Serializable> tsk) {
-    // A launchable task is one that hasn't been queued, hasn't been initialized, and is runnable.
+    // A launchable task is one that hasn't been queued, hasn't been
+    // initialized, and is runnable.
     return !tsk.getQueued() && !tsk.getInitialized() && tsk.isRunnable();
   }
 
   public void addToRunnable(Task<? extends Serializable> tsk) {
     runnable.add(tsk);
     tsk.setQueued();
-  } 
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Sample.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Sample.java	(revision 901511)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Sample.java	(working copy)
@@ -24,58 +24,72 @@
  **/
 public class Sample {
 
-    protected int sampleNum;
-    protected int sampleFraction;
-    protected Dimension sampleDimension;
-    protected int moduloNum;
+  protected int sampleNum;
+  protected int sampleFraction;
+  protected Dimension sampleDimension;
+  protected int moduloNum;
 
-    @SuppressWarnings("nls")
-    public Sample(int num, int fraction, Dimension d) throws HiveException {
-      if((num <= 0) || (num > fraction)) {
-        throw new HiveException("Bad sample spec: " + num + "/" + fraction);
-      }
-      this.sampleNum = num;
-      this.moduloNum = this.sampleNum-1;
-      this.sampleFraction = fraction;
-      this.sampleDimension = d;
+  @SuppressWarnings("nls")
+  public Sample(int num, int fraction, Dimension d) throws HiveException {
+    if ((num <= 0) || (num > fraction)) {
+      throw new HiveException("Bad sample spec: " + num + "/" + fraction);
     }
+    sampleNum = num;
+    moduloNum = sampleNum - 1;
+    sampleFraction = fraction;
+    sampleDimension = d;
+  }
 
-    /**
-     * Given an arbitrary object, determine if it falls within this sample.
-     */
-    public boolean inSample(Object o) {
-      return (((this.sampleDimension.hashCode(o) & Integer.MAX_VALUE) % this.sampleFraction) == this.moduloNum);
-    }
+  /**
+   * Given an arbitrary object, determine if it falls within this sample.
+ */ + public boolean inSample(Object o) { + return (((sampleDimension.hashCode(o) & Integer.MAX_VALUE) % sampleFraction) == moduloNum); + } - @Override - public boolean equals (Object o) { - if (this == o) - return true; - if (o == null) - return false; - if(o instanceof Sample) { - Sample s = (Sample)o; - return ((this.sampleNum == s.sampleNum) && (this.sampleFraction == s.sampleFraction) && - this.sampleDimension.equals(s.sampleDimension)); - } - return (false); + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } - - public int getSampleNum() { return this.sampleNum;} - public int getSampleFraction() { return this.sampleFraction;} - public Dimension getSampleDimension() { return this.sampleDimension;} + if (o == null) { + return false; + } + if (o instanceof Sample) { + Sample s = (Sample) o; + return ((sampleNum == s.sampleNum) + && (sampleFraction == s.sampleFraction) && sampleDimension + .equals(s.sampleDimension)); + } + return (false); + } - @SuppressWarnings("nls") - @Override - public String toString() { return this.sampleNum+"/"+this.sampleFraction+"@("+this.sampleDimension+")";} + public int getSampleNum() { + return sampleNum; + } - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((this.sampleDimension == null) ? 0 : this.sampleDimension.hashCode()); - result = prime * result + this.sampleFraction; - result = prime * result + this.sampleNum; - return result; - } + public int getSampleFraction() { + return sampleFraction; + } + + public Dimension getSampleDimension() { + return sampleDimension; + } + + @SuppressWarnings("nls") + @Override + public String toString() { + return sampleNum + "/" + sampleFraction + "@(" + sampleDimension + ")"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((sampleDimension == null) ? 0 : sampleDimension.hashCode()); + result = prime * result + sampleFraction; + result = prime * result + sampleNum; + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java (working copy) @@ -13,31 +13,30 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; -import org.apache.hadoop.hive.conf.HiveConf; - import org.apache.thrift.TException; /** - * Verify that the information in the metastore matches what - * is on the filesystem. Return a CheckResult object - * containing lists of missing and any unexpected tables and partitions. + * Verify that the information in the metastore matches what is on the + * filesystem. Return a CheckResult object containing lists of missing and any + * unexpected tables and partitions. 
*/ public class HiveMetaStoreChecker { public static final Log LOG = LogFactory.getLog(HiveMetaStoreChecker.class); - private Hive hive; - private HiveConf conf; + private final Hive hive; + private final HiveConf conf; public HiveMetaStoreChecker(Hive hive) { super(); this.hive = hive; - this.conf = hive.getConf(); + conf = hive.getConf(); } /** @@ -49,16 +48,19 @@ * @param tableName * Table we want to run the check for. If null we'll check all the * tables in the database. - * @param partitions List of partition name value pairs, - * if null or empty check all partitions - * @param result Fill this with the results of the check - * @throws HiveException Failed to get required information - * from the metastore. - * @throws IOException Most likely filesystem related + * @param partitions + * List of partition name value pairs, if null or empty check all + * partitions + * @param result + * Fill this with the results of the check + * @throws HiveException + * Failed to get required information from the metastore. + * @throws IOException + * Most likely filesystem related */ public void checkMetastore(String dbName, String tableName, - List> partitions, CheckResult result) - throws HiveException, IOException { + List> partitions, CheckResult result) + throws HiveException, IOException { if (dbName == null || "".equalsIgnoreCase(dbName)) { dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; @@ -71,7 +73,7 @@ for (String currentTableName : tables) { checkTable(dbName, currentTableName, null, result); } - + findUnknownTables(dbName, tables, result); } else if (partitions == null || partitions.isEmpty()) { // only one table, let's check all partitions @@ -93,21 +95,26 @@ /** * Check for table directories that aren't in the metastore. - * @param dbName Name of the database - * @param tables List of table names - * @param result Add any found tables to this - * @throws HiveException Failed to get required information - * from the metastore. - * @throws IOException Most likely filesystem related - * @throws MetaException Failed to get required information - * from the metastore. - * @throws NoSuchObjectException Failed to get required information - * from the metastore. - * @throws TException Thrift communication error. + * + * @param dbName + * Name of the database + * @param tables + * List of table names + * @param result + * Add any found tables to this + * @throws HiveException + * Failed to get required information from the metastore. + * @throws IOException + * Most likely filesystem related + * @throws MetaException + * Failed to get required information from the metastore. + * @throws NoSuchObjectException + * Failed to get required information from the metastore. + * @throws TException + * Thrift communication error. 
*/ - void findUnknownTables(String dbName, List tables, - CheckResult result) throws IOException, MetaException, TException, - HiveException { + void findUnknownTables(String dbName, List tables, CheckResult result) + throws IOException, MetaException, TException, HiveException { Set dbPaths = new HashSet(); Set tableNames = new HashSet(tables); @@ -125,10 +132,9 @@ FileSystem fs = dbPath.getFileSystem(conf); FileStatus[] statuses = fs.listStatus(dbPath); for (FileStatus status : statuses) { - - if (status.isDir() - && !tableNames.contains(status.getPath().getName())) { - + + if (status.isDir() && !tableNames.contains(status.getPath().getName())) { + result.getTablesNotInMs().add(status.getPath().getName()); } } @@ -139,16 +145,20 @@ * Check the metastore for inconsistencies, data missing in either the * metastore or on the dfs. * - * @param dbName Name of the database - * @param tableName Name of the table - * @param partitions Partitions to check, if null or empty - * get all the partitions. - * @param result Result object - * @throws HiveException Failed to get required information - * from the metastore. - * @throws IOException Most likely filesystem related - * @throws MetaException Failed to get required information - * from the metastore. + * @param dbName + * Name of the database + * @param tableName + * Name of the table + * @param partitions + * Partitions to check, if null or empty get all the partitions. + * @param result + * Result object + * @throws HiveException + * Failed to get required information from the metastore. + * @throws IOException + * Most likely filesystem related + * @throws MetaException + * Failed to get required information from the metastore. */ void checkTable(String dbName, String tableName, List> partitions, CheckResult result) @@ -165,18 +175,18 @@ List parts = new ArrayList(); boolean findUnknownPartitions = true; - + if (table.isPartitioned()) { if (partitions == null || partitions.isEmpty()) { // no partitions specified, let's get all parts = hive.getPartitions(table); } else { - //we're interested in specific partitions, - //don't check for any others + // we're interested in specific partitions, + // don't check for any others findUnknownPartitions = false; for (Map map : partitions) { Partition part = hive.getPartition(table, map, false); - if(part == null) { + if (part == null) { PartitionResult pr = new PartitionResult(); pr.setTableName(tableName); pr.setPartitionName(Warehouse.makePartName(map)); @@ -195,16 +205,22 @@ * Check the metastore for inconsistencies, data missing in either the * metastore or on the dfs. * - * @param table Table to check - * @param parts Partitions to check - * @param result Result object - * @param findUnknownPartitions Should we try to find unknown partitions? - * @throws IOException Could not get information from filesystem - * @throws HiveException Could not create Partition object + * @param table + * Table to check + * @param parts + * Partitions to check + * @param result + * Result object + * @param findUnknownPartitions + * Should we try to find unknown partitions? 
+ * @throws IOException + * Could not get information from filesystem + * @throws HiveException + * Could not create Partition object */ - void checkTable(Table table, List parts, - boolean findUnknownPartitions, CheckResult result) - throws IOException, HiveException { + void checkTable(Table table, List parts, + boolean findUnknownPartitions, CheckResult result) throws IOException, + HiveException { Path tablePath = table.getPath(); FileSystem fs = tablePath.getFileSystem(conf); @@ -217,8 +233,8 @@ // check that the partition folders exist on disk for (Partition partition : parts) { - if(partition == null) { - //most likely the user specified an invalid partition + if (partition == null) { + // most likely the user specified an invalid partition continue; } Path partPath = partition.getPartitionPath(); @@ -236,23 +252,26 @@ } } - if(findUnknownPartitions) { + if (findUnknownPartitions) { findUnknownPartitions(table, partPaths, result); } } /** - * Find partitions on the fs that are - * unknown to the metastore - * @param table Table where the partitions would be located - * @param partPaths Paths of the partitions the ms knows about - * @param result Result object - * @throws IOException Thrown if we fail at fetching listings from - * the fs. + * Find partitions on the fs that are unknown to the metastore + * + * @param table + * Table where the partitions would be located + * @param partPaths + * Paths of the partitions the ms knows about + * @param result + * Result object + * @throws IOException + * Thrown if we fail at fetching listings from the fs. */ - void findUnknownPartitions(Table table, Set partPaths, + void findUnknownPartitions(Table table, Set partPaths, CheckResult result) throws IOException { - + Path tablePath = table.getPath(); // now check the table folder and see if we find anything // that isn't in the metastore @@ -263,18 +282,18 @@ // remove the partition paths we know about allPartDirs.removeAll(partPaths); - + // we should now only have the unexpected folders left for (Path partPath : allPartDirs) { FileSystem fs = partPath.getFileSystem(conf); - String partitionName = getPartitionName(fs.makeQualified(tablePath), + String partitionName = getPartitionName(fs.makeQualified(tablePath), partPath); - + if (partitionName != null) { PartitionResult pr = new PartitionResult(); pr.setPartitionName(partitionName); pr.setTableName(table.getName()); - + result.getPartitionsNotInMs().add(pr); } } @@ -283,36 +302,39 @@ /** * Get the partition name from the path. * - * @param tablePath Path of the table. - * @param partitionPath Path of the partition. + * @param tablePath + * Path of the table. + * @param partitionPath + * Path of the partition. * @return Partition name, for example partitiondate=2008-01-01 */ private String getPartitionName(Path tablePath, Path partitionPath) { String result = null; Path currPath = partitionPath; while (currPath != null && !tablePath.equals(currPath)) { - if(result == null) { + if (result == null) { result = currPath.getName(); } else { result = currPath.getName() + Path.SEPARATOR + result; } - + currPath = currPath.getParent(); } return result; } /** - * Recursive method to get the leaf directories of a base path. - * Example: - * base/dir1/dir2 - * base/dir3 + * Recursive method to get the leaf directories of a base path. Example: + * base/dir1/dir2 base/dir3 * * This will return dir2 and dir3 but not dir1. * - * @param basePath Start directory - * @param allDirs This set will contain the leaf paths at the end. 
- * @throws IOException Thrown if we can't get lists from the fs. + * @param basePath + * Start directory + * @param allDirs + * This set will contain the leaf paths at the end. + * @throws IOException + * Thrown if we can't get lists from the fs. */ private void getAllLeafDirs(Path basePath, Set allDirs) @@ -322,7 +344,7 @@ private void getAllLeafDirs(Path basePath, Set allDirs, FileSystem fs) throws IOException { - + FileStatus[] statuses = fs.listStatus(basePath); if (statuses.length == 0) { Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Dimension.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Dimension.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Dimension.java (working copy) @@ -19,48 +19,61 @@ package org.apache.hadoop.hive.ql.metadata; /** - * Hive consists of a fixed, well defined set of Dimensions. - * Each dimension has a type and id. Dimensions link columns in different tables - * + * Hive consists of a fixed, well defined set of Dimensions. Each dimension has + * a type and id. Dimensions link columns in different tables + * */ public class Dimension { - protected Class dimensionType; - protected String dimensionId; + protected Class dimensionType; + protected String dimensionId; - public Dimension (Class t, String id) { - this.dimensionType = t; - this.dimensionId = id; - } + public Dimension(Class t, String id) { + dimensionType = t; + dimensionId = id; + } - public Class getDimensionType() { return this.dimensionType; } - public String getDimensionId() { return this.dimensionId; } + public Class getDimensionType() { + return dimensionType; + } - @Override - public boolean equals(Object o) { - if (super.equals(o)) - return true; - if (o == null) - return false; - if(o instanceof Dimension) { - Dimension d = (Dimension) o; - return (this.dimensionId.equals(d.dimensionId) && (this.dimensionType == d.dimensionType)); - } + public String getDimensionId() { + return dimensionId; + } + + @Override + public boolean equals(Object o) { + if (super.equals(o)) { + return true; + } + if (o == null) { return false; } + if (o instanceof Dimension) { + Dimension d = (Dimension) o; + return (dimensionId.equals(d.dimensionId) && (dimensionType == d.dimensionType)); + } + return false; + } - @Override - @SuppressWarnings("nls") - public String toString() { return "Type="+this.dimensionType.getName()+","+"Id="+this.dimensionId; } + @Override + @SuppressWarnings("nls") + public String toString() { + return "Type=" + dimensionType.getName() + "," + "Id=" + dimensionId; + } - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((this.dimensionId == null) ? 0 : this.dimensionId.hashCode()); - result = prime * result + ((this.dimensionType == null) ? 0 : this.dimensionType.hashCode()); - return result; - } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((dimensionId == null) ? 0 : dimensionId.hashCode()); + result = prime * result + + ((dimensionType == null) ? 
0 : dimensionType.hashCode()); + return result; + } - public int hashCode(Object o) { return this.dimensionType.hashCode() ^ this.dimensionId.hashCode(); } + public int hashCode(Object o) { + return dimensionType.hashCode() ^ dimensionId.hashCode(); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/RandomDimension.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/RandomDimension.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/RandomDimension.java (working copy) @@ -21,18 +21,21 @@ import java.util.Random; /** - * A random dimension is an abstract dimension. - * It is implicitly associated with every row in data and has a random value - * + * A random dimension is an abstract dimension. It is implicitly associated with + * every row in data and has a random value + * **/ public class RandomDimension extends Dimension { - Random r; + Random r; - public RandomDimension(Class t, String id) { - super(t, id); - r = new Random(); - } + public RandomDimension(Class t, String id) { + super(t, id); + r = new Random(); + } - public int hashCode(Object o) { return r.nextInt(); } + @Override + public int hashCode(Object o) { + return r.nextInt(); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (working copy) @@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -48,14 +49,13 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; - /** - * A Hive Table: is a fundamental unit of data in Hive that shares a common schema/DDL + * A Hive Table: is a fundamental unit of data in Hive that shares a common + * schema/DDL */ public class Table { @@ -70,36 +70,49 @@ /** * Table (only used internally) + * * @throws HiveException - * + * */ protected Table() throws HiveException { } /** * Table - * - * Create a TableMetaInfo object presumably with the intent of saving it to the metastore - * - * @param name the name of this table in the metadb - * @param schema an object that represents the schema that this SerDe must know - * @param deserializer a Class to be used for deserializing the data - * @param dataLocation where is the table ? (e.g., dfs://hadoop001.sf2p.facebook.com:9000/user/facebook/warehouse/example) NOTE: should not be hardcoding this, but ok for now - * - * @exception HiveException on internal error. 
Note not possible now, but in the future reserve the right to throw an exception + * + * Create a TableMetaInfo object presumably with the intent of saving it to + * the metastore + * + * @param name + * the name of this table in the metadb + * @param schema + * an object that represents the schema that this SerDe must know + * @param deserializer + * a Class to be used for deserializing the data + * @param dataLocation + * where is the table ? (e.g., + * dfs://hadoop001.sf2p.facebook.com:9000/ + * user/facebook/warehouse/example) NOTE: should not be hardcoding + * this, but ok for now + * + * @exception HiveException + * on internal error. Note not possible now, but in the future + * reserve the right to throw an exception */ public Table(String name, Properties schema, Deserializer deserializer, Class> inputFormatClass, - Class outputFormatClass, - URI dataLocation, Hive hive) throws HiveException { + Class outputFormatClass, URI dataLocation, Hive hive) + throws HiveException { initEmpty(); this.schema = schema; - this.deserializer = deserializer; //TODO: convert to SerDeInfo format - this.getTTable().getSd().getSerdeInfo().setSerializationLib(deserializer.getClass().getName()); + this.deserializer = deserializer; // TODO: convert to SerDeInfo format + getTTable().getSd().getSerdeInfo().setSerializationLib( + deserializer.getClass().getName()); getTTable().setTableName(name); getSerdeInfo().setSerializationLib(deserializer.getClass().getName()); setInputFormatClass(inputFormatClass); - setOutputFormatClass(HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClass)); + setOutputFormatClass(HiveFileFormatUtils + .getOutputFormatSubstitute(outputFormatClass)); setDataLocation(dataLocation); } @@ -108,9 +121,11 @@ initEmpty(); getTTable().setTableName(name); getTTable().setDbName(MetaStoreUtils.DEFAULT_DATABASE_NAME); - // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does not + // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does + // not // support a table with no columns. 
- getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); + getSerdeInfo().setSerializationLib( + MetadataTypedColumnsetSerDe.class.getName()); getSerdeInfo().getParameters().put(Constants.SERIALIZATION_FORMAT, "1"); } @@ -133,7 +148,8 @@ public void reinitSerDe() throws HiveException { try { - deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), this.getTTable()); + deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), + getTTable()); } catch (MetaException e) { throw new HiveException(e); } @@ -142,7 +158,8 @@ protected void initSerDe() throws HiveException { if (deserializer == null) { try { - deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), this.getTTable()); + deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), + getTTable()); } catch (MetaException e) { throw new HiveException(e); } @@ -152,11 +169,13 @@ public void checkValidity() throws HiveException { // check for validity String name = getTTable().getTableName(); - if (null == name || name.length() == 0 || !MetaStoreUtils.validateName(name)) { + if (null == name || name.length() == 0 + || !MetaStoreUtils.validateName(name)) { throw new HiveException("[" + name + "]: is not a valid table name"); } if (0 == getCols().size()) { - throw new HiveException("at least one column must be specified for the table"); + throw new HiveException( + "at least one column must be specified for the table"); } if (!isView()) { if (null == getDeserializer()) { @@ -177,20 +196,23 @@ Iterator iter = colNames.iterator(); while (iter.hasNext()) { String oldColName = iter.next(); - if (colName.equalsIgnoreCase(oldColName)) - throw new HiveException("Duplicate column name " + colName + " in the table definition."); + if (colName.equalsIgnoreCase(oldColName)) { + throw new HiveException("Duplicate column name " + colName + + " in the table definition."); + } } colNames.add(colName.toLowerCase()); } - if (getPartCols() != null) - { + if (getPartCols() != null) { // there is no overlap between columns and partitioning columns Iterator partColsIter = getPartCols().iterator(); while (partColsIter.hasNext()) { String partCol = partColsIter.next().getName(); - if(colNames.contains(partCol.toLowerCase())) - throw new HiveException("Partition column name " + partCol + " conflicts with table columns."); + if (colNames.contains(partCol.toLowerCase())) { + throw new HiveException("Partition column name " + partCol + + " conflicts with table columns."); + } } } return; @@ -208,11 +230,11 @@ * @param class1 */ public void setOutputFormatClass(Class class1) { - this.outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1); + outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1); tTable.getSd().setOutputFormat(class1.getName()); } - final public Properties getSchema() { + final public Properties getSchema() { return schema; } @@ -229,7 +251,7 @@ } final public Deserializer getDeserializer() { - if(deserializer == null) { + if (deserializer == null) { try { initSerDe(); } catch (HiveException e) { @@ -247,24 +269,30 @@ return outputFormatClass; } - final public boolean isValidSpec(Map spec) throws HiveException { + final public boolean isValidSpec(Map spec) + throws HiveException { // TODO - types need to be checked. 
List partCols = getTTable().getPartitionKeys(); - if(partCols== null || (partCols.size() == 0)) { - if (spec != null) - throw new HiveException("table is not partitioned but partition spec exists: " + spec); - else + if (partCols == null || (partCols.size() == 0)) { + if (spec != null) { + throw new HiveException( + "table is not partitioned but partition spec exists: " + spec); + } else { return true; + } } - if((spec == null) || (spec.size() != partCols.size())) { - throw new HiveException("table is partitioned but partition spec is not specified or tab: " + spec); + if ((spec == null) || (spec.size() != partCols.size())) { + throw new HiveException( + "table is partitioned but partition spec is not specified or tab: " + + spec); } for (FieldSchema field : partCols) { - if(spec.get(field.getName()) == null) { - throw new HiveException(field.getName() + " not found in table's partition spec: " + spec); + if (spec.get(field.getName()) == null) { + throw new HiveException(field.getName() + + " not found in table's partition spec: " + spec); } } @@ -277,7 +305,7 @@ /** * getProperty - * + * */ public String getProperty(String name) { return getTTable().getParameters().get(name); @@ -285,14 +313,16 @@ public Vector getFields() { - Vector fields = new Vector (); + Vector fields = new Vector(); try { Deserializer decoder = getDeserializer(); // Expand out all the columns of the table - StructObjectInspector structObjectInspector = (StructObjectInspector)decoder.getObjectInspector(); - List fld_lst = structObjectInspector.getAllStructFieldRefs(); - for(StructField field: fld_lst) { + StructObjectInspector structObjectInspector = (StructObjectInspector) decoder + .getObjectInspector(); + List fld_lst = structObjectInspector + .getAllStructFieldRefs(); + for (StructField field : fld_lst) { fields.add(field); } } catch (SerDeException e) { @@ -303,35 +333,38 @@ public StructField getField(String fld) { try { - StructObjectInspector structObjectInspector = (StructObjectInspector)getDeserializer().getObjectInspector(); + StructObjectInspector structObjectInspector = (StructObjectInspector) getDeserializer() + .getObjectInspector(); return structObjectInspector.getStructFieldRef(fld); - } - catch (Exception e) { + } catch (Exception e) { throw new RuntimeException(e); } } /** - * @param schema the schema to set + * @param schema + * the schema to set */ public void setSchema(Properties schema) { this.schema = schema; } /** - * @param deserializer the deserializer to set + * @param deserializer + * the deserializer to set */ public void setDeserializer(Deserializer deserializer) { this.deserializer = deserializer; } + @Override public String toString() { return getTTable().getTableName(); } public List getPartCols() { List partKeys = getTTable().getPartitionKeys(); - if(partKeys == null) { + if (partKeys == null) { partKeys = new ArrayList(); getTTable().setPartitionKeys(partKeys); } @@ -340,22 +373,23 @@ public boolean isPartitionKey(String colName) { for (FieldSchema key : getPartCols()) { - if(key.getName().toLowerCase().equals(colName)) { + if (key.getName().toLowerCase().equals(colName)) { return true; } } return false; } - //TODO merge this with getBucketCols function + // TODO merge this with getBucketCols function public String getBucketingDimensionId() { List bcols = getTTable().getSd().getBucketCols(); - if(bcols == null || bcols.size() == 0) { + if (bcols == null || bcols.size() == 0) { return null; } - if(bcols.size() > 1) { - LOG.warn(this + " table has more than one dimensions which 
aren't supported yet"); + if (bcols.size() > 1) { + LOG.warn(this + + " table has more than one dimensions which aren't supported yet"); } return bcols.get(0); @@ -369,7 +403,8 @@ } /** - * @param table the tTable to set + * @param table + * the tTable to set */ protected void setTTable(org.apache.hadoop.hive.metastore.api.Table table) { tTable = table; @@ -386,8 +421,10 @@ } for (String col : bucketCols) { - if(!isField(col)) - throw new HiveException("Bucket columns " + col + " is not part of the table columns" ); + if (!isField(col)) { + throw new HiveException("Bucket columns " + col + + " is not part of the table columns"); + } } getTTable().getSd().setBucketCols(bucketCols); } @@ -398,7 +435,7 @@ private boolean isField(String col) { for (FieldSchema field : getCols()) { - if(field.getName().equals(col)) { + if (field.getName().equals(col)) { return true; } } @@ -407,29 +444,33 @@ public List getCols() { boolean isNative = SerDeUtils.isNativeSerDe(getSerializationLib()); - if (isNative) + if (isNative) { return getTTable().getSd().getCols(); - else { + } else { try { return Hive.getFieldsFromDeserializer(getName(), getDeserializer()); } catch (HiveException e) { - LOG.error("Unable to get field from serde: " + getSerializationLib(), e); + LOG + .error("Unable to get field from serde: " + getSerializationLib(), + e); } return new ArrayList(); } } /** - * Returns a list of all the columns of the table (data columns + partition columns in that order. - * + * Returns a list of all the columns of the table (data columns + partition + * columns in that order. + * * @return List */ public List getAllCols() { - ArrayList f_list = new ArrayList(); - f_list.addAll(getPartCols()); - f_list.addAll(getCols()); - return f_list; + ArrayList f_list = new ArrayList(); + f_list.addAll(getPartCols()); + f_list.addAll(getCols()); + return f_list; } + public void setPartCols(List partCols) { getTTable().setPartitionKeys(partCols); } @@ -443,9 +484,13 @@ } /** - * Replaces files in the partition with new data set specified by srcf. Works by moving files - * @param srcf Files to be replaced. Leaf directories or globbed file paths - * @param tmpd Temporary directory + * Replaces files in the partition with new data set specified by srcf. Works + * by moving files + * + * @param srcf + * Files to be replaced. Leaf directories or globbed file paths + * @param tmpd + * Temporary directory */ protected void replaceFiles(Path srcf, Path tmpd) throws HiveException { FileSystem fs; @@ -459,7 +504,9 @@ /** * Inserts files specified into the partition. Works by moving files - * @param srcf Files to be moved. Leaf directories or globbed file paths + * + * @param srcf + * Files to be moved. 
Leaf directories or globbed file paths */ protected void copyFiles(Path srcf) throws HiveException { FileSystem fs; @@ -473,8 +520,8 @@ public void setInputFormatClass(String name) throws HiveException { try { - setInputFormatClass((Class>) - Class.forName(name, true, JavaUtils.getClassLoader())); + setInputFormatClass((Class>) Class + .forName(name, true, JavaUtils.getClassLoader())); } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + name, e); } @@ -483,15 +530,15 @@ public void setOutputFormatClass(String name) throws HiveException { try { Class origin = Class.forName(name, true, JavaUtils.getClassLoader()); - setOutputFormatClass(HiveFileFormatUtils.getOutputFormatSubstitute(origin)); + setOutputFormatClass(HiveFileFormatUtils + .getOutputFormatSubstitute(origin)); } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + name, e); } } - public boolean isPartitioned() { - if(getPartCols() == null) { + if (getPartCols() == null) { return false; } return (getPartCols().size() != 0); @@ -581,7 +628,8 @@ } /** - * @param viewOriginalText the original view text to set + * @param viewOriginalText + * the original view text to set */ public void setViewOriginalText(String viewOriginalText) { getTTable().setViewOriginalText(viewOriginalText); @@ -595,7 +643,8 @@ } /** - * @param viewExpandedText the expanded view text to set + * @param viewExpandedText + * the expanded view text to set */ public void setViewExpandedText(String viewExpandedText) { getTTable().setViewExpandedText(viewExpandedText); @@ -609,13 +658,15 @@ // be set, or neither boolean hasExpandedText = (getViewExpandedText() != null); boolean hasOriginalText = (getViewOriginalText() != null); - assert(hasExpandedText == hasOriginalText); + assert (hasExpandedText == hasOriginalText); return hasExpandedText; } /** * Creates a partition name -> value spec map object - * @param tp Use the information from this partition. + * + * @param tp + * Use the information from this partition. * @return Partition name to value mapping. 
*/ public LinkedHashMap createSpec( @@ -635,13 +686,13 @@ public Table copy() throws HiveException { Table newTbl = new Table(); - newTbl.schema = this.schema; - newTbl.deserializer = this.deserializer; //TODO: convert to SerDeInfo format + newTbl.schema = schema; + newTbl.deserializer = deserializer; // TODO: convert to SerDeInfo format newTbl.setTTable(getTTable().clone()); - newTbl.uri = this.uri; - newTbl.inputFormatClass = this.inputFormatClass; - newTbl.outputFormatClass = this.outputFormatClass; + newTbl.uri = uri; + newTbl.inputFormatClass = inputFormatClass; + newTbl.outputFormatClass = outputFormatClass; return newTbl; } }; Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -43,10 +43,7 @@ import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; - import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.transport.TMemoryBuffer; @@ -57,16 +54,17 @@ public class Partition { @SuppressWarnings("nls") - static final private Log LOG = LogFactory.getLog("hive.ql.metadata.Partition"); + static final private Log LOG = LogFactory + .getLog("hive.ql.metadata.Partition"); private Table table; private org.apache.hadoop.hive.metastore.api.Partition tPartition; - + private Deserializer deserializer; private Properties schema; private Class inputFormatClass; private Class outputFormatClass; - + /** * @return the tPartition */ @@ -87,38 +85,44 @@ private Path partPath; private URI partURI; - public Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp) throws HiveException { + public Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp) + throws HiveException { initialize(tbl, tp); } /** * Create partition object with the given info. - * @param tbl Table the partition will be in. - * @param partSpec Partition specifications. - * @param location Location of the partition, relative to the table. - * @throws HiveException Thrown if we could not create the partition. + * + * @param tbl + * Table the partition will be in. + * @param partSpec + * Partition specifications. + * @param location + * Location of the partition, relative to the table. + * @throws HiveException + * Thrown if we could not create the partition. */ - public Partition(Table tbl, Map partSpec, - Path location) throws HiveException { + public Partition(Table tbl, Map partSpec, Path location) + throws HiveException { List pvals = new ArrayList(); for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null) { - throw new HiveException("partition spec is invalid. field.getName() does not exist in input."); + throw new HiveException( + "partition spec is invalid. 
field.getName() does not exist in input."); } pvals.add(val); } - org.apache.hadoop.hive.metastore.api.Partition tpart = - new org.apache.hadoop.hive.metastore.api.Partition(); + org.apache.hadoop.hive.metastore.api.Partition tpart = new org.apache.hadoop.hive.metastore.api.Partition(); tpart.setDbName(tbl.getDbName()); tpart.setTableName(tbl.getName()); tpart.setValues(pvals); StorageDescriptor sd = new StorageDescriptor(); try { - //replace with THRIFT-138 + // replace with THRIFT-138 TMemoryBuffer buffer = new TMemoryBuffer(1024); TBinaryProtocol prot = new TBinaryProtocol(buffer); tbl.getTTable().getSd().write(prot); @@ -141,22 +145,24 @@ /** * Initializes this object with the given variables - * @param tbl Table the partition belongs to - * @param tp Thrift Partition object - * @throws HiveException Thrown if we cannot initialize the partition + * + * @param tbl + * Table the partition belongs to + * @param tp + * Thrift Partition object + * @throws HiveException + * Thrown if we cannot initialize the partition */ private void initialize(Table tbl, - org.apache.hadoop.hive.metastore.api.Partition tp) - throws HiveException { + org.apache.hadoop.hive.metastore.api.Partition tp) throws HiveException { table = tbl; tPartition = tp; partName = ""; - if(tbl.isPartitioned()) { + if (tbl.isPartitioned()) { try { - partName = Warehouse.makePartName(tbl.getPartCols(), - tp.getValues()); + partName = Warehouse.makePartName(tbl.getPartCols(), tp.getValues()); if (tp.getSd().getLocation() == null) { // set default if location is not set partPath = new Path(tbl.getDataLocation().toString(), partName); @@ -187,10 +193,10 @@ return table; } - public Path [] getPath() { - Path [] ret = new Path [1]; + public Path[] getPath() { + Path[] ret = new Path[1]; ret[0] = partPath; - return(ret); + return (ret); } public Path getPartitionPath() { @@ -200,9 +206,9 @@ final public URI getDataLocation() { return partURI; } - + final public Deserializer getDeserializer() { - if(deserializer == null) { + if (deserializer == null) { try { initSerDe(); } catch (HiveException e) { @@ -211,34 +217,38 @@ } return deserializer; } - + /** - * @param schema the schema to set + * @param schema + * the schema to set */ public void setSchema(Properties schema) { this.schema = schema; } - + public Properties getSchema() { - if(this.schema == null) - this.schema = MetaStoreUtils.getSchema(this.getTPartition(), this.getTable().getTTable()); - return this.schema; + if (schema == null) { + schema = MetaStoreUtils + .getSchema(getTPartition(), getTable().getTTable()); + } + return schema; } - + protected void initSerDe() throws HiveException { if (deserializer == null) { try { - deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), this.getTPartition(), this.getTable().getTTable()); + deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), + getTPartition(), getTable().getTTable()); } catch (MetaException e) { throw new HiveException(e); } } } - + /** * @param inputFormatClass */ - public void setInputFormatClass(Class inputFormatClass) { + public void setInputFormatClass(Class inputFormatClass) { this.inputFormatClass = inputFormatClass; tPartition.getSd().setInputFormat(inputFormatClass.getName()); } @@ -247,78 +257,78 @@ * @param class1 */ public void setOutputFormatClass(Class class1) { - this.outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1); + outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1); tPartition.getSd().setOutputFormat(class1.getName()); } - 
final public Class getInputFormatClass() throws HiveException{ - if(inputFormatClass == null) { - String clsName = getSchema().getProperty(org.apache.hadoop.hive.metastore.api.Constants.FILE_INPUT_FORMAT, + final public Class getInputFormatClass() + throws HiveException { + if (inputFormatClass == null) { + String clsName = getSchema().getProperty( + org.apache.hadoop.hive.metastore.api.Constants.FILE_INPUT_FORMAT, org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName()); - try{ - setInputFormatClass((Class)Class.forName(clsName, true, JavaUtils.getClassLoader())); - } catch (ClassNotFoundException e) { + try { + setInputFormatClass((Class) Class.forName( + clsName, true, JavaUtils.getClassLoader())); + } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + clsName, e); } - } - + } + return inputFormatClass; } - final public Class getOutputFormatClass() throws HiveException { - if (outputFormatClass == null) { - String clsName = getSchema().getProperty(org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT, - HiveSequenceFileOutputFormat.class.getName()); - try{ - setOutputFormatClass(Class.forName(clsName, true, JavaUtils.getClassLoader())); - } catch (ClassNotFoundException e) { + final public Class getOutputFormatClass() + throws HiveException { + if (outputFormatClass == null) { + String clsName = getSchema().getProperty( + org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT, + HiveSequenceFileOutputFormat.class.getName()); + try { + setOutputFormatClass(Class.forName(clsName, true, JavaUtils + .getClassLoader())); + } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + clsName, e); } - } + } return outputFormatClass; } - + /** - * The number of buckets is a property of the partition. However - internally we are just - * storing it as a property of the table as a short term measure. + * The number of buckets is a property of the partition. However - internally + * we are just storing it as a property of the table as a short term measure. 
*/ public int getBucketCount() { return table.getNumBuckets(); /* - TODO: Keeping this code around for later use when we will support - sampling on tables which are not created with CLUSTERED INTO clause - - // read from table meta data - int numBuckets = this.table.getNumBuckets(); - if (numBuckets == -1) { - // table meta data does not have bucket information - // check if file system has multiple buckets(files) in this partition - String pathPattern = this.partPath.toString() + "/*"; - try { - FileSystem fs = FileSystem.get(this.table.getDataLocation(), Hive.get().getConf()); - FileStatus srcs[] = fs.globStatus(new Path(pathPattern)); - numBuckets = srcs.length; - } - catch (Exception e) { - throw new RuntimeException("Cannot get bucket count for table " + this.table.getName(), e); - } - } - return numBuckets; + * TODO: Keeping this code around for later use when we will support + * sampling on tables which are not created with CLUSTERED INTO clause + * + * // read from table meta data int numBuckets = this.table.getNumBuckets(); + * if (numBuckets == -1) { // table meta data does not have bucket + * information // check if file system has multiple buckets(files) in this + * partition String pathPattern = this.partPath.toString() + "/*"; try { + * FileSystem fs = FileSystem.get(this.table.getDataLocation(), + * Hive.get().getConf()); FileStatus srcs[] = fs.globStatus(new + * Path(pathPattern)); numBuckets = srcs.length; } catch (Exception e) { + * throw new RuntimeException("Cannot get bucket count for table " + + * this.table.getName(), e); } } return numBuckets; */ } public List getBucketCols() { - return this.tPartition.getSd().getBucketCols(); + return tPartition.getSd().getBucketCols(); } /** * mapping from bucket number to bucket path */ - //TODO: add test case and clean it up + // TODO: add test case and clean it up @SuppressWarnings("nls") public Path getBucketPath(int bucketNum) { try { - FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf()); + FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get() + .getConf()); String pathPattern = partPath.toString(); if (getBucketCount() > 0) { pathPattern = pathPattern + "/*"; @@ -326,79 +336,84 @@ LOG.info("Path pattern = " + pathPattern); FileStatus srcs[] = fs.globStatus(new Path(pathPattern)); Arrays.sort(srcs); - for (FileStatus src: srcs) { + for (FileStatus src : srcs) { LOG.info("Got file: " + src.getPath()); } - if(srcs.length == 0) + if (srcs.length == 0) { return null; + } return srcs[bucketNum].getPath(); + } catch (Exception e) { + throw new RuntimeException("Cannot get bucket path for bucket " + + bucketNum, e); } - catch (Exception e) { - throw new RuntimeException("Cannot get bucket path for bucket " + bucketNum, e); - } } /** * mapping from a Path to the bucket number if any */ - private static Pattern bpattern = Pattern.compile("part-([0-9][0-9][0-9][0-9][0-9])"); + private static Pattern bpattern = Pattern + .compile("part-([0-9][0-9][0-9][0-9][0-9])"); private String partName; + @SuppressWarnings("nls") public static int getBucketNum(Path p) { Matcher m = bpattern.matcher(p.getName()); - if(m.find()) { + if (m.find()) { String bnum_str = m.group(1); try { return (Integer.parseInt(bnum_str)); } catch (NumberFormatException e) { - throw new RuntimeException("Unexpected error parsing: "+p.getName()+","+bnum_str); + throw new RuntimeException("Unexpected error parsing: " + p.getName() + + "," + bnum_str); } } return 0; } - @SuppressWarnings("nls") - public Path [] getPath(Sample s) throws 
HiveException { - if(s == null) { + public Path[] getPath(Sample s) throws HiveException { + if (s == null) { return getPath(); } else { int bcount = getBucketCount(); - if(bcount == 0) { + if (bcount == 0) { return getPath(); } Dimension d = s.getSampleDimension(); - if(!d.getDimensionId().equals(table.getBucketingDimensionId())) { + if (!d.getDimensionId().equals(table.getBucketingDimensionId())) { // if the bucket dimension is not the same as the sampling dimension // we must scan all the data return getPath(); } int scount = s.getSampleFraction(); - ArrayList ret = new ArrayList (); + ArrayList ret = new ArrayList(); - if(bcount == scount) { - ret.add(getBucketPath(s.getSampleNum()-1)); + if (bcount == scount) { + ret.add(getBucketPath(s.getSampleNum() - 1)); } else if (bcount < scount) { - if((scount/bcount)*bcount != scount) { - throw new HiveException("Sample Count"+scount+" is not a multiple of bucket count " + - bcount + " for table " + table.getName()); + if ((scount / bcount) * bcount != scount) { + throw new HiveException("Sample Count" + scount + + " is not a multiple of bucket count " + bcount + " for table " + + table.getName()); } // undersampling a bucket - ret.add(getBucketPath((s.getSampleNum()-1)%bcount)); + ret.add(getBucketPath((s.getSampleNum() - 1) % bcount)); } else if (bcount > scount) { - if((bcount/scount)*scount != bcount) { - throw new HiveException("Sample Count"+scount+" is not a divisor of bucket count " + - bcount + " for table " + table.getName()); + if ((bcount / scount) * scount != bcount) { + throw new HiveException("Sample Count" + scount + + " is not a divisor of bucket count " + bcount + " for table " + + table.getName()); } // sampling multiple buckets - for(int i=0; i params = getTPartition().getParameters(); - if (params == null) + Map params = getTPartition().getParameters(); + if (params == null) { return null; + } return params.get(name); } Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveException.java (working copy) @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.metadata; -/** +/** * Generic exception class for Hive */ @@ -26,7 +26,7 @@ public HiveException() { super(); } - + public HiveException(String message) { super(message); } @@ -39,4 +39,3 @@ super(message, cause); } } - Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -31,8 +31,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.metastore.HiveMetaException; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -44,7 +44,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; @@ -53,14 +52,13 @@ import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.util.StringUtils; - import org.apache.thrift.TException; /** - * The Hive class contains information about this instance of Hive. - * An instance of Hive represents a set of data in a file system (usually HDFS) - * organized for easy query processing - * + * The Hive class contains information about this instance of Hive. An instance + * of Hive represents a set of data in a file system (usually HDFS) organized + * for easy query processing + * */ public class Hive { @@ -71,34 +69,39 @@ private IMetaStoreClient metaStoreClient; private static ThreadLocal hiveDB = new ThreadLocal() { + @Override protected synchronized Object initialValue() { - return null; + return null; } + @Override public synchronized void remove() { - if( this.get() != null ) { - ((Hive)this.get()).close(); + if (this.get() != null) { + ((Hive) this.get()).close(); } super.remove(); } }; /** - * Gets hive object for the current thread. If one is not initialized then a new one is created - * If the new configuration is different in metadata conf vars then a new one is created. - * @param c new Hive Configuration + * Gets hive object for the current thread. If one is not initialized then a + * new one is created If the new configuration is different in metadata conf + * vars then a new one is created. + * + * @param c + * new Hive Configuration * @return Hive object for current thread * @throws HiveException - * + * */ public static Hive get(HiveConf c) throws HiveException { boolean needsRefresh = false; Hive db = hiveDB.get(); - if(db != null) { - for(HiveConf.ConfVars oneVar: HiveConf.metaVars) { + if (db != null) { + for (HiveConf.ConfVars oneVar : HiveConf.metaVars) { String oldVar = db.getConf().getVar(oneVar); String newVar = c.getVar(oneVar); - if(oldVar.compareToIgnoreCase(newVar) != 0) { + if (oldVar.compareToIgnoreCase(newVar) != 0) { needsRefresh = true; break; } @@ -109,16 +112,19 @@ /** * get a connection to metastore. 
see get(HiveConf) function for comments - * @param c new conf - * @param needsRefresh if true then creates a new one + * + * @param c + * new conf + * @param needsRefresh + * if true then creates a new one * @return The connection to the metastore * @throws HiveException */ public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException { Hive db = hiveDB.get(); - if(db == null || needsRefresh) { + if (db == null || needsRefresh) { closeCurrent(); - c.set("fs.scheme.class","dfs"); + c.set("fs.scheme.class", "dfs"); db = new Hive(c); hiveDB.set(db); } @@ -127,7 +133,7 @@ public static Hive get() throws HiveException { Hive db = hiveDB.get(); - if(db == null) { + if (db == null) { db = new Hive(new HiveConf(Hive.class)); hiveDB.set(db); } @@ -140,13 +146,13 @@ /** * Hive - * + * * @param argFsRoot * @param c - * + * */ - private Hive(HiveConf c) throws HiveException { - this.conf = c; + private Hive(HiveConf c) throws HiveException { + conf = c; } /** @@ -159,31 +165,53 @@ /** * Creates a table metdata and the directory for the table data - * @param tableName name of the table - * @param columns list of fields of the table - * @param partCols partition keys of the table - * @param fileInputFormat Class of the input format of the table data file - * @param fileOutputFormat Class of the output format of the table data file - * @throws HiveException thrown if the args are invalid or if the metadata or the data directory couldn't be created + * + * @param tableName + * name of the table + * @param columns + * list of fields of the table + * @param partCols + * partition keys of the table + * @param fileInputFormat + * Class of the input format of the table data file + * @param fileOutputFormat + * Class of the output format of the table data file + * @throws HiveException + * thrown if the args are invalid or if the metadata or the data + * directory couldn't be created */ - public void createTable(String tableName, List columns, List partCols, - Class fileInputFormat, Class fileOutputFormat) throws HiveException { - this.createTable(tableName, columns, partCols, fileInputFormat, fileOutputFormat, -1, null); + public void createTable(String tableName, List columns, + List partCols, Class fileInputFormat, + Class fileOutputFormat) throws HiveException { + this.createTable(tableName, columns, partCols, fileInputFormat, + fileOutputFormat, -1, null); } /** * Creates a table metdata and the directory for the table data - * @param tableName name of the table - * @param columns list of fields of the table - * @param partCols partition keys of the table - * @param fileInputFormat Class of the input format of the table data file - * @param fileOutputFormat Class of the output format of the table data file - * @param bucketCount number of buckets that each partition (or the table itself) should be divided into - * @throws HiveException thrown if the args are invalid or if the metadata or the data directory couldn't be created + * + * @param tableName + * name of the table + * @param columns + * list of fields of the table + * @param partCols + * partition keys of the table + * @param fileInputFormat + * Class of the input format of the table data file + * @param fileOutputFormat + * Class of the output format of the table data file + * @param bucketCount + * number of buckets that each partition (or the table itself) should + * be divided into + * @throws HiveException + * thrown if the args are invalid or if the metadata or the data + * directory couldn't be created */ - public void 
createTable(String tableName, List columns, List partCols, - Class fileInputFormat, Class fileOutputFormat, int bucketCount, List bucketCols) throws HiveException { - if(columns == null) { + public void createTable(String tableName, List columns, + List partCols, Class fileInputFormat, + Class fileOutputFormat, int bucketCount, List bucketCols) + throws HiveException { + if (columns == null) { throw new HiveException("columns not specified for table " + tableName); } @@ -191,16 +219,19 @@ tbl.setInputFormatClass(fileInputFormat.getName()); tbl.setOutputFormatClass(fileOutputFormat.getName()); - for (String col: columns) { - FieldSchema field = new FieldSchema(col, org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME, "default"); + for (String col : columns) { + FieldSchema field = new FieldSchema(col, + org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME, "default"); tbl.getCols().add(field); } - if(partCols != null) { + if (partCols != null) { for (String partCol : partCols) { FieldSchema part = new FieldSchema(); part.setName(partCol); - part.setType(org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME); // default partition key + part.setType(org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME); // default + // partition + // key tbl.getPartCols().add(part); } } @@ -210,19 +241,22 @@ createTable(tbl); } - /** * Updates the existing table metadata with the new metadata. - * @param tblName name of the existing table - * @param newTbl new name of the table. could be the old name - * @throws InvalidOperationException if the changes in metadata is not acceptable + * + * @param tblName + * name of the existing table + * @param newTbl + * new name of the table. could be the old name + * @throws InvalidOperationException + * if the changes in metadata is not acceptable * @throws TException */ - public void alterTable(String tblName, - Table newTbl) throws InvalidOperationException, - HiveException { + public void alterTable(String tblName, Table newTbl) + throws InvalidOperationException, HiveException { try { - getMSC().alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, newTbl.getTTable()); + getMSC().alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, + newTbl.getTTable()); } catch (MetaException e) { throw new HiveException("Unable to alter table.", e); } catch (TException e) { @@ -232,16 +266,20 @@ /** * Updates the existing table metadata with the new metadata. - * @param tblName name of the existing table - * @param newTbl new name of the table. could be the old name - * @throws InvalidOperationException if the changes in metadata is not acceptable + * + * @param tblName + * name of the existing table + * @param newTbl + * new name of the table. 
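As a sketch of the createTable overloads documented above (the table, column, and partition names are invented; the input and output format classes are ones this file already references, and the signatures are assumed to be as shown in the surrounding hunks):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class CreateTableExample {
  public static void main(String[] args) throws HiveException {
    Hive db = Hive.get(new HiveConf(CreateTableExample.class));
    // Columns are registered with the default STRING type, and the single
    // partition key "ds" is likewise treated as a string partition column.
    List<String> columns = Arrays.asList("key", "value");
    List<String> partCols = Arrays.asList("ds");
    // The shorter overload delegates with -1 buckets and null bucket columns.
    db.createTable("example_tbl", columns, partCols,
        SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class);
  }
}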
could be the old name + * @throws InvalidOperationException + * if the changes in metadata is not acceptable * @throws TException */ public void alterPartition(String tblName, Partition newPart) - throws InvalidOperationException, HiveException { + throws InvalidOperationException, HiveException { try { getMSC().alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, - newPart.getTPartition()); + newPart.getTPartition()); } catch (MetaException e) { throw new HiveException("Unable to alter partition.", e); @@ -252,7 +290,9 @@ /** * Creates the table with the give objects - * @param tbl a table object + * + * @param tbl + * a table object * @throws HiveException */ public void createTable(Table tbl) throws HiveException { @@ -261,15 +301,19 @@ /** * Creates the table with the give objects - * @param tbl a table object - * @param ifNotExists if true, ignore AlreadyExistsException + * + * @param tbl + * a table object + * @param ifNotExists + * if true, ignore AlreadyExistsException * @throws HiveException */ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { try { tbl.initSerDe(); - if(tbl.getCols().size() == 0) { - tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getName(), tbl.getDeserializer())); + if (tbl.getCols().size() == 0) { + tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getName(), + tbl.getDeserializer())); } tbl.checkValidity(); getMSC().createTable(tbl.getTTable()); @@ -283,11 +327,15 @@ } /** - * Drops table along with the data in it. If the table doesn't exist - * then it is a no-op - * @param dbName database where the table lives - * @param tableName table to drop - * @throws HiveException thrown if the drop fails + * Drops table along with the data in it. If the table doesn't exist then it + * is a no-op + * + * @param dbName + * database where the table lives + * @param tableName + * table to drop + * @throws HiveException + * thrown if the drop fails */ public void dropTable(String dbName, String tableName) throws HiveException { dropTable(dbName, tableName, true, true); @@ -295,10 +343,12 @@ /** * Drops the table. + * * @param tableName - * @param deleteData deletes the underlying data along with metadata - * @param ignoreUnknownTab an exception if thrown if this is falser and - * table doesn't exist + * @param deleteData + * deletes the underlying data along with metadata + * @param ignoreUnknownTab + * an exception if thrown if this is falser and table doesn't exist * @throws HiveException */ public void dropTable(String dbName, String tableName, boolean deleteData, @@ -321,31 +371,37 @@ /** * Returns metadata of the table. 
- * @param dbName the name of the database - * @param tableName the name of the table + * + * @param dbName + * the name of the database + * @param tableName + * the name of the table * @return the table - * @exception HiveException if there's an internal error or if the - * table doesn't exist + * @exception HiveException + * if there's an internal error or if the table doesn't exist */ public Table getTable(final String dbName, final String tableName) - throws HiveException { + throws HiveException { return this.getTable(dbName, tableName, true); } /** * Returns metadata of the table - * @param dbName the name of the database - * @param tableName the name of the table - * @param throwException controls whether an exception is thrown - * or a returns a null + * + * @param dbName + * the name of the database + * @param tableName + * the name of the table + * @param throwException + * controls whether an exception is thrown or a returns a null * @return the table or if throwException is false a null value. * @throws HiveException */ public Table getTable(final String dbName, final String tableName, boolean throwException) throws HiveException { - if(tableName == null || tableName.equals("")) { + if (tableName == null || tableName.equals("")) { throw new HiveException("empty table creation??"); } Table table = new Table(); @@ -353,7 +409,7 @@ try { tTable = getMSC().getTable(dbName, tableName); } catch (NoSuchObjectException e) { - if(throwException) { + if (throwException) { LOG.error(StringUtils.stringifyException(e)); throw new InvalidTableException("Table not found ", tableName); } @@ -362,17 +418,20 @@ throw new HiveException("Unable to fetch table " + tableName, e); } // just a sanity check - assert(tTable != null); + assert (tTable != null); try { // Use LazySimpleSerDe for MetadataTypedColumnsetSerDe. - // NOTE: LazySimpleSerDe does not support tables with a single column of col - // of type "array". This happens when the table is created using an + // NOTE: LazySimpleSerDe does not support tables with a single column of + // col + // of type "array". This happens when the table is created using + // an // earlier version of Hive. 
if (tTable.getSd().getSerdeInfo().getSerializationLib().equals( - org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class.getName()) + org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class + .getName()) && tTable.getSd().getColsSize() > 0 - && tTable.getSd().getCols().get(0).getType().indexOf('<') == -1 ) { + && tTable.getSd().getCols().get(0).getType().indexOf('<') == -1) { tTable.getSd().getSerdeInfo().setSerializationLib( org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); } @@ -388,25 +447,33 @@ return table; } - table.setInputFormatClass((Class>) - Class.forName(table.getSchema().getProperty(org.apache.hadoop.hive.metastore.api.Constants.FILE_INPUT_FORMAT, - org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName()), - true, JavaUtils.getClassLoader())); - table.setOutputFormatClass((Class) - Class.forName(table.getSchema().getProperty(org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT, - HiveSequenceFileOutputFormat.class.getName()), - true, JavaUtils.getClassLoader())); + table + .setInputFormatClass((Class>) Class + .forName( + table + .getSchema() + .getProperty( + org.apache.hadoop.hive.metastore.api.Constants.FILE_INPUT_FORMAT, + org.apache.hadoop.mapred.SequenceFileInputFormat.class + .getName()), true, JavaUtils.getClassLoader())); + table.setOutputFormatClass(Class.forName(table.getSchema().getProperty( + org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT, + HiveSequenceFileOutputFormat.class.getName()), true, JavaUtils + .getClassLoader())); table.setDeserializer(MetaStoreUtils.getDeserializer(getConf(), p)); table.setDataLocation(new URI(tTable.getSd().getLocation())); - } catch(Exception e) { + } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); throw new HiveException(e); } - String sf = table.getSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT); - if(sf != null) { + String sf = table + .getSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT); + if (sf != null) { char[] b = sf.toCharArray(); - if ((b.length == 1) && (b[0] < 10)){ // ^A, ^B, ^C, ^D, \t - table.setSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, Integer.toString(b[0])); + if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t + table.setSerdeParam( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, + Integer.toString(b[0])); } } table.checkValidity(); @@ -420,20 +487,21 @@ /** * returns all existing tables from default database which match the given * pattern. The matching occurs as per Java regular expressions - * + * * @param tablePattern * java re pattern * @return list of table names * @throws HiveException */ - public List getTablesByPattern(String tablePattern) throws HiveException { + public List getTablesByPattern(String tablePattern) + throws HiveException { return getTablesForDb(MetaStoreUtils.DEFAULT_DATABASE_NAME, tablePattern); } /** * returns all existing tables from the given database which match the given * pattern. 
The matching occurs as per Java regular expressions - * + * * @param database * the database name * @param tablePattern @@ -441,10 +509,11 @@ * @return list of table names * @throws HiveException */ - public List getTablesForDb(String database, String tablePattern) throws HiveException { + public List getTablesForDb(String database, String tablePattern) + throws HiveException { try { return getMSC().getTables(database, tablePattern); - } catch(Exception e) { + } catch (Exception e) { throw new HiveException(e); } } @@ -456,10 +525,11 @@ * @throws AlreadyExistsException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#createDatabase(java.lang.String, java.lang.String) + * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#createDatabase(java.lang.String, + * java.lang.String) */ - protected boolean createDatabase(String name, String locationUri) throws AlreadyExistsException, - MetaException, TException { + protected boolean createDatabase(String name, String locationUri) + throws AlreadyExistsException, MetaException, TException { return getMSC().createDatabase(name, locationUri); } @@ -475,39 +545,47 @@ } /** - * Load a directory into a Hive Table Partition - * - Alters existing content of the partition with the contents of loadPath. - * - If he partition does not exist - one is created - * - files in loadPath are moved into Hive. But the directory itself is not removed. - * - * @param loadPath Directory containing files to load into Table - * @param tableName name of table to be loaded. - * @param partSpec defines which partition needs to be loaded - * @param replace if true - replace files in the partition, otherwise add files to the partition - * @param tmpDirPath The temporary directory. + * Load a directory into a Hive Table Partition - Alters existing content of + * the partition with the contents of loadPath. - If he partition does not + * exist - one is created - files in loadPath are moved into Hive. But the + * directory itself is not removed. + * + * @param loadPath + * Directory containing files to load into Table + * @param tableName + * name of table to be loaded. + * @param partSpec + * defines which partition needs to be loaded + * @param replace + * if true - replace files in the partition, otherwise add files to + * the partition + * @param tmpDirPath + * The temporary directory. */ public void loadPartition(Path loadPath, String tableName, - AbstractMap partSpec, boolean replace, - Path tmpDirPath) - throws HiveException { + AbstractMap partSpec, boolean replace, Path tmpDirPath) + throws HiveException { Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); try { - /** Move files before creating the partition since down stream processes check - * for existence of partition in metadata before accessing the data. If partition - * is created before data is moved, downstream waiting processes might move forward - * with partial data + /** + * Move files before creating the partition since down stream processes + * check for existence of partition in metadata before accessing the data. + * If partition is created before data is moved, downstream waiting + * processes might move forward with partial data */ FileSystem fs; Path partPath; // check if partition exists without creating it - Partition part = getPartition (tbl, partSpec, false); + Partition part = getPartition(tbl, partSpec, false); if (part == null) { - // Partition does not exist currently. 
The partition name is extrapolated from + // Partition does not exist currently. The partition name is + // extrapolated from // the table's location (even if the table is marked external) fs = FileSystem.get(tbl.getDataLocation(), getConf()); - partPath = new Path(tbl.getDataLocation().getPath(), Warehouse.makePartName(partSpec)); + partPath = new Path(tbl.getDataLocation().getPath(), Warehouse + .makePartName(partSpec)); } else { // Partition exists already. Get the path from the partition. This will // get the default path for Hive created partitions or the external path @@ -515,7 +593,7 @@ partPath = part.getPath()[0]; fs = partPath.getFileSystem(getConf()); } - if(replace) { + if (replace) { Hive.replaceFiles(loadPath, partPath, fs, tmpDirPath); } else { Hive.copyFiles(loadPath, partPath, fs); @@ -536,22 +614,25 @@ } /** - * Load a directory into a Hive Table. - * - Alters existing content of table with the contents of loadPath. - * - If table does not exist - an exception is thrown - * - files in loadPath are moved into Hive. But the directory itself is not removed. - * - * @param loadPath Directory containing files to load into Table - * @param tableName name of table to be loaded. - * @param replace if true - replace files in the table, otherwise add files to table - * @param tmpDirPath The temporary directory. + * Load a directory into a Hive Table. - Alters existing content of table with + * the contents of loadPath. - If table does not exist - an exception is + * thrown - files in loadPath are moved into Hive. But the directory itself is + * not removed. + * + * @param loadPath + * Directory containing files to load into Table + * @param tableName + * name of table to be loaded. + * @param replace + * if true - replace files in the table, otherwise add files to table + * @param tmpDirPath + * The temporary directory. */ - public void loadTable(Path loadPath, String tableName, - boolean replace, - Path tmpDirPath) throws HiveException { + public void loadTable(Path loadPath, String tableName, boolean replace, + Path tmpDirPath) throws HiveException { Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - if(replace) { + if (replace) { tbl.replaceFiles(loadPath, tmpDirPath); } else { tbl.copyFiles(loadPath); @@ -560,23 +641,32 @@ /** * Creates a partition. 
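A hypothetical sketch of loadTable as documented above; the staging path, scratch path, and table name are placeholders, not values taken from this patch.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class LoadTableExample {
  public static void main(String[] args) throws HiveException {
    Hive db = Hive.get(new HiveConf(LoadTableExample.class));
    Path loadPath = new Path("/tmp/staging/example_tbl");    // placeholder staging dir
    Path tmpDirPath = new Path("/tmp/hive-scratch/example"); // placeholder temp dir
    // replace == true swaps out the table's existing files via tmpDirPath;
    // replace == false would simply add the staged files to the table.
    db.loadTable(loadPath, "example_tbl", true, tmpDirPath);
  }
}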
- * @param tbl table for which partition needs to be created - * @param partSpec partition keys and their values + * + * @param tbl + * table for which partition needs to be created + * @param partSpec + * partition keys and their values * @return created partition object - * @throws HiveException if table doesn't exist or partition already exists + * @throws HiveException + * if table doesn't exist or partition already exists */ public Partition createPartition(Table tbl, Map partSpec) - throws HiveException { - return createPartition(tbl, partSpec, null); + throws HiveException { + return createPartition(tbl, partSpec, null); } /** * Creates a partition - * @param tbl table for which partition needs to be created - * @param partSpec partition keys and their values - * @param location location of this partition + * + * @param tbl + * table for which partition needs to be created + * @param partSpec + * partition keys and their values + * @param location + * location of this partition * @return created partition object - * @throws HiveException if table doesn't exist or partition already exists + * @throws HiveException + * if table doesn't exist or partition already exists */ public Partition createPartition(Table tbl, Map partSpec, Path location) throws HiveException { @@ -585,8 +675,9 @@ for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); - if(val == null || val.length() == 0) { - throw new HiveException("add partition: Value for key " + field.getName() + " is null or empty"); + if (val == null || val.length() == 0) { + throw new HiveException("add partition: Value for key " + + field.getName() + " is null or empty"); } } @@ -603,33 +694,41 @@ /** * Returns partition metadata - * @param tbl the partition's table - * @param partSpec partition keys and values - * @param forceCreate if this is true and partition doesn't exist then a partition is created + * + * @param tbl + * the partition's table + * @param partSpec + * partition keys and values + * @param forceCreate + * if this is true and partition doesn't exist then a partition is + * created * @return result partition object or null if there is no partition * @throws HiveException */ - public Partition getPartition(Table tbl, Map partSpec, boolean forceCreate) - throws HiveException { - if(!tbl.isValidSpec(partSpec)) { + public Partition getPartition(Table tbl, Map partSpec, + boolean forceCreate) throws HiveException { + if (!tbl.isValidSpec(partSpec)) { throw new HiveException("Invalid partition: " + partSpec); } List pvals = new ArrayList(); for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); - if(val == null || val.length() == 0) { - throw new HiveException("get partition: Value for key " + field.getName() + " is null or empty"); + if (val == null || val.length() == 0) { + throw new HiveException("get partition: Value for key " + + field.getName() + " is null or empty"); } pvals.add(val); } org.apache.hadoop.hive.metastore.api.Partition tpart = null; try { tpart = getMSC().getPartition(tbl.getDbName(), tbl.getName(), pvals); - if(tpart == null && forceCreate) { - LOG.debug("creating partition for table " + tbl.getName() + " with partition spec : " + partSpec); - tpart = getMSC().appendPartition(tbl.getDbName(), tbl.getName(), pvals);; + if (tpart == null && forceCreate) { + LOG.debug("creating partition for table " + tbl.getName() + + " with partition spec : " + partSpec); + tpart = getMSC().appendPartition(tbl.getDbName(), tbl.getName(), pvals); + ; } - 
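A sketch of looking up a partition through getPartition without forcing its creation; the table name and partition spec are placeholders, and the signatures are assumed to match the hunks shown here.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class PartitionLookupExample {
  public static void main(String[] args) throws HiveException {
    Hive db = Hive.get(new HiveConf(PartitionLookupExample.class));
    Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "example_tbl");
    // Keys must match the table's partition columns; null or empty values
    // are rejected with a HiveException.
    Map<String, String> partSpec = new LinkedHashMap<String, String>();
    partSpec.put("ds", "2010-01-20");
    // forceCreate == false: look the partition up and return null if it does
    // not exist, rather than appending it to the metastore.
    Partition part = db.getPartition(tbl, partSpec, false);
    System.out.println(part == null ? "no such partition" : part.toString());
  }
}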
if(tpart == null){ + if (tpart == null) { return null; } } catch (Exception e) { @@ -639,8 +738,8 @@ return new Partition(tbl, tpart); } - public boolean dropPartition(String db_name, String tbl_name, List part_vals, - boolean deleteData) throws HiveException { + public boolean dropPartition(String db_name, String tbl_name, + List part_vals, boolean deleteData) throws HiveException { try { return getMSC().dropPartition(db_name, tbl_name, part_vals, deleteData); } catch (NoSuchObjectException e) { @@ -650,7 +749,8 @@ } } - public List getPartitionNames(String dbName, String tblName, short max) throws HiveException { + public List getPartitionNames(String dbName, String tblName, short max) + throws HiveException { List names = null; try { names = getMSC().listPartitionNames(dbName, tblName, max); @@ -663,15 +763,18 @@ /** * get all the partitions that the table has - * @param tbl object for which partition is needed + * + * @param tbl + * object for which partition is needed * @return list of partition objects * @throws HiveException */ public List getPartitions(Table tbl) throws HiveException { - if(tbl.isPartitioned()) { + if (tbl.isPartitioned()) { List tParts; try { - tParts = getMSC().listPartitions(tbl.getDbName(), tbl.getName(), (short) -1); + tParts = getMSC().listPartitions(tbl.getDbName(), tbl.getName(), + (short) -1); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); throw new HiveException(e); @@ -683,7 +786,8 @@ return parts; } else { // create an empty partition. - // HACK, HACK. SemanticAnalyzer code requires that an empty partition when the table is not partitioned + // HACK, HACK. SemanticAnalyzer code requires that an empty partition when + // the table is not partitioned org.apache.hadoop.hive.metastore.api.Partition tPart = new org.apache.hadoop.hive.metastore.api.Partition(); tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy Partition part = new Partition(tbl, tPart); @@ -693,40 +797,47 @@ } } - static private void checkPaths(FileSystem fs, FileStatus [] srcs, Path destf, boolean replace) throws HiveException { + static private void checkPaths(FileSystem fs, FileStatus[] srcs, Path destf, + boolean replace) throws HiveException { try { - for(int i=0; i getFieldsFromDeserializer(String name, Deserializer serde) throws HiveException { + public static List getFieldsFromDeserializer(String name, + Deserializer serde) throws HiveException { try { return MetaStoreUtils.getFieldsFromDeserializer(name, serde); } catch (SerDeException e) { - throw new HiveException("Error in getting fields from serde. " + e.getMessage(), e); + throw new HiveException("Error in getting fields from serde. " + + e.getMessage(), e); } catch (MetaException e) { - throw new HiveException("Error in getting fields from serde." + e.getMessage(), e); + throw new HiveException("Error in getting fields from serde." + + e.getMessage(), e); } } }; Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java (working copy) @@ -12,107 +12,114 @@ private List tablesNotInMs = new ArrayList(); private List partitionsNotOnFs = new ArrayList(); private List partitionsNotInMs = new ArrayList(); - + /** * @return a list of tables not found on the filesystem. 
*/ public List getTablesNotOnFs() { return tablesNotOnFs; } - + /** - * @param tablesNotOnFs a list of tables not found on the filesystem. + * @param tablesNotOnFs + * a list of tables not found on the filesystem. */ public void setTablesNotOnFs(List tablesNotOnFs) { this.tablesNotOnFs = tablesNotOnFs; } - + /** * @return a list of tables not found in the metastore. */ public List getTablesNotInMs() { return tablesNotInMs; } - + /** - * @param tablesNotInMs a list of tables not found in the metastore. + * @param tablesNotInMs + * a list of tables not found in the metastore. */ public void setTablesNotInMs(List tablesNotInMs) { this.tablesNotInMs = tablesNotInMs; } - + /** * @return a list of partitions not found on the fs */ public List getPartitionsNotOnFs() { return partitionsNotOnFs; } - + /** - * @param partitionsNotOnFs a list of partitions not found on the fs + * @param partitionsNotOnFs + * a list of partitions not found on the fs */ public void setPartitionsNotOnFs(List partitionsNotOnFs) { this.partitionsNotOnFs = partitionsNotOnFs; } - + /** * @return a list of partitions not found in the metastore */ public List getPartitionsNotInMs() { return partitionsNotInMs; } - + /** - * @param partitionsNotInMs a list of partitions not found in the metastore + * @param partitionsNotInMs + * a list of partitions not found in the metastore */ public void setPartitionsNotInMs(List partitionsNotInMs) { this.partitionsNotInMs = partitionsNotInMs; - } - + } + /** - * A basic description of a partition that is - * missing from either the fs or the ms. + * A basic description of a partition that is missing from either the fs or + * the ms. */ public static class PartitionResult implements Comparable { private String partitionName; private String tableName; - + /** * @return name of partition */ public String getPartitionName() { return partitionName; } - + /** - * @param partitionName name of partition + * @param partitionName + * name of partition */ public void setPartitionName(String partitionName) { this.partitionName = partitionName; } - + /** * @return table name */ public String getTableName() { return tableName; } - + /** - * @param tableName table name + * @param tableName + * table name */ public void setTableName(String tableName) { this.tableName = tableName; } - + + @Override public String toString() { return tableName + ":" + partitionName; } - + public int compareTo(PartitionResult o) { int ret = tableName.compareTo(o.tableName); return ret != 0 ? 
ret : partitionName.compareTo(o.partitionName); } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java (working copy) @@ -20,7 +20,7 @@ /** * General collection of helper functions - * + * */ public class HiveUtils { @@ -32,7 +32,6 @@ public static final String RBRACE = "}"; public static final String LINE_SEP = System.getProperty("line.separator"); - public static String escapeString(String str) { int length = str.length(); StringBuilder escape = new StringBuilder(length + 16); @@ -78,13 +77,12 @@ } else { escape.append(c); } - break; + break; } } return (escape.toString()); } - public static String lightEscapeString(String str) { int length = str.length(); StringBuilder escape = new StringBuilder(length + 16); @@ -106,7 +104,7 @@ break; default: escape.append(c); - break; + break; } } return (escape.toString()); Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/InvalidTableException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/InvalidTableException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/InvalidTableException.java (working copy) @@ -18,9 +18,9 @@ package org.apache.hadoop.hive.ql.metadata; -/** +/** * Generic exception class for Hive - * + * */ public class InvalidTableException extends HiveException { @@ -30,7 +30,7 @@ super(); this.tableName = tableName; } - + public InvalidTableException(String message, String tableName) { super(message); this.tableName = tableName; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -18,21 +18,19 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; import java.util.Set; -import java.io.Serializable; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -40,32 +38,35 @@ import org.apache.hadoop.hive.ql.plan.tableDesc; /** - * Processor Context for creating map reduce task. Walk the tree in a DFS manner and process the nodes. Some state is - * maintained about the current nodes visited so far. + * Processor Context for creating map reduce task. Walk the tree in a DFS manner + * and process the nodes. Some state is maintained about the current nodes + * visited so far. 
*/ public class GenMRProcContext implements NodeProcessorCtx { - /** - * GenMapRedCtx is used to keep track of the current state. + /** + * GenMapRedCtx is used to keep track of the current state. */ public static class GenMapRedCtx { - Task currTask; - Operator currTopOp; - String currAliasId; - - public GenMapRedCtx() { + Task currTask; + Operator currTopOp; + String currAliasId; + + public GenMapRedCtx() { } - + /** - * @param currTask the current task - * @param currTopOp the current top operator being traversed - * @param currAliasId the current alias for the to operator + * @param currTask + * the current task + * @param currTopOp + * the current top operator being traversed + * @param currAliasId + * the current alias for the to operator */ - public GenMapRedCtx (Task currTask, - Operator currTopOp, - String currAliasId) { - this.currTask = currTask; - this.currTopOp = currTopOp; + public GenMapRedCtx(Task currTask, + Operator currTopOp, String currAliasId) { + this.currTask = currTask; + this.currTopOp = currTopOp; this.currAliasId = currAliasId; } @@ -92,24 +93,24 @@ } public static class GenMRUnionCtx { - Task uTask; - List taskTmpDir; - List tt_desc; + Task uTask; + List taskTmpDir; + List tt_desc; - public GenMRUnionCtx() { + public GenMRUnionCtx() { uTask = null; taskTmpDir = new ArrayList(); - tt_desc = new ArrayList(); + tt_desc = new ArrayList(); } - public Task getUTask() { + public Task getUTask() { return uTask; } - public void setUTask(Task uTask) { + public void setUTask(Task uTask) { this.uTask = uTask; } - + public void addTaskTmpDir(String taskTmpDir) { this.taskTmpDir.add(taskTmpDir); } @@ -128,16 +129,16 @@ } public static class GenMRMapJoinCtx { - String taskTmpDir; - tableDesc tt_desc; - Operator rootMapJoinOp; - MapJoinOperator oldMapJoin; - - public GenMRMapJoinCtx() { - taskTmpDir = null; - tt_desc = null; + String taskTmpDir; + tableDesc tt_desc; + Operator rootMapJoinOp; + MapJoinOperator oldMapJoin; + + public GenMRMapJoinCtx() { + taskTmpDir = null; + tt_desc = null; rootMapJoinOp = null; - oldMapJoin = null; + oldMapJoin = null; } /** @@ -146,14 +147,15 @@ * @param rootMapJoinOp * @param oldMapJoin */ - public GenMRMapJoinCtx(String taskTmpDir, tableDesc tt_desc, - Operator rootMapJoinOp, MapJoinOperator oldMapJoin) { - this.taskTmpDir = taskTmpDir; - this.tt_desc = tt_desc; + public GenMRMapJoinCtx(String taskTmpDir, tableDesc tt_desc, + Operator rootMapJoinOp, + MapJoinOperator oldMapJoin) { + this.taskTmpDir = taskTmpDir; + this.tt_desc = tt_desc; this.rootMapJoinOp = rootMapJoinOp; - this.oldMapJoin = oldMapJoin; + this.oldMapJoin = oldMapJoin; } - + public void setTaskTmpDir(String taskTmpDir) { this.taskTmpDir = taskTmpDir; } @@ -178,7 +180,8 @@ } /** - * @param rootMapJoinOp the rootMapJoinOp to set + * @param rootMapJoinOp + * the rootMapJoinOp to set */ public void setRootMapJoinOp(Operator rootMapJoinOp) { this.rootMapJoinOp = rootMapJoinOp; @@ -192,7 +195,8 @@ } /** - * @param oldMapJoin the oldMapJoin to set + * @param oldMapJoin + * the oldMapJoin to set */ public void setOldMapJoin(MapJoinOperator oldMapJoin) { this.oldMapJoin = oldMapJoin; @@ -201,174 +205,188 @@ private HiveConf conf; private HashMap, Task> opTaskMap; - private HashMap unionTaskMap; - private HashMap mapJoinTaskMap; + private HashMap unionTaskMap; + private HashMap mapJoinTaskMap; private List> seenOps; - private List seenFileSinkOps; + private List seenFileSinkOps; - private ParseContext parseCtx; - private List> mvTask; - private List> rootTasks; + private ParseContext 
parseCtx; + private List> mvTask; + private List> rootTasks; - private LinkedHashMap, GenMapRedCtx> mapCurrCtx; - private Task currTask; - private Operator currTopOp; - private UnionOperator currUnionOp; - private MapJoinOperator currMapJoinOp; - private String currAliasId; + private LinkedHashMap, GenMapRedCtx> mapCurrCtx; + private Task currTask; + private Operator currTopOp; + private UnionOperator currUnionOp; + private MapJoinOperator currMapJoinOp; + private String currAliasId; private List> rootOps; - + /** - * Set of read entities. This list is generated by the walker and is - * passed to the hooks. + * Set of read entities. This list is generated by the walker and is passed to + * the hooks. */ - private Set inputs; + private Set inputs; /** - * Set of write entities. This list is generated by the walker and is - * passed to the hooks. + * Set of write entities. This list is generated by the walker and is passed + * to the hooks. */ - private Set outputs; - - public GenMRProcContext() { + private Set outputs; + + public GenMRProcContext() { } - + /** - * @param conf hive configuration - * @param opTaskMap reducer to task mapping - * @param seenOps operator already visited - * @param parseCtx current parse context - * @param rootTasks root tasks for the plan - * @param mvTask the final move task - * @param mapCurrCtx operator to task mappings - * @param inputs the set of input tables/partitions generated by the walk - * @param outputs the set of destinations generated by the walk + * @param conf + * hive configuration + * @param opTaskMap + * reducer to task mapping + * @param seenOps + * operator already visited + * @param parseCtx + * current parse context + * @param rootTasks + * root tasks for the plan + * @param mvTask + * the final move task + * @param mapCurrCtx + * operator to task mappings + * @param inputs + * the set of input tables/partitions generated by the walk + * @param outputs + * the set of destinations generated by the walk */ - public GenMRProcContext ( - HiveConf conf, - HashMap, Task> opTaskMap, - List> seenOps, - ParseContext parseCtx, - List> mvTask, - List> rootTasks, - LinkedHashMap, GenMapRedCtx> mapCurrCtx, - Set inputs, - Set outputs) - { - this.conf = conf; - this.opTaskMap = opTaskMap; - this.seenOps = seenOps; - this.mvTask = mvTask; - this.parseCtx = parseCtx; - this.rootTasks = rootTasks; + public GenMRProcContext( + HiveConf conf, + HashMap, Task> opTaskMap, + List> seenOps, ParseContext parseCtx, + List> mvTask, + List> rootTasks, + LinkedHashMap, GenMapRedCtx> mapCurrCtx, + Set inputs, Set outputs) { + this.conf = conf; + this.opTaskMap = opTaskMap; + this.seenOps = seenOps; + this.mvTask = mvTask; + this.parseCtx = parseCtx; + this.rootTasks = rootTasks; this.mapCurrCtx = mapCurrCtx; this.inputs = inputs; this.outputs = outputs; - currTask = null; - currTopOp = null; - currUnionOp = null; - currMapJoinOp = null; - currAliasId = null; - rootOps = new ArrayList>(); + currTask = null; + currTopOp = null; + currUnionOp = null; + currMapJoinOp = null; + currAliasId = null; + rootOps = new ArrayList>(); rootOps.addAll(parseCtx.getTopOps().values()); unionTaskMap = new HashMap(); mapJoinTaskMap = new HashMap(); } /** - * @return reducer to task mapping + * @return reducer to task mapping */ public HashMap, Task> getOpTaskMap() { return opTaskMap; } /** - * @param opTaskMap reducer to task mapping + * @param opTaskMap + * reducer to task mapping */ - public void setOpTaskMap(HashMap, Task> opTaskMap) { + public void setOpTaskMap( + HashMap, Task> 
opTaskMap) { this.opTaskMap = opTaskMap; } /** - * @return operators already visited + * @return operators already visited */ public List> getSeenOps() { return seenOps; } /** - * @return file operators already visited + * @return file operators already visited */ public List getSeenFileSinkOps() { return seenFileSinkOps; } /** - * @param seenOps operators already visited + * @param seenOps + * operators already visited */ public void setSeenOps(List> seenOps) { this.seenOps = seenOps; } /** - * @param seenFileSinkOps file sink operators already visited + * @param seenFileSinkOps + * file sink operators already visited */ public void setSeenFileSinkOps(List seenFileSinkOps) { this.seenFileSinkOps = seenFileSinkOps; } /** - * @return top operators for tasks + * @return top operators for tasks */ public List> getRootOps() { return rootOps; } /** - * @param rootOps top operators for tasks + * @param rootOps + * top operators for tasks */ public void setRootOps(List> rootOps) { this.rootOps = rootOps; } /** - * @return current parse context + * @return current parse context */ public ParseContext getParseCtx() { return parseCtx; } /** - * @param parseCtx current parse context + * @param parseCtx + * current parse context */ public void setParseCtx(ParseContext parseCtx) { this.parseCtx = parseCtx; } /** - * @return the final move task + * @return the final move task */ public List> getMvTask() { return mvTask; } /** - * @param mvTask the final move task + * @param mvTask + * the final move task */ public void setMvTask(List> mvTask) { this.mvTask = mvTask; } /** - * @return root tasks for the plan + * @return root tasks for the plan */ - public List> getRootTasks() { + public List> getRootTasks() { return rootTasks; } /** - * @param rootTasks root tasks for the plan + * @param rootTasks + * root tasks for the plan */ - public void setRootTasks(List> rootTasks) { + public void setRootTasks(List> rootTasks) { this.rootTasks = rootTasks; } @@ -380,23 +398,26 @@ } /** - * @param mapCurrCtx operator to task mappings + * @param mapCurrCtx + * operator to task mappings */ - public void setMapCurrCtx(LinkedHashMap, GenMapRedCtx> mapCurrCtx) { + public void setMapCurrCtx( + LinkedHashMap, GenMapRedCtx> mapCurrCtx) { this.mapCurrCtx = mapCurrCtx; } /** * @return current task */ - public Task getCurrTask() { + public Task getCurrTask() { return currTask; } /** - * @param currTask current task + * @param currTask + * current task */ - public void setCurrTask(Task currTask) { + public void setCurrTask(Task currTask) { this.currTask = currTask; } @@ -405,46 +426,50 @@ */ public Operator getCurrTopOp() { return currTopOp; - } - + } + /** - * @param currTopOp current top operator + * @param currTopOp + * current top operator */ public void setCurrTopOp(Operator currTopOp) { this.currTopOp = currTopOp; - } + } public UnionOperator getCurrUnionOp() { return currUnionOp; - } - + } + /** - * @param currUnionOp current union operator + * @param currUnionOp + * current union operator */ public void setCurrUnionOp(UnionOperator currUnionOp) { this.currUnionOp = currUnionOp; - } + } public MapJoinOperator getCurrMapJoinOp() { return currMapJoinOp; - } - + } + /** - * @param currMapJoinOp current map join operator + * @param currMapJoinOp + * current map join operator */ public void setCurrMapJoinOp(MapJoinOperator currMapJoinOp) { this.currMapJoinOp = currMapJoinOp; - } + } /** * @return current top alias */ - public String getCurrAliasId() { + public String getCurrAliasId() { return currAliasId; } /** - * @param 
currAliasId current top alias + * @param currAliasId + * current top alias */ public void setCurrAliasId(String currAliasId) { this.currAliasId = currAliasId; @@ -472,7 +497,7 @@ public Set getInputs() { return inputs; } - + /** * Get the output set. */ @@ -488,7 +513,8 @@ } /** - * @param conf the conf to set + * @param conf + * the conf to set */ public void setConf(HiveConf conf) { this.conf = conf; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -18,39 +18,37 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import java.util.ArrayList; +import java.util.Map; import java.util.Stack; -import java.io.Serializable; -import java.io.File; -import java.util.Map; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcFactory; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.partitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.plan.fileSinkDesc; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcFactory; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; +import org.apache.hadoop.hive.ql.plan.mapredWork; +import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.ql.plan.tableDesc; /** * Processor for the rule - TableScan followed by Union @@ -61,41 +59,51 @@ } /** - * Union Operator encountered . 
- * Currently, the algorithm is pretty simple: - * If all the sub-queries are map-only, dont do anything. - * However, if there is a mapjoin followed by the union, merge at the union - * Otherwise, insert a FileSink on top of all the sub-queries. - * + * Union Operator encountered . Currently, the algorithm is pretty simple: If + * all the sub-queries are map-only, dont do anything. However, if there is a + * mapjoin followed by the union, merge at the union Otherwise, insert a + * FileSink on top of all the sub-queries. + * * This can be optimized later on. - * @param nd the file sink operator encountered - * @param opProcCtx context + * + * @param nd + * the file sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... nodeOutputs) throws SemanticException { + UnionOperator union = (UnionOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); UnionProcContext uCtx = parseCtx.getUCtx(); - // Map-only subqueries can be optimized in future to not write to a file in future - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); + // Map-only subqueries can be optimized in future to not write to a file in + // future + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); - // The plan needs to be broken only if one of the sub-queries involve a map-reduce job + // The plan needs to be broken only if one of the sub-queries involve a + // map-reduce job if (uCtx.isMapOnlySubq()) { // merge currTask from multiple topOps - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); - if ( opTaskMap != null && opTaskMap.size() > 0 ) { - Task tsk = opTaskMap.get(null); - if ( tsk != null ) + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); + if (opTaskMap != null && opTaskMap.size() > 0) { + Task tsk = opTaskMap.get(null); + if (tsk != null) { ctx.setCurrTask(tsk); + } } - + UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) { - GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory.getPositionParent(union, stack)); + GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory + .getPositionParent(union, stack)); + } else { + mapCurrCtx.put((Operator) nd, new GenMapRedCtx( + ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); } - else - mapCurrCtx.put((Operator)nd, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); return null; } @@ -108,13 +116,15 @@ int pos = UnionProcFactory.getPositionParent(union, stack); // is the current task a root task - if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) + if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) { ctx.getRootTasks().add(currTask); + } GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); Task uTask = null; - Operator parent = union.getParentOperators().get(pos); + Operator parent = union.getParentOperators().get( + pos); mapredWork uPlan = null; // union is encountered for the first time @@ -124,10 +134,9 @@ uTask = TaskFactory.get(uPlan, parseCtx.getConf()); uCtxTask.setUTask(uTask); ctx.setUnionTask(union, uCtxTask); - } - else { + } else { uTask = uCtxTask.getUTask(); - uPlan = (mapredWork)uTask.getWork(); + uPlan = (mapredWork) 
uTask.getWork(); } // If there is a mapjoin at position 'pos' @@ -143,12 +152,13 @@ assert plan.getPathToAliases().get(taskTmpDir) == null; plan.getPathToAliases().put(taskTmpDir, new ArrayList()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null)); + plan.getPathToPartitionInfo().put(taskTmpDir, + new partitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); } - tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc( - PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); + tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); // generate the temporary file Context baseCtx = parseCtx.getContext(); @@ -158,15 +168,14 @@ uCtxTask.addTaskTmpDir(taskTmpDir); uCtxTask.addTTDesc(tt_desc); - // The union task is empty. The files created for all the inputs are assembled in the + // The union task is empty. The files created for all the inputs are + // assembled in the // union context and later used to initialize the union plan // Create a file sink operator for this file name - Operator fs_op = - OperatorFactory.get - (new fileSinkDesc(taskTmpDir, tt_desc, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), - parent.getSchema()); + Operator fs_op = OperatorFactory.get( + new fileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar( + HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema()); assert parent.getChildOperators().size() == 1; parent.getChildOperators().set(0, fs_op); @@ -178,14 +187,17 @@ currTask.addDependentTask(uTask); // If it is map-only task, add the files to be processed - if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) - GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), (mapredWork) currTask.getWork(), false, ctx); + if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) { + GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), + (mapredWork) currTask.getWork(), false, ctx); + } ctx.setCurrTask(uTask); ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); - mapCurrCtx.put((Operator)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null)); + mapCurrCtx.put((Operator) nd, new GenMapRedCtx(ctx + .getCurrTask(), null, null)); return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -60,7 +60,6 @@ import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; import org.apache.hadoop.hive.ql.plan.selectDesc; import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.tableScanDesc; /** * Factory for generating the different node processors used by ColumnPruner. @@ -70,45 +69,51 @@ /** * Node Processor for Column Pruning on Filter Operators. */ - public static class ColumnPrunerFilterProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... 
nodeOutputs) throws SemanticException { - FilterOperator op = (FilterOperator)nd; - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; + public static class ColumnPrunerFilterProc implements NodeProcessor { + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + FilterOperator op = (FilterOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; exprNodeDesc condn = op.getConf().getPredicate(); // get list of columns used in the filter List cl = condn.getCols(); // merge it with the downstream col list - cppCtx.getPrunedColLists().put(op, Utilities.mergeUniqElems(cppCtx.genColLists(op), cl)); + cppCtx.getPrunedColLists().put(op, + Utilities.mergeUniqElems(cppCtx.genColLists(op), cl)); return null; } } - + /** * Factory method to get the ColumnPrunerFilterProc class. + * * @return ColumnPrunerFilterProc */ public static ColumnPrunerFilterProc getFilterProc() { return new ColumnPrunerFilterProc(); } - + /** * Node Processor for Column Pruning on Group By Operators. */ public static class ColumnPrunerGroupByProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - GroupByOperator op = (GroupByOperator)nd; - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + GroupByOperator op = (GroupByOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List colLists = new ArrayList(); groupByDesc conf = op.getConf(); ArrayList keys = conf.getKeys(); - for (exprNodeDesc key : keys) + for (exprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); + } ArrayList aggrs = conf.getAggregators(); - for (aggregationDesc aggr : aggrs) { + for (aggregationDesc aggr : aggrs) { ArrayList params = aggr.getParameters(); - for (exprNodeDesc param : params) + for (exprNodeDesc param : params) { colLists = Utilities.mergeUniqElems(colLists, param.getCols()); + } } cppCtx.getPrunedColLists().put(op, colLists); @@ -118,6 +123,7 @@ /** * Factory method to get the ColumnPrunerGroupByProc class. + * * @return ColumnPrunerGroupByProc */ public static ColumnPrunerGroupByProc getGroupByProc() { @@ -128,17 +134,19 @@ * The Default Node Processor for Column Pruning. */ public static class ColumnPrunerDefaultProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; - cppCtx.getPrunedColLists().put((Operator)nd, - cppCtx.genColLists((Operator)nd)); - + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; + cppCtx.getPrunedColLists().put((Operator) nd, + cppCtx.genColLists((Operator) nd)); + return null; } } /** * Factory method to get the ColumnPrunerDefaultProc class. + * * @return ColumnPrunerDefaultProc */ public static ColumnPrunerDefaultProc getDefaultProc() { @@ -146,15 +154,18 @@ } /** - * The Node Processor for Column Pruning on Table Scan Operators. It will store - * needed columns in tableScanDesc. + * The Node Processor for Column Pruning on Table Scan Operators. It will + * store needed columns in tableScanDesc. 
*/ public static class ColumnPrunerTableScanProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - TableScanOperator scanOp = (TableScanOperator)nd; - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; - List cols = cppCtx.genColLists((Operator)nd); - cppCtx.getPrunedColLists().put((Operator)nd, cols); + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + TableScanOperator scanOp = (TableScanOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; + List cols = cppCtx + .genColLists((Operator) nd); + cppCtx.getPrunedColLists().put((Operator) nd, + cols); ArrayList needed_columns = new ArrayList(); RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRR(); for (int i = 0; i < cols.size(); i++) { @@ -168,42 +179,50 @@ /** * Factory method to get the ColumnPrunerDefaultProc class. + * * @return ColumnPrunerTableScanProc */ public static ColumnPrunerTableScanProc getTableScanProc() { return new ColumnPrunerTableScanProc(); } - + /** * The Node Processor for Column Pruning on Reduce Sink Operators. */ public static class ColumnPrunerReduceSinkProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator)nd; - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; - HashMap, OpParseContext> opToParseCtxMap = - cppCtx.getOpToParseCtxMap(); + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; + HashMap, OpParseContext> opToParseCtxMap = cppCtx + .getOpToParseCtxMap(); RowResolver redSinkRR = opToParseCtxMap.get(op).getRR(); reduceSinkDesc conf = op.getConf(); - List> childOperators = op.getChildOperators(); - List> parentOperators = op.getParentOperators(); + List> childOperators = op + .getChildOperators(); + List> parentOperators = op + .getParentOperators(); List colLists = new ArrayList(); ArrayList keys = conf.getKeyCols(); - for (exprNodeDesc key : keys) + for (exprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); + } - if ((childOperators.size() == 1) && (childOperators.get(0) instanceof JoinOperator)) { + if ((childOperators.size() == 1) + && (childOperators.get(0) instanceof JoinOperator)) { assert parentOperators.size() == 1; Operator par = parentOperators.get(0); - JoinOperator childJoin = (JoinOperator)childOperators.get(0); + JoinOperator childJoin = (JoinOperator) childOperators.get(0); RowResolver parRR = opToParseCtxMap.get(par).getRR(); - List childJoinCols = cppCtx.getJoinPrunedColLists().get(childJoin).get((byte)conf.getTag()); + List childJoinCols = cppCtx.getJoinPrunedColLists().get( + childJoin).get((byte) conf.getTag()); boolean[] flags = new boolean[conf.getValueCols().size()]; - for (int i = 0; i < flags.length; i++) + for (int i = 0; i < flags.length; i++) { flags[i] = false; + } if (childJoinCols != null && childJoinCols.size() > 0) { - Map exprMap = op.getColumnExprMap(); + Map exprMap = op.getColumnExprMap(); for (String childCol : childJoinCols) { exprNodeDesc desc = exprMap.get(childCol); int index = conf.getValueCols().indexOf(desc); @@ -211,19 +230,21 @@ String[] nm = redSinkRR.reverseLookup(childCol); if (nm != null) { ColumnInfo 
cInfo = parRR.get(nm[0], nm[1]); - if (!colLists.contains(cInfo.getInternalName())) + if (!colLists.contains(cInfo.getInternalName())) { colLists.add(cInfo.getInternalName()); + } } } } Collections.sort(colLists); pruneReduceSinkOperator(flags, op, cppCtx); - } - else { - // Reduce Sink contains the columns needed - no need to aggregate from children + } else { + // Reduce Sink contains the columns needed - no need to aggregate from + // children ArrayList vals = conf.getValueCols(); - for (exprNodeDesc val : vals) + for (exprNodeDesc val : vals) { colLists = Utilities.mergeUniqElems(colLists, val.getCols()); + } } cppCtx.getPrunedColLists().put(op, colLists); @@ -233,6 +254,7 @@ /** * The Factory method to get ColumnPrunerReduceSinkProc class. + * * @return ColumnPrunerReduceSinkProc */ public static ColumnPrunerReduceSinkProc getReduceSinkProc() { @@ -243,20 +265,25 @@ * The Node Processor for Column Pruning on Select Operators. */ public static class ColumnPrunerSelectProc implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - SelectOperator op = (SelectOperator)nd; - ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx; + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... nodeOutputs) throws SemanticException { + SelectOperator op = (SelectOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List cols = new ArrayList(); - if(op.getChildOperators() != null) { - for(Operator child: op.getChildOperators()) { + if (op.getChildOperators() != null) { + for (Operator child : op.getChildOperators()) { // If one of my children is a FileSink or Script, return all columns. - // Without this break, a bug in ReduceSink to Extract edge column pruning will manifest + // Without this break, a bug in ReduceSink to Extract edge column + // pruning will manifest // which should be fixed before remove this if ((child instanceof FileSinkOperator) - || (child instanceof ScriptOperator) || (child instanceof UDTFOperator) - || (child instanceof LimitOperator) || (child instanceof UnionOperator)) { - cppCtx.getPrunedColLists().put(op, cppCtx.getColsFromSelectExpr(op)); + || (child instanceof ScriptOperator) + || (child instanceof UDTFOperator) + || (child instanceof LimitOperator) + || (child instanceof UnionOperator)) { + cppCtx.getPrunedColLists() + .put(op, cppCtx.getColsFromSelectExpr(op)); return null; } } @@ -264,18 +291,21 @@ cols = cppCtx.genColLists(op); selectDesc conf = op.getConf(); - // The input to the select does not matter. Go over the expressions + // The input to the select does not matter. Go over the expressions // and return the ones which have a marked column - cppCtx.getPrunedColLists().put(op, cppCtx.getSelectColsFromChildren(op, cols)); - - if(conf.isSelStarNoCompute()) + cppCtx.getPrunedColLists().put(op, + cppCtx.getSelectColsFromChildren(op, cols)); + + if (conf.isSelStarNoCompute()) { return null; - + } + // do we need to prune the select operator? List originalColList = op.getConf().getColList(); List columns = new ArrayList(); - for (exprNodeDesc expr : originalColList) + for (exprNodeDesc expr : originalColList) { Utilities.mergeUniqElems(columns, expr.getCols()); + } // by now, 'prunedCols' are columns used by child operators, and 'columns' // are columns used by this select operator. 
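Once the select operator knows which of its output columns are still consumed ('prunedCols' coming from the children versus 'columns' produced here), it prunes its child ReduceSink operators by computing a per-value-column retain flag (see getPruneReduceSinkOpRetainFlags further below). A standalone sketch of that flag computation, with illustrative names rather than Hive's API:

import java.util.Arrays;
import java.util.List;

// Flag i stays true only if value column i is still referenced downstream.
public class RetainFlagsSketch {
  static boolean[] retainFlags(List<String> valueCols, List<String> retainedCols) {
    boolean[] flags = new boolean[valueCols.size()];
    for (int i = 0; i < valueCols.size(); i++) {
      flags[i] = retainedCols.contains(valueCols.get(i));
    }
    return flags;
  }

  public static void main(String[] args) {
    List<String> valueCols = Arrays.asList("_col0", "_col1", "_col2");
    List<String> retained = Arrays.asList("_col0", "_col2");
    System.out.println(Arrays.toString(retainFlags(valueCols, retained)));
    // [true, false, true] -> _col1 can be dropped from the value schema
  }
}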
ArrayList originalOutputColumnNames = conf.getOutputColumnNames(); @@ -286,7 +316,7 @@ Vector rs_newsignature = new Vector(); RowResolver old_rr = cppCtx.getOpToParseCtxMap().get(op).getRR(); RowResolver new_rr = new RowResolver(); - for(String col : cols){ + for (String col : cols) { int index = originalOutputColumnNames.indexOf(col); newOutputColumnNames.add(col); newColList.add(originalColList.get(index)); @@ -312,31 +342,37 @@ * * @param op * @param retainedSelOutputCols - * @throws SemanticException + * @throws SemanticException */ private void handleChildren(SelectOperator op, - List retainedSelOutputCols, ColumnPrunerProcCtx cppCtx) throws SemanticException { - for(Operator child: op.getChildOperators()) { + List retainedSelOutputCols, ColumnPrunerProcCtx cppCtx) + throws SemanticException { + for (Operator child : op.getChildOperators()) { if (child instanceof ReduceSinkOperator) { - boolean[] flags = getPruneReduceSinkOpRetainFlags(retainedSelOutputCols, (ReduceSinkOperator)child); - pruneReduceSinkOperator(flags, (ReduceSinkOperator)child, cppCtx); - }else if (child instanceof FilterOperator){ - //filter operator has the same output columns as its parent - for(Operator filterChild: child.getChildOperators()){ + boolean[] flags = getPruneReduceSinkOpRetainFlags( + retainedSelOutputCols, (ReduceSinkOperator) child); + pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx); + } else if (child instanceof FilterOperator) { + // filter operator has the same output columns as its parent + for (Operator filterChild : child + .getChildOperators()) { if (filterChild instanceof ReduceSinkOperator) { - boolean[] flags = getPruneReduceSinkOpRetainFlags(retainedSelOutputCols, (ReduceSinkOperator)filterChild); - pruneReduceSinkOperator(flags, (ReduceSinkOperator)filterChild, cppCtx); + boolean[] flags = getPruneReduceSinkOpRetainFlags( + retainedSelOutputCols, (ReduceSinkOperator) filterChild); + pruneReduceSinkOperator(flags, (ReduceSinkOperator) filterChild, + cppCtx); } } } } } } - + private static boolean[] getPruneReduceSinkOpRetainFlags( List retainedParentOpOutputCols, ReduceSinkOperator reduce) { reduceSinkDesc reduceConf = reduce.getConf(); - java.util.ArrayList originalValueEval = reduceConf.getValueCols(); + java.util.ArrayList originalValueEval = reduceConf + .getValueCols(); boolean[] flags = new boolean[originalValueEval.size()]; for (int i = 0; i < originalValueEval.size(); i++) { flags[i] = false; @@ -354,9 +390,10 @@ } return flags; } - + private static void pruneReduceSinkOperator(boolean[] retainFlags, - ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException { + ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) + throws SemanticException { reduceSinkDesc reduceConf = reduce.getConf(); Map oldMap = reduce.getColumnExprMap(); Map newMap = new HashMap(); @@ -385,10 +422,11 @@ sig.add(colInfo); } } - + ArrayList keyCols = reduceConf.getKeyCols(); List keys = new ArrayList(); - RowResolver parResover = cppCtx.getOpToParseCtxMap().get(reduce.getParentOperators().get(0)).getRR(); + RowResolver parResover = cppCtx.getOpToParseCtxMap().get( + reduce.getParentOperators().get(0)).getRR(); for (int i = 0; i < keyCols.size(); i++) { keys = Utilities.mergeUniqElems(keys, keyCols.get(i).getCols()); } @@ -396,29 +434,31 @@ String outputCol = keys.get(i); String[] nm = parResover.reverseLookup(outputCol); ColumnInfo colInfo = oldRR.get(nm[0], nm[1]); - if (colInfo != null) + if (colInfo != null) { newRR.put(nm[0], nm[1], colInfo); + } } - + 
cppCtx.getOpToParseCtxMap().get(reduce).setRR(newRR); reduce.setColumnExprMap(newMap); reduce.getSchema().setSignature(sig); reduceConf.setOutputValueColumnNames(newOutputColNames); reduceConf.setValueCols(newValueEval); - tableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList( - reduceConf.getValueCols(), newOutputColNames, 0, "")); + tableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils + .getFieldSchemasFromColumnList(reduceConf.getValueCols(), + newOutputColNames, 0, "")); reduceConf.setValueSerializeInfo(newValueTable); } - /** * The Factory method to get the ColumnPrunerSelectProc class. + * * @return ColumnPrunerSelectProc */ public static ColumnPrunerSelectProc getSelectProc() { return new ColumnPrunerSelectProc(); } - + /** * The Node Processor for Column Pruning on Join Operators. */ @@ -426,7 +466,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { JoinOperator op = (JoinOperator) nd; - pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), null, false); + pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), null, + false); return null; } } @@ -439,7 +480,7 @@ public static ColumnPrunerJoinProc getJoinProc() { return new ColumnPrunerJoinProc(); } - + /** * The Node Processor for Column Pruning on Map Join Operators. */ @@ -447,27 +488,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { MapJoinOperator op = (MapJoinOperator) nd; - pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), op.getConf().getRetainList(), true); + pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), op + .getConf().getRetainList(), true); return null; } } - + private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op, joinDesc conf, Map columnExprMap, - Map> retainMap, boolean mapJoin) throws SemanticException { + Map> retainMap, boolean mapJoin) + throws SemanticException { ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; Map> prunedColLists = new HashMap>(); List> childOperators = op .getChildOperators(); for (Operator child : childOperators) { - if (child instanceof FileSinkOperator) + if (child instanceof FileSinkOperator) { return; + } } - List childColLists = cppCtx.genColLists((Operator)op); - + List childColLists = cppCtx.genColLists(op); + RowResolver joinRR = cppCtx.getOpToParseCtxMap().get(op).getRR(); RowResolver newJoinRR = new RowResolver(); ArrayList outputCols = new ArrayList(); @@ -480,11 +524,13 @@ Byte tag = conf.getReversedExprs().get(internalName); if (!childColLists.contains(internalName)) { int index = conf.getExprs().get(tag).indexOf(desc); - if (index < 0) + if (index < 0) { continue; + } conf.getExprs().get(tag).remove(desc); - if (retainMap != null) + if (retainMap != null) { retainMap.get(tag).remove(index); + } } else { List prunedRSList = prunedColLists.get(tag); if (prunedRSList == null) { @@ -496,7 +542,7 @@ newColExprMap.put(internalName, desc); } } - + if (mapJoin) { // regenerate the valueTableDesc List valueTableDescs = new ArrayList(); @@ -508,9 +554,8 @@ keyOrder.append("+"); } - tableDesc valueTableDesc = PlanUtils - .getMapJoinValueTableDesc(PlanUtils - .getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")); + tableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils + .getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")); valueTableDescs.add(valueTableDesc); } @@ -564,5 +609,5 
@@ public static ColumnPrunerMapJoinProc getMapJoinProc() { return new ColumnPrunerMapJoinProc(); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy) @@ -29,9 +29,9 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; /** * Processor for the rule - table scan @@ -41,18 +41,24 @@ } /** - * Table Sink encountered - * @param nd the table sink operator encountered - * @param opProcCtx context + * Table Sink encountered + * + * @param nd + * the table sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - TableScanOperator op = (TableScanOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... nodeOutputs) throws SemanticException { + TableScanOperator op = (TableScanOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); - // create a dummy task - Task currTask = TaskFactory.get(GenMapRedUtils.getMapRedWork(), parseCtx.getConf()); + // create a dummy task + Task currTask = TaskFactory.get(GenMapRedUtils + .getMapRedWork(), parseCtx.getConf()); Operator currTopOp = op; ctx.setCurrTask(currTask); ctx.setCurrTopOp(currTopOp); @@ -71,4 +77,3 @@ } } - Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy) @@ -18,20 +18,20 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.HashMap; import java.util.Map; -import java.util.HashMap; import java.util.Stack; -import java.io.Serializable; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; +import org.apache.hadoop.hive.ql.plan.mapredWork; /** * Processor for the rule - table scan followed by reduce sink @@ -42,22 +42,28 @@ } /** - * Reduce Scan encountered - * @param nd the reduce sink operator encountered - * @param opProcCtx context + * Reduce Scan encountered + * + * @param nd + * 
the reduce sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get((Operator)stack.get(stack.size()-2)); - Task currTask = mapredCtx.getCurrTask(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2)); + Task currTask = mapredCtx.getCurrTask(); mapredWork currPlan = (mapredWork) currTask.getWork(); - Operator currTopOp = mapredCtx.getCurrTopOp(); + Operator currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = op.getChildOperators().get(0); - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); ctx.setCurrTopOp(currTopOp); @@ -66,20 +72,24 @@ // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { - if (currPlan.getReducer() == null) + if (currPlan.getReducer() == null) { GenMapRedUtils.initPlan(op, ctx); - else + } else { GenMapRedUtils.splitPlan(op, ctx); + } } - // This will happen in case of joins. The current plan can be thrown away after being merged with the + // This will happen in case of joins. The current plan can be thrown away + // after being merged with the // original plan else { - GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false, false, false); + GenMapRedUtils + .joinPlan(op, null, opMapTask, ctx, -1, false, false, false); currTask = opMapTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy) @@ -28,8 +28,8 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; /** * Processor for the rule - reduce sink followed by reduce sink @@ -40,36 +40,44 @@ } /** - * Reduce Scan encountered - * @param nd the reduce sink operator encountered - * @param opProcCtx context + * Reduce Scan encountered + * + * @param nd + * the reduce sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... 
nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); + Task currTask = mapredCtx.getCurrTask(); Operator currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = op.getChildOperators().get(0); - Map, Task> opTaskMap = ctx.getOpTaskMap(); + Map, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); ctx.setCurrTopOp(currTopOp); ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); - if (opMapTask == null) + if (opMapTask == null) { GenMapRedUtils.splitPlan(op, ctx); - else { - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, false); + } else { + GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, + false); currTask = opMapTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java (working copy) @@ -22,15 +22,19 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Optimizer interface. All the rule-based optimizations implement this interface. All the transformations are invoked sequentially. They take the current - * parse context (which contains the operator tree among other things), perform all the optimizations, and then return the updated parse context. + * Optimizer interface. All the rule-based optimizations implement this + * interface. All the transformations are invoked sequentially. They take the + * current parse context (which contains the operator tree among other things), + * perform all the optimizations, and then return the updated parse context. 
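The Transform contract described above is deliberately simple: each rule-based optimization consumes the current parse context and returns an updated one, and the optimizer applies them one after another. A minimal sketch of that sequential invocation; PlanContext, PlanTransform, and the step names are stand-ins for illustration, not Hive's classes:

import java.util.ArrayList;
import java.util.List;

public class TransformChainSketch {
  static class PlanContext {
    final List<String> appliedSteps = new ArrayList<>();
  }

  interface PlanTransform {
    PlanContext transform(PlanContext ctx);
  }

  public static void main(String[] args) {
    List<PlanTransform> transforms = new ArrayList<>();
    transforms.add(ctx -> { ctx.appliedSteps.add("column pruning"); return ctx; });
    transforms.add(ctx -> { ctx.appliedSteps.add("partition pruning"); return ctx; });
    transforms.add(ctx -> { ctx.appliedSteps.add("sample pruning"); return ctx; });

    PlanContext ctx = new PlanContext();
    for (PlanTransform t : transforms) {
      ctx = t.transform(ctx); // each transform returns the updated context for the next one
    }
    System.out.println(ctx.appliedSteps);
  }
}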
*/ public interface Transform { - /** - * All transformation steps implement this interface - * @param pctx input parse context - * @return ParseContext - * @throws SemanticException - */ - public ParseContext transform(ParseContext pctx) throws SemanticException; + /** + * All transformation steps implement this interface + * + * @param pctx + * input parse context + * @return ParseContext + * @throws SemanticException + */ + public ParseContext transform(ParseContext pctx) throws SemanticException; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (working copy) @@ -18,26 +18,22 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.HashMap; import java.util.Map; -import java.util.HashMap; import java.util.Stack; -import java.io.Serializable; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.mapredWork; /** * Processor for the rule - union followed by reduce sink @@ -48,54 +44,64 @@ } /** - * Reduce Scan encountered - * @param nd the reduce sink operator encountered - * @param opProcCtx context + * Reduce Scan encountered + * + * @param nd + * the reduce sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... 
nodeOutputs) throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); UnionProcContext uCtx = parseCtx.getUCtx(); // union was map only - no special processing needed - if (uCtx.isMapOnlySubq()) + if (uCtx.isMapOnlySubq()) { return (new GenMRRedSink1()).process(nd, stack, opProcCtx, nodeOutputs); + } - // union consisted on a bunch of map-reduce jobs, and it has been split at the union + // union consisted on a bunch of map-reduce jobs, and it has been split at + // the union Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); + Task currTask = mapredCtx.getCurrTask(); mapredWork plan = (mapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); - + ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { // When the reducer is encountered for the first time - if (plan.getReducer() == null) + if (plan.getReducer() == null) { GenMapRedUtils.initUnionPlan(op, ctx); - // When union is followed by a multi-table insert - else + // When union is followed by a multi-table insert + } else { GenMapRedUtils.splitPlan(op, ctx); + } } - // The union is already initialized. However, the union is walked from another input + // The union is already initialized. However, the union is walked from + // another input // initUnionPlan is idempotent - else if (plan.getReducer() == reducer) + else if (plan.getReducer() == reducer) { GenMapRedUtils.initUnionPlan(op, ctx); - // There is a join after union. One of the branches of union has already been initialized. - // Initialize the current branch, and join with the original plan. 
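All of the GenMRRedSink* processors share the same dispatch shape when a reduce sink is reached: if no map-reduce task exists yet for its reducer, the current plan is either initialized with that reducer or split; otherwise the current branch is joined into the task that already owns it. The sketch below is only an editor's abstraction of that branching; MrTask and the map are hypothetical placeholders, and the real work is done by GenMapRedUtils.initPlan/initUnionPlan, splitPlan, and joinPlan:

import java.util.HashMap;
import java.util.Map;

public class RedSinkDispatchSketch {
  static class MrTask { String reducer; }

  static MrTask dispatch(String reducer, MrTask currTask, Map<String, MrTask> opTaskMap) {
    MrTask mapped = opTaskMap.get(reducer);
    if (mapped == null) {
      if (currTask.reducer == null) {
        currTask.reducer = reducer;   // analogous to initPlan / initUnionPlan
        opTaskMap.put(reducer, currTask);
        return currTask;
      }
      MrTask split = new MrTask();    // analogous to splitPlan: a new map-reduce task
      split.reducer = reducer;
      opTaskMap.put(reducer, split);
      return split;
    }
    return mapped;                    // analogous to joinPlan: reuse the existing task
  }

  public static void main(String[] args) {
    Map<String, MrTask> opTaskMap = new HashMap<>();
    MrTask t1 = dispatch("GBY_5", new MrTask(), opTaskMap);
    MrTask t2 = dispatch("JOIN_7", t1, opTaskMap); // current task already has a reducer
    System.out.println(t1 == t2); // false: the plan was split into a second task
  }
}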
- else { + } else { GenMapRedUtils.initUnionPlan(ctx, currTask, false); - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, false); + GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, + false); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); - + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); + // the union operator has been processed ctx.setCurrUnionOp(null); return null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy) @@ -18,21 +18,20 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.HashMap; import java.util.Map; -import java.util.HashMap; import java.util.Stack; -import java.io.Serializable; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.plan.mapredWork; /** * Processor for the rule - map join followed by reduce sink @@ -43,45 +42,56 @@ } /** - * Reduce Scan encountered - * @param nd the reduce sink operator encountered - * @param opProcCtx context + * Reduce Scan encountered + * + * @param nd + * the reduce sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... 
nodeOutputs) throws SemanticException { + ReduceSinkOperator op = (ReduceSinkOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - ParseContext parseCtx = ctx.getParseCtx(); + ctx.getParseCtx(); - // map-join consisted on a bunch of map-only jobs, and it has been split after the mapjoin + // map-join consisted on a bunch of map-only jobs, and it has been split + // after the mapjoin Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); + Task currTask = mapredCtx.getCurrTask(); mapredWork plan = (mapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); - + ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { // When the reducer is encountered for the first time - if (plan.getReducer() == null) + if (plan.getReducer() == null) { GenMapRedUtils.initMapJoinPlan(op, ctx, true, false, true, -1); - // When mapjoin is followed by a multi-table insert - else + // When mapjoin is followed by a multi-table insert + } else { GenMapRedUtils.splitPlan(op, ctx); + } } - // There is a join after mapjoin. One of the branches of mapjoin has already been initialized. + // There is a join after mapjoin. One of the branches of mapjoin has already + // been initialized. // Initialize the current branch, and join with the original plan. else { assert plan.getReducer() != reducer; - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, false); + GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, + false); } - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); - + mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); + // the mapjoin operator has been processed ctx.setCurrMapJoinOp(null); return null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java (working copy) @@ -19,14 +19,16 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.HashMap; import java.util.Map; import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -36,26 +38,23 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.optimizer.Transform; +import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import 
org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.plan.filterDesc; import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.fs.Path; /** * The transformation step that does sample pruning. - * + * */ public class SamplePruner implements Transform { public static class SamplePrunerCtx implements NodeProcessorCtx { HashMap opToSamplePruner; - public SamplePrunerCtx(HashMap opToSamplePruner) { + public SamplePrunerCtx( + HashMap opToSamplePruner) { this.opToSamplePruner = opToSamplePruner; } @@ -70,28 +69,37 @@ * @param opToSamplePruner * the opToSamplePruner to set */ - public void setOpToSamplePruner(HashMap opToSamplePruner) { + public void setOpToSamplePruner( + HashMap opToSamplePruner) { this.opToSamplePruner = opToSamplePruner; } } // The log - private static final Log LOG = LogFactory.getLog("hive.ql.optimizer.SamplePruner"); + private static final Log LOG = LogFactory + .getLog("hive.ql.optimizer.SamplePruner"); - /* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext) + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop + * .hive.ql.parse.ParseContext) */ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { // create a the context for walking operators - SamplePrunerCtx samplePrunerCtx = new SamplePrunerCtx(pctx.getOpToSamplePruner()); + SamplePrunerCtx samplePrunerCtx = new SamplePrunerCtx(pctx + .getOpToSamplePruner()); Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", "(TS%FIL%FIL%)"), getFilterProc()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, samplePrunerCtx); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, + samplePrunerCtx); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of topop nodes @@ -107,16 +115,17 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - FilterOperator filOp = (FilterOperator)nd; - filterDesc filOpDesc = filOp.getConf(); - sampleDesc sampleDescr = filOpDesc.getSampleDescr(); + FilterOperator filOp = (FilterOperator) nd; + filterDesc filOpDesc = filOp.getConf(); + sampleDesc sampleDescr = filOpDesc.getSampleDescr(); - if ((sampleDescr == null) || !sampleDescr.getInputPruning()) + if ((sampleDescr == null) || !sampleDescr.getInputPruning()) { return null; + } assert stack.size() == 3; - TableScanOperator tsOp = (TableScanOperator)stack.get(0); - ((SamplePrunerCtx)procCtx).getOpToSamplePruner().put(tsOp, sampleDescr); + TableScanOperator tsOp = (TableScanOperator) stack.get(0); + ((SamplePrunerCtx) procCtx).getOpToSamplePruner().put(tsOp, sampleDescr); return null; } } @@ -141,14 +150,17 @@ } /** - * Prunes to get all the files in the partition that satisfy the TABLESAMPLE clause - * - * @param part The partition to prune + * Prunes to get all the files in the partition that satisfy the TABLESAMPLE + * clause + * + * @param part + * The partition to prune * @return Path[] * @throws SemanticException */ @SuppressWarnings("nls") - public static Path[] prune(Partition part, sampleDesc sampleDescr) throws SemanticException { + public static Path[] prune(Partition part, sampleDesc sampleDescr) + throws SemanticException { int num = sampleDescr.getNumerator(); int den = sampleDescr.getDenominator(); int bucketCount = part.getBucketCount(); @@ -160,38 +172,33 @@ LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount); if (bucketCount == den) { - Path [] ret = new Path [1]; - ret[0] = part.getBucketPath(num-1); - return(ret); - } - else if (bucketCount > den && bucketCount % den == 0) { + Path[] ret = new Path[1]; + ret[0] = part.getBucketPath(num - 1); + return (ret); + } else if (bucketCount > den && bucketCount % den == 0) { int numPathsInSample = bucketCount / den; - Path [] ret = new Path[numPathsInSample]; + Path[] ret = new Path[numPathsInSample]; for (int i = 0; i < numPathsInSample; i++) { - ret[i] = part.getBucketPath(i*den+num-1); + ret[i] = part.getBucketPath(i * den + num - 1); } return ret; - } - else if (bucketCount < den && den % bucketCount == 0) { - Path [] ret = new Path[1]; - ret[0] = part.getBucketPath((num-1)%bucketCount); + } else if (bucketCount < den && den % bucketCount == 0) { + Path[] ret = new Path[1]; + ret[0] = part.getBucketPath((num - 1) % bucketCount); return ret; - } - else { + } else { // need to do full scan - fullScanMsg = "Tablesample denominator " - + den + " is not multiple/divisor of bucket count " - + bucketCount + " of table " + part.getTable().getName(); + fullScanMsg = "Tablesample denominator " + den + + " is not multiple/divisor of bucket count " + bucketCount + + " of table " + part.getTable().getName(); } - } - else { + } else { // need to do full scan fullScanMsg = "Tablesample not on clustered columns"; } LOG.warn(fullScanMsg + ", using full table scan"); - Path [] ret = part.getPath(); + Path[] ret = part.getPath(); return ret; } - } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -18,37 +18,36 @@ package org.apache.hadoop.hive.ql.optimizer; -import java.util.List; +import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; 
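The bucket arithmetic in SamplePruner.prune above decides which bucket files can serve a TABLESAMPLE(BUCKET num OUT OF den) clause instead of a full scan. A worked standalone version of just that arithmetic (0-based bucket indices; an empty result stands for the full-scan fallback; the method name is the editor's, not Hive's):

import java.util.ArrayList;
import java.util.List;

public class SampleBucketSketch {
  static List<Integer> sampledBuckets(int num, int den, int bucketCount) {
    List<Integer> buckets = new ArrayList<>();
    if (bucketCount == den) {
      buckets.add(num - 1);
    } else if (bucketCount > den && bucketCount % den == 0) {
      for (int i = 0; i < bucketCount / den; i++) {
        buckets.add(i * den + num - 1);
      }
    } else if (bucketCount < den && den % bucketCount == 0) {
      buckets.add((num - 1) % bucketCount);
    }
    return buckets; // empty => denominator is not a multiple/divisor of the bucket count
  }

  public static void main(String[] args) {
    System.out.println(sampledBuckets(2, 4, 4)); // [1]
    System.out.println(sampledBuckets(1, 2, 4)); // [0, 2]
    System.out.println(sampledBuckets(3, 8, 4)); // [2]
    System.out.println(sampledBuckets(1, 3, 4)); // []  -> full table scan with a warning
  }
}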
+import java.util.List; import java.util.Stack; -import java.io.Serializable; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; -import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; @@ -56,14 +55,14 @@ import org.apache.hadoop.hive.ql.plan.extractDesc; import org.apache.hadoop.hive.ql.plan.fileSinkDesc; import org.apache.hadoop.hive.ql.plan.loadFileDesc; +import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.plan.moveWork; +import org.apache.hadoop.hive.ql.plan.partitionDesc; import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; import org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.ql.plan.tableScanDesc; -import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.conf.HiveConf; /** * Processor for the rule - table scan followed by reduce sink @@ -74,178 +73,206 @@ } /** - * File Sink Operator encountered - * @param nd the file sink operator encountered - * @param opProcCtx context + * File Sink Operator encountered + * + * @param nd + * the file sink operator encountered + * @param opProcCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, + Object... 
nodeOutputs) throws SemanticException { + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task currTask = ctx.getCurrTask(); // Has the user enabled merging of files for map-only jobs or for all jobs - if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) - { + if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { List> mvTasks = ctx.getMvTask(); - // In case of unions or map-joins, it is possible that the file has already been seen. + // In case of unions or map-joins, it is possible that the file has + // already been seen. // So, no need to attempt to merge the files again. - if ((ctx.getSeenFileSinkOps() == null) || - (!ctx.getSeenFileSinkOps().contains((FileSinkOperator)nd))) { - + if ((ctx.getSeenFileSinkOps() == null) + || (!ctx.getSeenFileSinkOps().contains(nd))) { + // no need of merging if the move is to a local file system - MoveTask mvTask = (MoveTask)findMoveTask(mvTasks, (FileSinkOperator)nd); - if ((mvTask != null) && !mvTask.isLocal()) - { - // There are separate configuration parameters to control whether to merge for a map-only job + MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, + (FileSinkOperator) nd); + if ((mvTask != null) && !mvTask.isLocal()) { + // There are separate configuration parameters to control whether to + // merge for a map-only job // or for a map-reduce job - if ((parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) && - (((mapredWork)currTask.getWork()).getReducer() == null)) || - (parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) && - (((mapredWork)currTask.getWork()).getReducer() != null))) + if ((parseCtx.getConf().getBoolVar( + HiveConf.ConfVars.HIVEMERGEMAPFILES) && (((mapredWork) currTask + .getWork()).getReducer() == null)) + || (parseCtx.getConf().getBoolVar( + HiveConf.ConfVars.HIVEMERGEMAPREDFILES) && (((mapredWork) currTask + .getWork()).getReducer() != null))) { chDir = true; + } } } } String finalName = processFS(nd, stack, opProcCtx, chDir); - + // If it is a map-only job, insert a new task to do the concatenation if (chDir && (finalName != null)) { - createMergeJob((FileSinkOperator)nd, ctx, finalName); + createMergeJob((FileSinkOperator) nd, ctx, finalName); } - + return null; } - - private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) { + + private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, + String finalName) { Task currTask = ctx.getCurrTask(); RowSchema fsRS = fsOp.getSchema(); - + // create a reduce Sink operator - key is the first column ArrayList keyCols = new ArrayList(); - keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand")); + keyCols.add(TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("rand")); ArrayList valueCols = new ArrayList(); for (ColumnInfo ci : fsRS.getSignature()) { - valueCols.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci.getTabAlias(), - ci.getIsPartitionCol())); + valueCols.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), + ci.getTabAlias(), ci.getIsPartitionCol())); } // create a dummy tableScan operator - Operator ts_op = - OperatorFactory.get(tableScanDesc.class, fsRS); + Operator ts_op = OperatorFactory.get( + tableScanDesc.class, fsRS); ArrayList outputColumns = new ArrayList(); - for (int i = 0; i < valueCols.size(); i++) + for (int i = 0; i < valueCols.size(); i++) { outputColumns.add(SemanticAnalyzer.getColumnInternalName(i)); - - 
reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList(), valueCols, - outputColumns, false, -1, -1, -1); - ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDesc, fsRS, ts_op); + } + + reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc( + new ArrayList(), valueCols, outputColumns, false, -1, -1, + -1); + OperatorFactory.getAndMakeChild(rsDesc, fsRS, ts_op); mapredWork cplan = GenMapRedUtils.getMapRedWork(); ParseContext parseCtx = ctx.getParseCtx(); - Task mergeTask = TaskFactory.get(cplan, parseCtx.getConf()); + Task mergeTask = TaskFactory.get(cplan, parseCtx + .getConf()); fileSinkDesc fsConf = fsOp.getConf(); - + // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); - RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRR(); + RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp) + .getRR(); Integer pos = Integer.valueOf(0); - for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { - String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); - out_rwsch.put(info[0], info[1], - new ColumnInfo(pos.toString(), colInfo.getType(), info[0], - colInfo.getIsPartitionCol())); + for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) { + String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); + out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo + .getType(), info[0], colInfo.getIsPartitionCol())); pos = Integer.valueOf(pos.intValue() + 1); } - Operator extract = - OperatorFactory.getAndMakeChild( - new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - Utilities.ReduceField.VALUE.toString(), "", false)), - new RowSchema(out_rwsch.getColumnInfos())); - - tableDesc ts = (tableDesc)fsConf.getTableInfo().clone(); - fsConf.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); - FileSinkOperator newOutput = - (FileSinkOperator)OperatorFactory.getAndMakeChild( - new fileSinkDesc(finalName, ts, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)), - fsRS, extract); + Operator extract = OperatorFactory.getAndMakeChild(new extractDesc( + new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( + out_rwsch.getColumnInfos())); + tableDesc ts = (tableDesc) fsConf.getTableInfo().clone(); + fsConf + .getTableInfo() + .getProperties() + .remove( + org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); + FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory + .getAndMakeChild(new fileSinkDesc(finalName, ts, parseCtx.getConf() + .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)), fsRS, extract); + cplan.setReducer(extract); ArrayList aliases = new ArrayList(); aliases.add(fsConf.getDirName()); cplan.getPathToAliases().put(fsConf.getDirName(), aliases); - cplan.getAliasToWork().put(fsConf.getDirName(), ts_op); - cplan.getPathToPartitionInfo().put(fsConf.getDirName(), new partitionDesc(fsConf.getTableInfo(), null)); + cplan.getAliasToWork().put(fsConf.getDirName(), ts_op); + cplan.getPathToPartitionInfo().put(fsConf.getDirName(), + new partitionDesc(fsConf.getTableInfo(), null)); cplan.setNumReduceTasks(-1); - - moveWork dummyMv = new moveWork(null, null, null, new loadFileDesc(fsOp.getConf().getDirName(), finalName, true, null, null), false); - Task dummyMergeTask = TaskFactory.get(dummyMv, ctx.getConf()); + + moveWork dummyMv = new moveWork(null, 
null, null, new loadFileDesc(fsOp + .getConf().getDirName(), finalName, true, null, null), false); + Task dummyMergeTask = TaskFactory.get(dummyMv, ctx + .getConf()); List listWorks = new ArrayList(); listWorks.add(dummyMv); listWorks.add(mergeTask.getWork()); ConditionalWork cndWork = new ConditionalWork(listWorks); - - ConditionalTask cndTsk = (ConditionalTask)TaskFactory.get(cndWork, ctx.getConf()); + + ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, ctx + .getConf()); List> listTasks = new ArrayList>(); listTasks.add(dummyMergeTask); listTasks.add(mergeTask); cndTsk.setListTasks(listTasks); - + cndTsk.setResolver(new ConditionalResolverMergeFiles()); - cndTsk.setResolverCtx(new ConditionalResolverMergeFilesCtx(listTasks, fsOp.getConf().getDirName())); - + cndTsk.setResolverCtx(new ConditionalResolverMergeFilesCtx(listTasks, fsOp + .getConf().getDirName())); + currTask.addDependentTask(cndTsk); - + List> mvTasks = ctx.getMvTask(); Task mvTask = findMoveTask(mvTasks, newOutput); - + if (mvTask != null) { - for(Task tsk : cndTsk.getListTasks()) + for (Task tsk : cndTsk.getListTasks()) { tsk.addDependentTask(mvTask); + } } } - - private Task findMoveTask(List> mvTasks, FileSinkOperator fsOp) { + + private Task findMoveTask( + List> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task mvTsk : mvTasks) { - moveWork mvWork = (moveWork)mvTsk.getWork(); + moveWork mvWork = (moveWork) mvTsk.getWork(); String srcDir = null; - if (mvWork.getLoadFileWork() != null) + if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourceDir(); - else if (mvWork.getLoadTableWork() != null) + } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourceDir(); - - if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) + } + + if ((srcDir != null) + && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) { return mvTsk; + } } - + return null; } - - private String processFS(Node nd, Stack stack, NodeProcessorCtx opProcCtx, boolean chDir) - throws SemanticException { - + + private String processFS(Node nd, Stack stack, + NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { + // Is it the dummy file sink after the mapjoin - FileSinkOperator fsOp = (FileSinkOperator)nd; - if ((fsOp.getParentOperators().size() == 1) && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) + FileSinkOperator fsOp = (FileSinkOperator) nd; + if ((fsOp.getParentOperators().size() == 1) + && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) { return null; + } - GenMRProcContext ctx = (GenMRProcContext)opProcCtx; + GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List seenFSOps = ctx.getSeenFileSinkOps(); - if (seenFSOps == null) + if (seenFSOps == null) { seenFSOps = new ArrayList(); - if (!seenFSOps.contains(fsOp)) + } + if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); + } ctx.setSeenFileSinkOps(seenFSOps); Task currTask = ctx.getCurrTask(); - + // If the directory needs to be changed, send the new directory String dest = null; @@ -256,27 +283,30 @@ ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getMRTmpFileURI(); - + fsOp.getConf().setDirName(tmpDir); } - - boolean ret = false; + Task mvTask = null; - - if (!chDir) + + if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); - + } + Operator currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + 
HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); List> seenOps = ctx.getSeenOps(); - List> rootTasks = ctx.getRootTasks(); + List> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task - if (mvTask != null) - ret = currTask.addDependentTask(mvTask); - - // In case of multi-table insert, the path to alias mapping is needed for all the sources. Since there is no + if (mvTask != null) { + currTask.addDependentTask(mvTask); + } + + // In case of multi-table insert, the path to alias mapping is needed for + // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { @@ -284,19 +314,20 @@ if (mapTask == null) { assert (!seenOps.contains(currTopOp)); seenOps.add(currTopOp); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, (mapredWork) currTask.getWork(), false, ctx); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, + (mapredWork) currTask.getWork(), false, ctx); opTaskMap.put(null, currTask); rootTasks.add(currTask); - } - else { + } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, (mapredWork) mapTask.getWork(), false, ctx); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, + (mapredWork) mapTask.getWork(), false, ctx); } - // mapTask and currTask should be merged by and join/union operator + // mapTask and currTask should be merged by and join/union operator // (e.g., GenMRUnion1j) which has multiple topOps. - assert mapTask == currTask : - "mapTask.id = " + mapTask.getId() + "; currTask.id = " + currTask.getId(); + assert mapTask == currTask : "mapTask.id = " + mapTask.getId() + + "; currTask.id = " + currTask.getId(); } return dest; @@ -304,30 +335,31 @@ } UnionOperator currUnionOp = ctx.getCurrUnionOp(); - - if (currUnionOp != null) { + + if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currTask, false); return dest; } - + MapJoinOperator currMapJoinOp = ctx.getCurrMapJoinOp(); - - if (currMapJoinOp != null) { + + if (currMapJoinOp != null) { opTaskMap.put(null, currTask); GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp); mapredWork plan = (mapredWork) currTask.getWork(); String taskTmpDir = mjCtx.getTaskTmpDir(); - tableDesc tt_desc = mjCtx.getTTDesc(); + tableDesc tt_desc = mjCtx.getTTDesc(); assert plan.getPathToAliases().get(taskTmpDir) == null; plan.getPathToAliases().put(taskTmpDir, new ArrayList()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null)); + plan.getPathToPartitionInfo().put(taskTmpDir, + new partitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); return dest; } - + return dest; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcCtx.java (working copy) @@ -21,8 +21,8 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; /** - * The processor context for partition pruner. This contains the table - * alias that is being currently processed. + * The processor context for partition pruner. This contains the table alias + * that is being currently processed. 
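The merge decision in GenMRFileSink1 above only kicks in when the final move is not to the local file system and the configuration flag matching the job shape is enabled (HiveConf.ConfVars.HIVEMERGEMAPFILES for map-only plans, HIVEMERGEMAPREDFILES for plans with a reducer). A condensed sketch of just that predicate, with descriptive boolean parameters standing in for the HiveConf lookups:

public class MergeDecisionSketch {
  static boolean shouldMergeOutput(boolean moveIsLocal, boolean hasReducer,
      boolean mergeMapFiles, boolean mergeMapRedFiles) {
    if (moveIsLocal) {
      return false; // no merge needed when the result is moved to the local file system
    }
    return (mergeMapFiles && !hasReducer) || (mergeMapRedFiles && hasReducer);
  }

  public static void main(String[] args) {
    // map-only job with map-file merging enabled -> insert the conditional merge job
    System.out.println(shouldMergeOutput(false, false, true, false)); // true
    // map-reduce job with only map-file merging enabled -> leave the output as is
    System.out.println(shouldMergeOutput(false, true, true, false));  // false
  }
}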
*/ public class ExprProcCtx implements NodeProcessorCtx { @@ -30,31 +30,31 @@ * The table alias that is being currently processed. */ String tabAlias; - + /** * Flag to hold whether there are any non partition columns accessed in the * expression. */ boolean hasNonPartCols; - + public ExprProcCtx(String tabAlias) { this.tabAlias = tabAlias; - this.hasNonPartCols = false; + hasNonPartCols = false; } - + public String getTabAlias() { return tabAlias; } - + public void setTabAlias(String tabAlias) { this.tabAlias = tabAlias; } - + public boolean getHasNonPartCols() { - return this.hasNonPartCols; + return hasNonPartCols; } - + public void setHasNonPartCols(boolean val) { - this.hasNonPartCols = val; + hasNonPartCols = val; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (working copy) @@ -28,31 +28,31 @@ * Context class for operator tree walker for partition pruner. */ public class OpWalkerCtx implements NodeProcessorCtx { - + private boolean hasNonPartCols; - + /** - * Map from tablescan operator to partition pruning predicate - * that is initialized from the ParseContext + * Map from tablescan operator to partition pruning predicate that is + * initialized from the ParseContext */ - private HashMap opToPartPruner; + private final HashMap opToPartPruner; /** * Constructor */ public OpWalkerCtx(HashMap opToPartPruner) { this.opToPartPruner = opToPartPruner; - this.hasNonPartCols = false; + hasNonPartCols = false; } - + public HashMap getOpToPartPruner() { - return this.opToPartPruner; + return opToPartPruner; } - + public void addHasNonPartCols(boolean val) { hasNonPartCols = (hasNonPartCols || val); } - + public boolean getHasNonPartCols() { return hasNonPartCols; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -50,7 +51,6 @@ import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -59,15 +59,20 @@ /** * The transformation step that does partition pruning. 
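Partition pruning can only use a predicate whose columns are all partition keys and which contains no non-deterministic function, which is what onlyContainsPartnCols checks below by walking the expression tree. A standalone sketch of that recursive check; the Expr model here is a hypothetical stand-in for Hive's exprNodeDesc tree:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PartnColCheckSketch {
  static class Expr {
    String column;                 // non-null for a column reference
    boolean deterministic = true;  // false for functions like rand()
    List<Expr> children = Collections.emptyList();
  }

  static boolean onlyPartitionCols(Expr expr, Set<String> partitionKeys) {
    if (expr == null) {
      return true;
    }
    if (expr.column != null) {
      return partitionKeys.contains(expr.column);
    }
    if (!expr.deterministic) {
      return false; // cannot be evaluated once per partition
    }
    for (Expr child : expr.children) {
      if (!onlyPartitionCols(child, partitionKeys)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Set<String> keys = new HashSet<>(Arrays.asList("ds", "hr"));
    Expr ds = new Expr(); ds.column = "ds";
    Expr userid = new Expr(); userid.column = "userid";
    Expr and = new Expr(); and.children = Arrays.asList(ds, userid);
    System.out.println(onlyPartitionCols(ds, keys));  // true: prune on ds alone
    System.out.println(onlyPartitionCols(and, keys)); // false: userid is not a partition key
  }
}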
- * + * */ public class PartitionPruner implements Transform { // The log - private static final Log LOG = LogFactory.getLog("hive.ql.optimizer.ppr.PartitionPruner"); + private static final Log LOG = LogFactory + .getLog("hive.ql.optimizer.ppr.PartitionPruner"); - /* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext) + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop + * .hive.ql.parse.ParseContext) */ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { @@ -76,11 +81,13 @@ OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner()); Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"), - OpProcFactory.getFilterProc()); + opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"), OpProcFactory + .getFilterProc()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerCtx); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), + opRules, opWalkerCtx); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of topop nodes @@ -93,33 +100,39 @@ } /** - * Find out whether the condition only contains partitioned columns. Note that if the table - * is not partitioned, the function always returns true. + * Find out whether the condition only contains partitioned columns. Note that + * if the table is not partitioned, the function always returns true. * condition. - * - * @param tab the table object - * @param expr the pruner expression for the table + * + * @param tab + * the table object + * @param expr + * the pruner expression for the table */ public static boolean onlyContainsPartnCols(Table tab, exprNodeDesc expr) { - if (!tab.isPartitioned() || (expr == null)) + if (!tab.isPartitioned() || (expr == null)) { return true; + } if (expr instanceof exprNodeColumnDesc) { - String colName = ((exprNodeColumnDesc)expr).getColumn(); + String colName = ((exprNodeColumnDesc) expr).getColumn(); return tab.isPartitionKey(colName); } // It cannot contain a non-deterministic function - if ((expr instanceof exprNodeGenericFuncDesc) && - !FunctionRegistry.isDeterministic(((exprNodeGenericFuncDesc)expr).getGenericUDF())) + if ((expr instanceof exprNodeGenericFuncDesc) + && !FunctionRegistry.isDeterministic(((exprNodeGenericFuncDesc) expr) + .getGenericUDF())) { return false; + } // All columns of the expression must be parttioned columns List children = expr.getChildren(); if (children != null) { for (int i = 0; i < children.size(); i++) { - if (!onlyContainsPartnCols(tab, children.get(i))) + if (!onlyContainsPartnCols(tab, children.get(i))) { return false; + } } } @@ -129,79 +142,103 @@ /** * Get the partition list for the table that satisfies the partition pruner * condition. - * - * @param tab the table object for the alias - * @param prunerExpr the pruner expression for the alias - * @param conf for checking whether "strict" mode is on. - * @param alias for generating error message only. - * @return the partition list for the table that satisfies the partition pruner condition. 
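onlyContainsPartnCols, shown just above, is a straightforward recursion: a column node must be a partition key, a generic function node must be deterministic, and every child must pass the same test (an unpartitioned table or a null expression passes trivially). The same shape over a toy expression tree, with node classes invented purely for illustration in place of Hive's exprNodeColumnDesc and exprNodeGenericFuncDesc:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Set;

    // Toy expression nodes standing in for Hive's exprNodeDesc hierarchy.
    class Expr { List<Expr> children() { return List.of(); } }
    class Column extends Expr { final String name; Column(String n) { name = n; } }
    class Func extends Expr {
        final boolean deterministic; final List<Expr> kids;
        Func(boolean det, Expr... k) { deterministic = det; kids = Arrays.asList(k); }
        @Override List<Expr> children() { return kids; }
    }

    public class PartnColCheck {
        // True iff every referenced column is a partition column and every function is deterministic.
        static boolean onlyPartnCols(Set<String> partKeys, Expr e) {
            if (e == null) return true;
            if (e instanceof Column) return partKeys.contains(((Column) e).name);
            if (e instanceof Func && !((Func) e).deterministic) return false;
            for (Expr child : e.children()) {
                if (!onlyPartnCols(partKeys, child)) return false;
            }
            return true;
        }

        public static void main(String[] args) {
            Set<String> partKeys = Set.of("ds", "hr");
            Expr ok  = new Func(true, new Column("ds"), new Column("hr"));
            Expr bad = new Func(true, new Column("ds"), new Column("key")); // "key" is not a partition column
            System.out.println(onlyPartnCols(partKeys, ok) + " " + onlyPartnCols(partKeys, bad));
        }
    }
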
+ * + * @param tab + * the table object for the alias + * @param prunerExpr + * the pruner expression for the alias + * @param conf + * for checking whether "strict" mode is on. + * @param alias + * for generating error message only. + * @return the partition list for the table that satisfies the partition + * pruner condition. * @throws HiveException */ public static PrunedPartitionList prune(Table tab, exprNodeDesc prunerExpr, - HiveConf conf, String alias, Map prunedPartitionsMap) throws HiveException { + HiveConf conf, String alias, + Map prunedPartitionsMap) + throws HiveException { LOG.trace("Started pruning partiton"); LOG.trace("tabname = " + tab.getName()); LOG.trace("prune Expression = " + prunerExpr); - + String key = tab.getName() + ";"; - if (prunerExpr != null) + if (prunerExpr != null) { key = key + prunerExpr.getExprString(); + } PrunedPartitionList ret = prunedPartitionsMap.get(key); - if(ret !=null) + if (ret != null) { return ret; - + } + LinkedHashSet true_parts = new LinkedHashSet(); LinkedHashSet unkn_parts = new LinkedHashSet(); LinkedHashSet denied_parts = new LinkedHashSet(); try { - StructObjectInspector rowObjectInspector = (StructObjectInspector)tab.getDeserializer().getObjectInspector(); + StructObjectInspector rowObjectInspector = (StructObjectInspector) tab + .getDeserializer().getObjectInspector(); Object[] rowWithPart = new Object[2]; - if(tab.isPartitioned()) { - for(String partName: Hive.get().getPartitionNames(tab.getDbName(), tab.getName(), (short) -1)) { + if (tab.isPartitioned()) { + for (String partName : Hive.get().getPartitionNames(tab.getDbName(), + tab.getName(), (short) -1)) { // Set all the variables here - LinkedHashMap partSpec = Warehouse.makeSpecFromName(partName); + LinkedHashMap partSpec = Warehouse + .makeSpecFromName(partName); // Create the row object ArrayList partNames = new ArrayList(); ArrayList partValues = new ArrayList(); ArrayList partObjectInspectors = new ArrayList(); - for(Map.Entryentry : partSpec.entrySet()) { + for (Map.Entry entry : partSpec.entrySet()) { partNames.add(entry.getKey()); partValues.add(entry.getValue()); - partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + partObjectInspectors + .add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); } - StructObjectInspector partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors); + StructObjectInspector partObjectInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(partNames, partObjectInspectors); rowWithPart[1] = partValues; - ArrayList ois = new ArrayList(2); + ArrayList ois = new ArrayList( + 2); ois.add(rowObjectInspector); ois.add(partObjectInspector); - StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(ois); + StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory + .getUnionStructObjectInspector(ois); - // If the "strict" mode is on, we have to provide partition pruner for each table. - if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE))) { + // If the "strict" mode is on, we have to provide partition pruner for + // each table. 
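The strict-mode branch described in the comment above is a guard: when hive.mapred.mode is set to strict and the pruning expression for a partitioned table contains no column reference at all, compilation fails rather than silently scanning every partition. A minimal stand-alone sketch of that check, with plain arguments and an IllegalStateException standing in for the real HiveConf lookup and SemanticException:

    // Simplified guard; the real code inspects the expression tree (hasColumnExpr)
    // and reads HiveConf.ConfVars.HIVEMAPREDMODE instead of taking plain values.
    public class StrictModeGuard {
        static void checkPartitionPredicate(String mapredMode, boolean predicateHasColumn,
                                            String alias, String table) {
            if ("strict".equalsIgnoreCase(mapredMode) && !predicateHasColumn) {
                throw new IllegalStateException(
                    "No partition predicate found for Alias \"" + alias + "\" Table \"" + table + "\"");
            }
        }

        public static void main(String[] args) {
            checkPartitionPredicate("nonstrict", false, "a", "srcpart");   // allowed
            try {
                checkPartitionPredicate("strict", false, "a", "srcpart");  // rejected
            } catch (IllegalStateException e) {
                System.out.println(e.getMessage());
            }
        }
    }
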
+ if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, + HiveConf.ConfVars.HIVEMAPREDMODE))) { if (!hasColumnExpr(prunerExpr)) { - throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE.getMsg( - "for Alias \"" + alias + "\" Table \"" + tab.getName() + "\"")); + throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE + .getMsg("for Alias \"" + alias + "\" Table \"" + + tab.getName() + "\"")); } } // evaluate the expression tree if (prunerExpr != null) { - ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(prunerExpr); - ObjectInspector evaluateResultOI = evaluator.initialize(rowWithPartObjectInspector); + ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory + .get(prunerExpr); + ObjectInspector evaluateResultOI = evaluator + .initialize(rowWithPartObjectInspector); Object evaluateResultO = evaluator.evaluate(rowWithPart); - Boolean r = (Boolean) ((PrimitiveObjectInspector)evaluateResultOI).getPrimitiveJavaObject(evaluateResultO); + Boolean r = (Boolean) ((PrimitiveObjectInspector) evaluateResultOI) + .getPrimitiveJavaObject(evaluateResultO); LOG.trace("prune result for partition " + partSpec + ": " + r); if (Boolean.FALSE.equals(r)) { if (denied_parts.isEmpty()) { - Partition part = Hive.get().getPartition(tab, partSpec, Boolean.FALSE); + Partition part = Hive.get().getPartition(tab, partSpec, + Boolean.FALSE); denied_parts.add(part); } LOG.trace("pruned partition: " + partSpec); } else { - Partition part = Hive.get().getPartition(tab, partSpec, Boolean.FALSE); + Partition part = Hive.get().getPartition(tab, partSpec, + Boolean.FALSE); if (Boolean.TRUE.equals(r)) { LOG.debug("retained partition: " + partSpec); true_parts.add(part); @@ -212,7 +249,8 @@ } } else { // is there is no parition pruning, all of them are needed - true_parts.add(Hive.get().getPartition(tab, partSpec, Boolean.FALSE)); + true_parts.add(Hive.get() + .getPartition(tab, partSpec, Boolean.FALSE)); } } } else { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprPrunerInfo.java (working copy) @@ -21,8 +21,8 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; /** - * The processor context for partition pruner. This contains the table - * alias that is being currently processed. + * The processor context for partition pruner. This contains the table alias + * that is being currently processed. */ public class ExprPrunerInfo implements NodeProcessorCtx { @@ -30,11 +30,11 @@ * The table alias that is being currently processed. 
*/ String tabAlias; - + public String getTabAlias() { return tabAlias; } - + public void setTabAlias(String tabAlias) { this.tabAlias = tabAlias; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (working copy) @@ -42,16 +42,12 @@ import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; -import org.apache.hadoop.hive.ql.udf.UDFOPAnd; -import org.apache.hadoop.hive.ql.udf.UDFOPOr; -import org.apache.hadoop.hive.ql.udf.UDFOPNot; -import org.apache.hadoop.hive.ql.udf.UDFType; /** - * Expression processor factory for partition pruning. Each processor tries - * to convert the expression subtree into a partition pruning expression. - * This expression is then used to figure out whether a particular partition - * should be scanned or not. + * Expression processor factory for partition pruning. Each processor tries to + * convert the expression subtree into a partition pruning expression. This + * expression is then used to figure out whether a particular partition should + * be scanned or not. */ public class ExprProcFactory { @@ -63,25 +59,27 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - + exprNodeDesc newcd = null; exprNodeColumnDesc cd = (exprNodeColumnDesc) nd; ExprProcCtx epc = (ExprProcCtx) procCtx; - if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias()) && cd.getIsParititonCol()) + if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias()) + && cd.getIsParititonCol()) { newcd = cd.clone(); - else { + } else { newcd = new exprNodeConstantDesc(cd.getTypeInfo(), null); epc.setHasNonPartCols(true); } - + return newcd; } } - + /** - * If all children are candidates and refer only to one table alias then this expr is a candidate - * else it is not a candidate but its children could be final candidates + * If all children are candidates and refer only to one table alias then this + * expr is a candidate else it is not a candidate but its children could be + * final candidates */ public static class GenericFuncExprProcessor implements NodeProcessor { @@ -93,90 +91,96 @@ exprNodeGenericFuncDesc fd = (exprNodeGenericFuncDesc) nd; boolean unknown = false; - + if (FunctionRegistry.isOpAndOrNot(fd)) { - // do nothing because "And" and "Or" and "Not" supports null value evaluation - // NOTE: In the future all UDFs that treats null value as UNKNOWN (both in parameters and return - // values) should derive from a common base class UDFNullAsUnknown, so instead of listing the classes - // here we would test whether a class is derived from that base class. + // do nothing because "And" and "Or" and "Not" supports null value + // evaluation + // NOTE: In the future all UDFs that treats null value as UNKNOWN (both + // in parameters and return + // values) should derive from a common base class UDFNullAsUnknown, so + // instead of listing the classes + // here we would test whether a class is derived from that base class. 
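The comment above is the crux of why AND, OR and NOT are left alone while any other function over an unknown (null) child collapses to a null constant: those three operators have well-defined three-valued semantics, so an unknown input does not force an unknown output. A self-contained sketch of that semantics using java.lang.Boolean, with null standing for "unknown":

    public class ThreeValuedLogic {
        // null means "unknown", e.g. a non-partition column that was replaced by a null constant.
        static Boolean and(Boolean a, Boolean b) {
            if (Boolean.FALSE.equals(a) || Boolean.FALSE.equals(b)) return false; // a definite FALSE wins
            if (a == null || b == null) return null;                              // otherwise unknown taints
            return true;
        }
        static Boolean or(Boolean a, Boolean b) {
            if (Boolean.TRUE.equals(a) || Boolean.TRUE.equals(b)) return true;    // a definite TRUE wins
            if (a == null || b == null) return null;
            return false;
        }
        static Boolean not(Boolean a) { return a == null ? null : !a; }

        public static void main(String[] args) {
            // ds = '2008-04-08' AND key > 10, where "key" is not a partition column (unknown):
            System.out.println(and(true, null));   // null  -> partition cannot be pruned
            System.out.println(and(false, null));  // false -> partition can still be pruned
            System.out.println(or(true, null));    // true
            System.out.println(not(null));         // null
        }
    }

Only a definite FALSE lets the pruner drop a partition, which matches how the evaluation loop in PartitionPruner.prune treats the result: FALSE goes to the denied list, TRUE is retained, and unknown is retained as well.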
} else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) { // If it's a non-deterministic UDF, set unknown to true unknown = true; } else { // If any child is null, set unknown to true - for(Object child: nodeOutputs) { - exprNodeDesc child_nd = (exprNodeDesc)child; - if (child_nd instanceof exprNodeConstantDesc && - ((exprNodeConstantDesc)child_nd).getValue() == null) { + for (Object child : nodeOutputs) { + exprNodeDesc child_nd = (exprNodeDesc) child; + if (child_nd instanceof exprNodeConstantDesc + && ((exprNodeConstantDesc) child_nd).getValue() == null) { unknown = true; } } } - - if (unknown) + + if (unknown) { newfd = new exprNodeConstantDesc(fd.getTypeInfo(), null); - else { + } else { // Create the list of children ArrayList children = new ArrayList(); - for(Object child: nodeOutputs) { + for (Object child : nodeOutputs) { children.add((exprNodeDesc) child); } // Create a copy of the function descriptor - newfd = new exprNodeGenericFuncDesc(fd.getTypeInfo(), fd.getGenericUDF(), children); + newfd = new exprNodeGenericFuncDesc(fd.getTypeInfo(), fd + .getGenericUDF(), children); } - + return newfd; } } - + public static class FieldExprProcessor implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - - exprNodeFieldDesc fnd = (exprNodeFieldDesc)nd; + + exprNodeFieldDesc fnd = (exprNodeFieldDesc) nd; boolean unknown = false; int idx = 0; exprNodeDesc left_nd = null; - for(Object child: nodeOutputs) { + for (Object child : nodeOutputs) { exprNodeDesc child_nd = (exprNodeDesc) child; - if (child_nd instanceof exprNodeConstantDesc && - ((exprNodeConstantDesc)child_nd).getValue() == null) + if (child_nd instanceof exprNodeConstantDesc + && ((exprNodeConstantDesc) child_nd).getValue() == null) { unknown = true; + } left_nd = child_nd; } - assert(idx == 0); + assert (idx == 0); exprNodeDesc newnd = null; if (unknown) { newnd = new exprNodeConstantDesc(fnd.getTypeInfo(), null); + } else { + newnd = new exprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd + .getFieldName(), fnd.getIsList()); } - else { - newnd = new exprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd.getFieldName(), fnd.getIsList()); - } return newnd; } } /** - * Processor for constants and null expressions. For such expressions - * the processor simply clones the exprNodeDesc and returns it. + * Processor for constants and null expressions. For such expressions the + * processor simply clones the exprNodeDesc and returns it. */ public static class DefaultExprProcessor implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - if (nd instanceof exprNodeConstantDesc) - return ((exprNodeConstantDesc)nd).clone(); - else if (nd instanceof exprNodeNullDesc) - return ((exprNodeNullDesc)nd).clone(); - - assert(false); + if (nd instanceof exprNodeConstantDesc) { + return ((exprNodeConstantDesc) nd).clone(); + } else if (nd instanceof exprNodeNullDesc) { + return ((exprNodeNullDesc) nd).clone(); + } + + assert (false); return null; } } @@ -196,39 +200,53 @@ public static NodeProcessor getColumnProcessor() { return new ColumnExprProcessor(); } - + /** * Generates the partition pruner for the expression tree - * @param tabAlias The table alias of the partition table that is being considered for pruning - * @param pred The predicate from which the partition pruner needs to be generated - * @return hasNonPartCols returns true/false depending upon whether this pred has a non partition column + * + * @param tabAlias + * The table alias of the partition table that is being considered + * for pruning + * @param pred + * The predicate from which the partition pruner needs to be + * generated + * @return hasNonPartCols returns true/false depending upon whether this pred + * has a non partition column * @throws SemanticException */ - public static exprNodeDesc genPruner(String tabAlias, exprNodeDesc pred, + public static exprNodeDesc genPruner(String tabAlias, exprNodeDesc pred, boolean hasNonPartCols) throws SemanticException { // Create the walker, the rules dispatcher and the context. - ExprProcCtx pprCtx= new ExprProcCtx(tabAlias); - - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + ExprProcCtx pprCtx = new ExprProcCtx(tabAlias); + + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
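The exprRules registered just below pair regular-expression rules with node processors, and the dispatcher falls back to the default processor for anything else (constants and nulls here); Hive's DefaultRuleDispatcher additionally scores competing rules and fires the closest match. A compressed, self-contained imitation of that wiring over plain strings, with invented node names:

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.function.UnaryOperator;
    import java.util.regex.Pattern;

    public class MiniRuleDispatcher {
        public static void main(String[] args) {
            // Rule table: pattern over a node-name string -> processor to run on that node.
            Map<Pattern, UnaryOperator<String>> rules = new LinkedHashMap<>();
            rules.put(Pattern.compile("COL.*"),   n -> "columnProcessor(" + n + ")");
            rules.put(Pattern.compile("FIELD.*"), n -> "fieldProcessor(" + n + ")");
            rules.put(Pattern.compile("FUNC.*"),  n -> "genericFuncProcessor(" + n + ")");
            UnaryOperator<String> defaultProc = n -> "defaultProcessor(" + n + ")";

            // Dispatch: run the first matching rule, or the default when nothing matches.
            for (String node : new String[] {"COL:ds", "CONST:null", "FUNC:and"}) {
                UnaryOperator<String> chosen = defaultProc;
                for (Map.Entry<Pattern, UnaryOperator<String>> rule : rules.entrySet()) {
                    if (rule.getKey().matcher(node).matches()) { chosen = rule.getValue(); break; }
                }
                System.out.println(node + " -> " + chosen.apply(node));
            }
        }
    }
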
The dispatcher // generates the plan from the operator tree Map exprRules = new LinkedHashMap(); - exprRules.put(new RuleRegExp("R1", exprNodeColumnDesc.class.getName() + "%"), getColumnProcessor()); - exprRules.put(new RuleRegExp("R2", exprNodeFieldDesc.class.getName() + "%"), getFieldProcessor()); - exprRules.put(new RuleRegExp("R5", exprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor()); - - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx); + exprRules.put( + new RuleRegExp("R1", exprNodeColumnDesc.class.getName() + "%"), + getColumnProcessor()); + exprRules.put( + new RuleRegExp("R2", exprNodeFieldDesc.class.getName() + "%"), + getFieldProcessor()); + exprRules.put(new RuleRegExp("R5", exprNodeGenericFuncDesc.class.getName() + + "%"), getGenericFuncProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), + exprRules, pprCtx); GraphWalker egw = new DefaultGraphWalker(disp); - + List startNodes = new ArrayList(); startNodes.add(pred); - + HashMap outputMap = new HashMap(); egw.startWalking(startNodes, outputMap); hasNonPartCols = pprCtx.getHasNonPartCols(); // Get the exprNodeDesc corresponding to the first start node; - return (exprNodeDesc)outputMap.get(pred); + return (exprNodeDesc) outputMap.get(pred); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (working copy) @@ -20,6 +20,7 @@ import java.util.Map; import java.util.Stack; + import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.Node; @@ -30,39 +31,38 @@ import org.apache.hadoop.hive.ql.plan.exprNodeDesc; /** - * Operator factory for partition pruning processing of operator graph - * We find all the filter operators that appear just beneath the table scan - * operators. We then pass the filter to the partition pruner to construct - * a pruner for that table alias and store a mapping from the table scan - * operator to that pruner. We call that pruner later during plan generation. + * Operator factory for partition pruning processing of operator graph We find + * all the filter operators that appear just beneath the table scan operators. + * We then pass the filter to the partition pruner to construct a pruner for + * that table alias and store a mapping from the table scan operator to that + * pruner. We call that pruner later during plan generation. */ public class OpProcFactory { /** - * Determines the partition pruner for the filter. This is called only when the filter - * follows a table scan operator. + * Determines the partition pruner for the filter. This is called only when + * the filter follows a table scan operator. */ public static class FilterPPR implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - OpWalkerCtx owc = (OpWalkerCtx)procCtx; + OpWalkerCtx owc = (OpWalkerCtx) procCtx; FilterOperator fop = (FilterOperator) nd; FilterOperator fop2 = null; - + // The stack contains either ... TS, Filter or // ... TS, Filter, Filter with the head of the stack being the rightmost - // symbol. So we just pop out the two elements from the top and if the + // symbol. So we just pop out the two elements from the top and if the // second one of them is not a table scan then the operator on the top of // the stack is the Table scan operator. Node tmp = stack.pop(); Node tmp2 = stack.pop(); TableScanOperator top = null; if (tmp2 instanceof TableScanOperator) { - top = (TableScanOperator)tmp2; - } - else { + top = (TableScanOperator) tmp2; + } else { top = (TableScanOperator) stack.peek(); fop2 = (FilterOperator) tmp2; } @@ -71,54 +71,55 @@ // If fop2 exists (i.e this is not the top level filter and fop2 is not // a sampling filter then we ignore the current filter - if (fop2 != null && !fop2.getConf().getIsSamplingPred()) + if (fop2 != null && !fop2.getConf().getIsSamplingPred()) { return null; - + } + // ignore the predicate in case it is not a sampling predicate if (fop.getConf().getIsSamplingPred()) { return null; } - - // Otherwise this is not a sampling predicate and we need to + + // Otherwise this is not a sampling predicate and we need to exprNodeDesc predicate = fop.getConf().getPredicate(); String alias = top.getConf().getAlias(); - + // Generate the partition pruning predicate boolean hasNonPartCols = false; - exprNodeDesc ppr_pred = ExprProcFactory.genPruner(alias, predicate, hasNonPartCols); + exprNodeDesc ppr_pred = ExprProcFactory.genPruner(alias, predicate, + hasNonPartCols); owc.addHasNonPartCols(hasNonPartCols); // Add the pruning predicate to the table scan operator addPruningPred(owc.getOpToPartPruner(), top, ppr_pred); - + return null; } - - private void addPruningPred(Map opToPPR, - TableScanOperator top, exprNodeDesc new_ppr_pred) { + + private void addPruningPred(Map opToPPR, + TableScanOperator top, exprNodeDesc new_ppr_pred) { exprNodeDesc old_ppr_pred = opToPPR.get(top); exprNodeDesc ppr_pred = null; if (old_ppr_pred != null) { // or the old_ppr_pred and the new_ppr_pred - ppr_pred = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred); - } - else { + ppr_pred = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred); + } else { ppr_pred = new_ppr_pred; } - + // Put the mapping from table scan operator to ppr_pred opToPPR.put(top, ppr_pred); - + return; } } - /** * Default processor which just merges its children */ public static class DefaultPPR implements NodeProcessor { - + @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -22,11 +22,11 @@ import java.util.List; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.ppd.PredicatePushDown; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor; /** * Implementation of the optimizer @@ -35,10 +35,11 @@ private ParseContext pctx; private List transformations; - /** - * create the list of transformations - * @param hiveConf - */ + /** + * create the list of transformations + * + * @param hiveConf + */ public void initialize(HiveConf hiveConf) { transformations = new ArrayList(); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) { @@ -48,7 +49,7 @@ transformations.add(new PredicatePushDown()); transformations.add(new PartitionPruner()); } - + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY)) { transformations.add(new GroupByOptimizer()); } @@ -61,12 +62,14 @@ /** * invoke all the transformations one-by-one, and alter the query plan + * * @return ParseContext * @throws SemanticException */ public ParseContext optimize() throws SemanticException { - for (Transform t : transformations) + for (Transform t : transformations) { pctx = t.transform(pctx); + } return pctx; } @@ -78,7 +81,8 @@ } /** - * @param pctx the pctx to set + * @param pctx + * the pctx to set */ public void setPctx(ParseContext pctx) { this.pctx = pctx; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java (working copy) @@ -19,15 +19,10 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.util.ArrayList; import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -38,29 +33,29 @@ /** * Implementation of rule-based join table reordering optimization. User passes * hints to specify which tables are to be streamed and they are moved to have - * largest tag so that they are processed last. - * In future, once statistics are implemented, this transformation can also be - * done based on costs. + * largest tag so that they are processed last. In future, once statistics are + * implemented, this transformation can also be done based on costs. */ public class JoinReorder implements Transform { /** * Estimate the size of the output based on the STREAMTABLE hints. To do so - * the whole tree is traversed. 
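The size estimate that drives the reordering is deliberately coarse, on the 0/1/2 scale spelled out just below: 2 for a table named in a STREAMTABLE hint, 1 for a subtree that contains one, 0 otherwise; the reduce sink with the highest score is then given the last tag so it is streamed. A toy version over a parent-linked operator tree, where the Op class and the aliases are invented for the sketch:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Set;

    public class StreamSizeEstimate {
        // Minimal stand-in for an operator: a table scan carries an alias, a join is flagged as such.
        static class Op {
            final String alias; final boolean isJoin; final List<Op> parents;
            Op(String alias, boolean isJoin, Op... parents) {
                this.alias = alias; this.isJoin = isJoin; this.parents = Arrays.asList(parents);
            }
        }

        // 0: no big tables in the subtree, 1: the subtree contains a big table, 2: the operator is a big table.
        static int outputSize(Op op, Set<String> bigTables) {
            if (op.isJoin) {
                for (Op p : op.parents) if (outputSize(p, bigTables) != 0) return 1;
                return 0;
            }
            if (op.alias != null) return bigTables.contains(op.alias) ? 2 : 0;
            int max = 0;
            for (Op p : op.parents) max = Math.max(max, outputSize(p, bigTables));
            return max;
        }

        public static void main(String[] args) {
            Op big = new Op("b", false);            // hinted big table, e.g. /*+ STREAMTABLE(b) */
            Op small = new Op("s", false);
            Op rsBig = new Op(null, false, big);    // reduce sink over the big table
            Op rsSmall = new Op(null, false, small);
            Op join = new Op(null, true, rsBig, rsSmall);
            System.out.println(outputSize(rsBig, Set.of("b")));   // 2
            System.out.println(outputSize(rsSmall, Set.of("b"))); // 0
            System.out.println(outputSize(join, Set.of("b")));    // 1
        }
    }
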
Possible sizes: - * 0: the operator and its subtree don't contain any big tables - * 1: the subtree of the operator contains a big table - * 2: the operator is a big table - * - * @param operator The operator which output size is to be estimated - * @param bigTables Set of tables that should be streamed + * the whole tree is traversed. Possible sizes: 0: the operator and its + * subtree don't contain any big tables 1: the subtree of the operator + * contains a big table 2: the operator is a big table + * + * @param operator + * The operator which output size is to be estimated + * @param bigTables + * Set of tables that should be streamed * @return The estimated size - 0 (no streamed tables), 1 (streamed tables in - * subtree) or 2 (a streamed table) + * subtree) or 2 (a streamed table) */ private int getOutputSize(Operator operator, - Set bigTables) { + Set bigTables) { // If a join operator contains a big subtree, there is a chance that its // output is also big, so the output size is 1 (medium) if (operator instanceof JoinOperator) { - for(Operator o: operator.getParentOperators()) { + for (Operator o : operator.getParentOperators()) { if (getOutputSize(o, bigTables) != 0) { return 1; } @@ -69,7 +64,7 @@ // If a table is in bigTables then its output is big (2) if (operator instanceof TableScanOperator) { - String alias = ((TableScanOperator)operator).getConf().getAlias(); + String alias = ((TableScanOperator) operator).getConf().getAlias(); if (bigTables.contains(alias)) { return 2; } @@ -79,7 +74,7 @@ // the biggest output from a parent int maxSize = 0; if (operator.getParentOperators() != null) { - for(Operator o: operator.getParentOperators()) { + for (Operator o : operator.getParentOperators()) { int current = getOutputSize(o, bigTables); if (current > maxSize) { maxSize = current; @@ -92,14 +87,15 @@ /** * Find all big tables from STREAMTABLE hints - * - * @param joinCtx The join context + * + * @param joinCtx + * The join context * @return Set of all big tables */ private Set getBigTables(ParseContext joinCtx) { Set bigTables = new HashSet(); - for (QBJoinTree qbJoin: joinCtx.getJoinContext().values()) { + for (QBJoinTree qbJoin : joinCtx.getJoinContext().values()) { if (qbJoin.getStreamAliases() != null) { bigTables.addAll(qbJoin.getStreamAliases()); } @@ -111,20 +107,22 @@ /** * Reorder the tables in a join operator appropriately (by reordering the tags * of the reduces sinks) - * - * @param joinOp The join operator to be processed - * @param bigTables Set of all big tables + * + * @param joinOp + * The join operator to be processed + * @param bigTables + * Set of all big tables */ private void reorder(JoinOperator joinOp, Set bigTables) { int count = joinOp.getParentOperators().size(); // Find the biggest reduce sink - int biggestPos = count - 1; - int biggestSize = getOutputSize(joinOp.getParentOperators().get(biggestPos), - bigTables); + int biggestPos = count - 1; + int biggestSize = getOutputSize( + joinOp.getParentOperators().get(biggestPos), bigTables); for (int i = 0; i < count - 1; i++) { int currSize = getOutputSize(joinOp.getParentOperators().get(i), - bigTables); + bigTables); if (currSize > biggestSize) { biggestSize = currSize; biggestPos = i; @@ -135,14 +133,14 @@ if (biggestPos != (count - 1)) { Byte[] tagOrder = joinOp.getConf().getTagOrder(); Byte temp = tagOrder[biggestPos]; - tagOrder[biggestPos] = tagOrder[count-1]; - tagOrder[count-1] = temp; + tagOrder[biggestPos] = tagOrder[count - 1]; + tagOrder[count - 1] = temp; // Update tags of reduce sinks - 
((ReduceSinkOperator)joinOp.getParentOperators().get(biggestPos)) - .getConf().setTag(count-1); - ((ReduceSinkOperator)joinOp.getParentOperators().get(count-1)).getConf() - .setTag(biggestPos); + ((ReduceSinkOperator) joinOp.getParentOperators().get(biggestPos)) + .getConf().setTag(count - 1); + ((ReduceSinkOperator) joinOp.getParentOperators().get(count - 1)) + .getConf().setTag(biggestPos); } } @@ -150,13 +148,14 @@ * Transform the query tree. For each join, check which reduce sink will * output the biggest result (based on STREAMTABLE hints) and give it the * biggest tag so that it gets streamed. - * - * @param pactx current parse context + * + * @param pactx + * current parse context */ public ParseContext transform(ParseContext pactx) throws SemanticException { Set bigTables = getBigTables(pactx); - for (JoinOperator joinOp: pactx.getJoinContext().keySet()) { + for (JoinOperator joinOp : pactx.getJoinContext().keySet()) { reorder(joinOp, bigTables); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -18,87 +18,99 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; -import java.util.ArrayList; import java.util.Map; -import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.Set; -import java.io.Serializable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.plan.fetchWork; -import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.mapredLocalWork; -import org.apache.hadoop.hive.ql.plan.mapJoinDesc; -import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; -import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.partitionDesc; -import org.apache.hadoop.hive.ql.plan.fileSinkDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.tableScanDesc; -import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; -import org.apache.hadoop.hive.ql.metadata.*; -import org.apache.hadoop.hive.ql.parse.*; -import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import 
org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; +import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.fetchWork; +import org.apache.hadoop.hive.ql.plan.fileSinkDesc; +import org.apache.hadoop.hive.ql.plan.mapJoinDesc; +import org.apache.hadoop.hive.ql.plan.mapredLocalWork; +import org.apache.hadoop.hive.ql.plan.mapredWork; +import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.tableDesc; +import org.apache.hadoop.hive.ql.plan.tableScanDesc; +import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; /** - * General utility common functions for the Processor to convert operator into map-reduce tasks + * General utility common functions for the Processor to convert operator into + * map-reduce tasks */ public class GenMapRedUtils { private static Log LOG; static { - LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils"); + LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils"); } /** * Initialize the current plan by adding it to root tasks - * @param op the reduce sink operator encountered - * @param opProcCtx processing context + * + * @param op + * the reduce sink operator encountered + * @param opProcCtx + * processing context */ - public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException { + public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) + throws SemanticException { Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = opProcCtx + .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); + Task currTask = mapredCtx.getCurrTask(); mapredWork plan = (mapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); + HashMap, Task> opTaskMap = opProcCtx + .getOpTaskMap(); Operator currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); plan.setReducer(reducer); - reduceSinkDesc desc = (reduceSinkDesc)op.getConf(); + reduceSinkDesc desc = op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); List> rootTasks = opProcCtx.getRootTasks(); rootTasks.add(currTask); - if (reducer.getClass() == JoinOperator.class) + if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); + } assert currTopOp != null; List> seenOps = opProcCtx.getSeenOps(); @@ -117,39 +129,51 @@ /** * Initialize the current plan by adding it to root tasks - * @param op the map join operator 
encountered - * @param opProcCtx processing context - * @param pos position of the parent + * + * @param op + * the map join operator encountered + * @param opProcCtx + * processing context + * @param pos + * position of the parent */ - public static void initMapJoinPlan(Operator op, GenMRProcContext opProcCtx, boolean readInputMapJoin, boolean readInputUnion, - boolean setReducer, int pos) - throws SemanticException { - Map, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); + public static void initMapJoinPlan(Operator op, + GenMRProcContext opProcCtx, boolean readInputMapJoin, + boolean readInputUnion, boolean setReducer, int pos) + throws SemanticException { + Map, GenMapRedCtx> mapCurrCtx = opProcCtx + .getMapCurrCtx(); assert (((pos == -1) && (readInputMapJoin)) || (pos != -1)); int parentPos = (pos == -1) ? 0 : pos; - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(parentPos)); - Task currTask = mapredCtx.getCurrTask(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( + parentPos)); + Task currTask = mapredCtx.getCurrTask(); mapredWork plan = (mapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); + HashMap, Task> opTaskMap = opProcCtx + .getOpTaskMap(); Operator currTopOp = opProcCtx.getCurrTopOp(); - // The mapjoin has already been encountered. Some context must be stored about that + // The mapjoin has already been encountered. Some context must be stored + // about that if (readInputMapJoin) { MapJoinOperator currMapJoinOp = opProcCtx.getCurrMapJoinOp(); assert currMapJoinOp != null; - boolean local = ((pos == -1) || (pos == ((mapJoinDesc)currMapJoinOp.getConf()).getPosBigTable())) ? false : true; + boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()) + .getPosBigTable())) ? false : true; if (setReducer) { - Operator reducer = op.getChildOperators().get(0); + Operator reducer = op.getChildOperators() + .get(0); plan.setReducer(reducer); opTaskMap.put(reducer, currTask); - if (reducer.getClass() == JoinOperator.class) + if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); - reduceSinkDesc desc = (reduceSinkDesc)op.getConf(); + } + reduceSinkDesc desc = (reduceSinkDesc) op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); + } else { + opTaskMap.put(op, currTask); } - else - opTaskMap.put(op, currTask); if (!readInputUnion) { GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp); @@ -161,24 +185,22 @@ taskTmpDir = mjCtx.getTaskTmpDir(); tt_desc = mjCtx.getTTDesc(); rootOp = mjCtx.getRootMapJoinOp(); - } - else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx.getOldMapJoin()); + } else { + GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx + .getOldMapJoin()); taskTmpDir = oldMjCtx.getTaskTmpDir(); tt_desc = oldMjCtx.getTTDesc(); rootOp = oldMjCtx.getRootMapJoinOp(); } setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - } - else { + } else { initUnionPlan(opProcCtx, currTask, false); } opProcCtx.setCurrMapJoinOp(null); - } - else { - mapJoinDesc desc = (mapJoinDesc)op.getConf(); + } else { + mapJoinDesc desc = (mapJoinDesc) op.getConf(); // The map is overloaded to keep track of mapjoins also opTaskMap.put(op, currTask); @@ -202,43 +224,50 @@ /** * Initialize the current union plan. 
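The union-plan initialization whose javadoc starts here, like the map-join branches handled above, ends in the same idempotent registration step a little further down: a task's temporary output directory is entered into the plan's pathToAliases (doubling as its own alias), pathToPartitionInfo and aliasToWork, but only if it is not there yet, so repeated calls are harmless. Stripped down to plain collections (the real maps hold Hive plan descriptors and operators, and the scratch path shown is made up):

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class PlanRegistration {
        // Simplified mapredWork bookkeeping: path -> aliases, path -> "partition info", alias -> "operator".
        static final Map<String, List<String>> pathToAliases = new LinkedHashMap<>();
        static final Map<String, String> pathToPartitionInfo = new LinkedHashMap<>();
        static final Map<String, String> aliasToWork = new LinkedHashMap<>();

        // Register a task tmp dir as a source of the plan; safe to call more than once.
        static void addTmpDirSource(String taskTmpDir, String partDesc, String rootOp) {
            if (pathToAliases.get(taskTmpDir) == null) {
                pathToAliases.put(taskTmpDir, new ArrayList<>());
                pathToAliases.get(taskTmpDir).add(taskTmpDir);  // the tmp dir doubles as its own alias
                pathToPartitionInfo.put(taskTmpDir, partDesc);
                aliasToWork.put(taskTmpDir, rootOp);
            }
        }

        public static void main(String[] args) {
            addTmpDirSource("/tmp/hive-scratch/-mr-10002", "intermediate tableDesc", "TS[tmp]");
            addTmpDirSource("/tmp/hive-scratch/-mr-10002", "intermediate tableDesc", "TS[tmp]"); // no-op
            System.out.println(pathToAliases + " " + aliasToWork);
        }
    }
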
- * - * @param op the reduce sink operator encountered - * @param opProcCtx processing context + * + * @param op + * the reduce sink operator encountered + * @param opProcCtx + * processing context */ - public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException { + public static void initUnionPlan(ReduceSinkOperator op, + GenMRProcContext opProcCtx) throws SemanticException { Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = opProcCtx + .getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); + Task currTask = mapredCtx.getCurrTask(); mapredWork plan = (mapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); + HashMap, Task> opTaskMap = opProcCtx + .getOpTaskMap(); opTaskMap.put(reducer, currTask); plan.setReducer(reducer); - reduceSinkDesc desc = (reduceSinkDesc)op.getConf(); + reduceSinkDesc desc = op.getConf(); plan.setNumReduceTasks(desc.getNumReducers()); - if (reducer.getClass() == JoinOperator.class) + if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); + } initUnionPlan(opProcCtx, currTask, false); } /* - * It is a idempotent function to add various intermediate files as the source for the - * union. The plan has already been created. + * It is a idempotent function to add various intermediate files as the source + * for the union. The plan has already been created. */ - public static void initUnionPlan(GenMRProcContext opProcCtx, Task currTask, boolean local) { + public static void initUnionPlan(GenMRProcContext opProcCtx, + Task currTask, boolean local) { mapredWork plan = (mapredWork) currTask.getWork(); UnionOperator currUnionOp = opProcCtx.getCurrUnionOp(); assert currUnionOp != null; GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp); assert uCtx != null; - List taskTmpDirLst = uCtx.getTaskTmpDir(); - List tt_descLst = uCtx.getTTDesc(); + List taskTmpDirLst = uCtx.getTaskTmpDir(); + List tt_descLst = uCtx.getTTDesc(); assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty(); assert taskTmpDirLst.size() == tt_descLst.size(); int size = taskTmpDirLst.size(); @@ -250,7 +279,8 @@ if (plan.getPathToAliases().get(taskTmpDir) == null) { plan.getPathToAliases().put(taskTmpDir, new ArrayList()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null)); + plan.getPathToPartitionInfo().put(taskTmpDir, + new partitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, currUnionOp); } } @@ -258,19 +288,22 @@ /** * Merge the current task with the task for the current reducer - * @param op operator being processed - * @param oldTask the old task for the current reducer - * @param task the current task for the current reducer - * @param opProcCtx processing context - * @param pos position of the parent in the stack + * + * @param op + * operator being processed + * @param oldTask + * the old task for the current reducer + * @param task + * the current task for the current reducer + * @param opProcCtx + * processing context + * @param pos + * position of the parent in the stack */ public static void joinPlan(Operator op, - Task oldTask, - Task task, - GenMRProcContext opProcCtx, - int pos, boolean split, - boolean readMapJoinData, - boolean readUnionData) throws SemanticException { + Task oldTask, Task task, + GenMRProcContext 
opProcCtx, int pos, boolean split, + boolean readMapJoinData, boolean readUnionData) throws SemanticException { Task currTask = task; mapredWork plan = (mapredWork) currTask.getWork(); Operator currTopOp = opProcCtx.getCurrTopOp(); @@ -279,72 +312,76 @@ // terminate the old task and make current task dependent on it if (split) { assert oldTask != null; - splitTasks((ReduceSinkOperator)op, oldTask, currTask, opProcCtx, true, false, 0); - } - else { - if ((oldTask != null) && (oldTask.getParentTasks() != null) && !oldTask.getParentTasks().isEmpty()) { + splitTasks(op, oldTask, currTask, opProcCtx, true, false, 0); + } else { + if ((oldTask != null) && (oldTask.getParentTasks() != null) + && !oldTask.getParentTasks().isEmpty()) { parTasks = new ArrayList>(); parTasks.addAll(oldTask.getParentTasks()); Object[] parTaskArr = parTasks.toArray(); - for (int i = 0; i < parTaskArr.length; i++) - ((Task)parTaskArr[i]).removeDependentTask(oldTask); + for (Object element : parTaskArr) { + ((Task) element).removeDependentTask(oldTask); + } } } if (currTopOp != null) { List> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); + String currAliasId = opProcCtx.getCurrAliasId(); if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); boolean local = false; - if (pos != -1) - local = (pos == ((mapJoinDesc)op.getConf()).getPosBigTable()) ? false : true; + if (pos != -1) { + local = (pos == ((mapJoinDesc) op.getConf()).getPosBigTable()) ? false + : true; + } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); - } - else if (opProcCtx.getCurrMapJoinOp() != null) { - MapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); + } else if (opProcCtx.getCurrMapJoinOp() != null) { + MapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); if (readUnionData) { initUnionPlan(opProcCtx, currTask, false); - } - else { + } else { GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - // In case of map-join followed by map-join, the file needs to be obtained from the old map join + // In case of map-join followed by map-join, the file needs to be + // obtained from the old map join MapJoinOperator oldMapJoin = mjCtx.getOldMapJoin(); - String taskTmpDir = null; - tableDesc tt_desc = null; + String taskTmpDir = null; + tableDesc tt_desc = null; Operator rootOp = null; if (oldMapJoin == null) { taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } - else { + tt_desc = mjCtx.getTTDesc(); + rootOp = mjCtx.getRootMapJoinOp(); + } else { GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); assert oldMjCtx != null; taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); + tt_desc = oldMjCtx.getTTDesc(); + rootOp = oldMjCtx.getRootMapJoinOp(); } - boolean local = ((pos == -1) || (pos == ((mapJoinDesc)mjOp.getConf()).getPosBigTable())) ? false : true; + boolean local = ((pos == -1) || (pos == (mjOp.getConf()) + .getPosBigTable())) ? 
false : true; setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); } opProcCtx.setCurrMapJoinOp(null); if ((oldTask != null) && (parTasks != null)) { - for (Task parTask : parTasks) + for (Task parTask : parTasks) { parTask.addDependentTask(currTask); + } } - if (opProcCtx.getRootTasks().contains(currTask)) + if (opProcCtx.getRootTasks().contains(currTask)) { opProcCtx.getRootTasks().remove(currTask); + } } opProcCtx.setCurrTask(currTask); @@ -352,26 +389,31 @@ /** * Split the current plan by creating a temporary destination - * @param op the reduce sink operator encountered - * @param opProcCtx processing context + * + * @param op + * the reduce sink operator encountered + * @param opProcCtx + * processing context */ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) - throws SemanticException { + throws SemanticException { // Generate a new task mapredWork cplan = getMapRedWork(); ParseContext parseCtx = opProcCtx.getParseCtx(); - Task redTask = TaskFactory.get(cplan, parseCtx.getConf()); + Task redTask = TaskFactory.get(cplan, parseCtx + .getConf()); Operator reducer = op.getChildOperators().get(0); // Add the reducer cplan.setReducer(reducer); - reduceSinkDesc desc = (reduceSinkDesc)op.getConf(); + reduceSinkDesc desc = op.getConf(); cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); - HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); + HashMap, Task> opTaskMap = opProcCtx + .getOpTaskMap(); opTaskMap.put(reducer, redTask); - Task currTask = opProcCtx.getCurrTask(); + Task currTask = opProcCtx.getCurrTask(); splitTasks(op, currTask, redTask, opProcCtx, true, false, 0); opProcCtx.getRootOps().add(op); @@ -379,30 +421,36 @@ /** * set the current task in the mapredWork - * @param alias_id current alias - * @param topOp the top operator of the stack - * @param plan current plan - * @param local whether you need to add to map-reduce or local work - * @param opProcCtx processing context + * + * @param alias_id + * current alias + * @param topOp + * the top operator of the stack + * @param plan + * current plan + * @param local + * whether you need to add to map-reduce or local work + * @param opProcCtx + * processing context */ - public static void setTaskPlan(String alias_id, Operator topOp, - mapredWork plan, boolean local, GenMRProcContext opProcCtx) - throws SemanticException { + public static void setTaskPlan(String alias_id, + Operator topOp, mapredWork plan, boolean local, + GenMRProcContext opProcCtx) throws SemanticException { ParseContext parseCtx = opProcCtx.getParseCtx(); Set inputs = opProcCtx.getInputs(); ArrayList partDir = new ArrayList(); ArrayList partDesc = new ArrayList(); - Path tblDir = null; - tableDesc tblDesc = null; + Path tblDir = null; + tableDesc tblDesc = null; PrunedPartitionList partsList = null; try { partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), - opProcCtx.getConf(), alias_id, parseCtx.getPrunedPartitions()); + parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), + alias_id, parseCtx.getPrunedPartitions()); } catch (SemanticException e) { throw e; } catch (HiveException e) { @@ -412,35 +460,40 @@ // Generate the map work for this alias_id Set parts = null; - // pass both confirmed and unknown partitions through the map-reduce framework + // pass both confirmed and unknown partitions through the map-reduce + // framework parts = partsList.getConfirmedPartns(); parts.addAll(partsList.getUnknownPartns()); partitionDesc 
aliasPartnDesc = null; - try{ - if (parts.isEmpty()) { - if (!partsList.getDeniedPartns().isEmpty()) - aliasPartnDesc = Utilities.getPartitionDesc(partsList.getDeniedPartns() - .iterator().next()); - } else { - aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next()); - } + try { + if (parts.isEmpty()) { + if (!partsList.getDeniedPartns().isEmpty()) { + aliasPartnDesc = Utilities.getPartitionDesc(partsList + .getDeniedPartns().iterator().next()); + } + } else { + aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next()); + } } catch (HiveException e) { - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } // The table does not have any partitions - if (aliasPartnDesc == null) - aliasPartnDesc = new partitionDesc(Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null); + if (aliasPartnDesc == null) { + aliasPartnDesc = new partitionDesc(Utilities.getTableDesc(parseCtx + .getTopToTable().get(topOp)), null); + } plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc); for (Partition part : parts) { - if (part.getTable().isPartitioned()) + if (part.getTable().isPartitioned()) { inputs.add(new ReadEntity(part)); - else + } else { inputs.add(new ReadEntity(part.getTable())); + } // Later the properties have to come from the partition as opposed // to from the table in order to support versioning. @@ -449,8 +502,7 @@ if (sampleDescr != null) { paths = SamplePruner.prune(part, sampleDescr); - } - else { + } else { paths = part.getPath(); } @@ -462,23 +514,24 @@ tblDesc = Utilities.getTableDesc(part.getTable()); } - for (Path p: paths) { - if(p == null) + for (Path p : paths) { + if (p == null) { continue; + } String path = p.toString(); LOG.debug("Adding " + path + " of table" + alias_id); partDir.add(p); - try{ - partDesc.add(Utilities.getPartitionDesc(part)); + try { + partDesc.add(Utilities.getPartitionDesc(part)); } catch (HiveException e) { - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } } } - Iterator iterPath = partDir.iterator(); + Iterator iterPath = partDir.iterator(); Iterator iterPartnDesc = partDesc.iterator(); if (!local) { @@ -499,53 +552,65 @@ assert plan.getAliasToWork().get(alias_id) == null; plan.getAliasToWork().put(alias_id, topOp); - } - else { + } else { // populate local work if needed mapredLocalWork localPlan = plan.getMapLocalWork(); - if (localPlan == null) + if (localPlan == null) { localPlan = new mapredLocalWork( new LinkedHashMap>(), new LinkedHashMap()); + } assert localPlan.getAliasToWork().get(alias_id) == null; assert localPlan.getAliasToFetchWork().get(alias_id) == null; localPlan.getAliasToWork().put(alias_id, topOp); - if (tblDir == null) - localPlan.getAliasToFetchWork().put(alias_id, new fetchWork(fetchWork.convertPathToStringArray(partDir), partDesc)); - else - localPlan.getAliasToFetchWork().put(alias_id, new fetchWork(tblDir.toString(), tblDesc)); + if (tblDir == null) { + localPlan.getAliasToFetchWork() + .put( + alias_id, + new fetchWork(fetchWork.convertPathToStringArray(partDir), + partDesc)); + } else { + localPlan.getAliasToFetchWork().put(alias_id, + new fetchWork(tblDir.toString(), tblDesc)); + } plan.setMapLocalWork(localPlan); } } - /** * set the current task in the mapredWork - * @param alias current alias - * @param topOp the top 
operator of the stack - * @param plan current plan - * @param local whether you need to add to map-reduce or local work - * @param tt_desc table descriptor + * + * @param alias + * current alias + * @param topOp + * the top operator of the stack + * @param plan + * current plan + * @param local + * whether you need to add to map-reduce or local work + * @param tt_desc + * table descriptor */ - public static void setTaskPlan(String path, String alias, Operator topOp, - mapredWork plan, boolean local, tableDesc tt_desc) - throws SemanticException { + public static void setTaskPlan(String path, String alias, + Operator topOp, mapredWork plan, boolean local, + tableDesc tt_desc) throws SemanticException { if (!local) { - if (plan.getPathToAliases().get(path) == null) + if (plan.getPathToAliases().get(path) == null) { plan.getPathToAliases().put(path, new ArrayList()); + } plan.getPathToAliases().get(path).add(alias); plan.getPathToPartitionInfo().put(path, new partitionDesc(tt_desc, null)); plan.getAliasToWork().put(alias, topOp); - } - else { + } else { // populate local work if needed mapredLocalWork localPlan = plan.getMapLocalWork(); - if (localPlan == null) + if (localPlan == null) { localPlan = new mapredLocalWork( - new LinkedHashMap>(), - new LinkedHashMap()); + new LinkedHashMap>(), + new LinkedHashMap()); + } assert localPlan.getAliasToWork().get(alias) == null; assert localPlan.getAliasToFetchWork().get(alias) == null; @@ -557,15 +622,20 @@ /** * set key and value descriptor - * @param plan current plan - * @param topOp current top operator in the path + * + * @param plan + * current plan + * @param topOp + * current top operator in the path */ - public static void setKeyAndValueDesc(mapredWork plan, Operator topOp) { - if (topOp == null) + public static void setKeyAndValueDesc(mapredWork plan, + Operator topOp) { + if (topOp == null) { return; + } if (topOp instanceof ReduceSinkOperator) { - ReduceSinkOperator rs = (ReduceSinkOperator)topOp; + ReduceSinkOperator rs = (ReduceSinkOperator) topOp; plan.setKeyDesc(rs.getConf().getKeySerializeInfo()); int tag = Math.max(0, rs.getConf().getTag()); List tagToSchema = plan.getTagToValueDesc(); @@ -574,9 +644,10 @@ } tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); } else { - List> children = topOp.getChildOperators(); + List> children = topOp + .getChildOperators(); if (children != null) { - for(Operator op: children) { + for (Operator op : children) { setKeyAndValueDesc(plan, op); } } @@ -585,13 +656,15 @@ /** * create a new plan and return + * * @return the new plan */ public static mapredWork getMapRedWork() { mapredWork work = new mapredWork(); work.setPathToAliases(new LinkedHashMap>()); work.setPathToPartitionInfo(new LinkedHashMap()); - work.setAliasToWork(new LinkedHashMap>()); + work + .setAliasToWork(new LinkedHashMap>()); work.setTagToValueDesc(new ArrayList()); work.setReducer(null); return work; @@ -599,13 +672,17 @@ /** * insert in the map for the operator to row resolver - * @param op operator created - * @param rr row resolver - * @param parseCtx parse context + * + * @param op + * operator created + * @param rr + * row resolver + * @param parseCtx + * parse context */ @SuppressWarnings("nls") - private static Operator putOpInsertMap(Operator op, RowResolver rr, ParseContext parseCtx) - { + private static Operator putOpInsertMap( + Operator op, RowResolver rr, ParseContext parseCtx) { OpParseContext ctx = new OpParseContext(rr); parseCtx.getOpParseCtx().put(op, ctx); return op; @@ -622,40 +699,47 @@ * @param pos 
position of the parent **/ public static void splitTasks(Operator op, - Task parentTask, - Task childTask, - GenMRProcContext opProcCtx, boolean setReducer, - boolean local, int posn) throws SemanticException { - mapredWork plan = (mapredWork) childTask.getWork(); + Task parentTask, + Task childTask, GenMRProcContext opProcCtx, + boolean setReducer, boolean local, int posn) throws SemanticException { + childTask.getWork(); Operator currTopOp = opProcCtx.getCurrTopOp(); ParseContext parseCtx = opProcCtx.getParseCtx(); parentTask.addDependentTask(childTask); - // Root Task cannot depend on any other task, therefore childTask cannot be a root Task + // Root Task cannot depend on any other task, therefore childTask cannot be + // a root Task List> rootTasks = opProcCtx.getRootTasks(); - if (rootTasks.contains(childTask)) + if (rootTasks.contains(childTask)) { rootTasks.remove(childTask); + } // generate the temporary file Context baseCtx = parseCtx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); Operator parent = op.getParentOperators().get(posn); - tableDesc tt_desc = - PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); + tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); // Create a file sink operator for this file name - boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE); - fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, compressIntermediate); + boolean compressIntermediate = parseCtx.getConf().getBoolVar( + HiveConf.ConfVars.COMPRESSINTERMEDIATE); + fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, + compressIntermediate); if (compressIntermediate) { - desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); - desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); + desc.setCompressCodec(parseCtx.getConf().getVar( + HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); + desc.setCompressType(parseCtx.getConf().getVar( + HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); } - Operator fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx); + Operator fs_op = putOpInsertMap(OperatorFactory + .get(desc, parent.getSchema()), null, parseCtx); // replace the reduce child with this operator - List> childOpList = parent.getChildOperators(); + List> childOpList = parent + .getChildOperators(); for (int pos = 0; pos < childOpList.size(); pos++) { if (childOpList.get(pos) == op) { childOpList.set(pos, fs_op); @@ -668,15 +752,16 @@ fs_op.setParentOperators(parentOpList); // create a dummy tableScan operator on top of op - Operator ts_op = - putOpInsertMap(OperatorFactory.get(tableScanDesc.class, parent.getSchema()), null, parseCtx); + Operator ts_op = putOpInsertMap(OperatorFactory + .get(tableScanDesc.class, parent.getSchema()), null, parseCtx); childOpList = new ArrayList>(); childOpList.add(op); ts_op.setChildOperators(childOpList); op.getParentOperators().set(posn, ts_op); - Map, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); + Map, GenMapRedCtx> mapCurrCtx = opProcCtx + .getMapCurrCtx(); mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); String streamDesc = taskTmpDir; @@ -690,14 +775,16 @@ streamDesc = "$INTNAME"; origStreamDesc = streamDesc; int pos = 0; - while (cplan.getAliasToWork().get(streamDesc) != null) + while 
(cplan.getAliasToWork().get(streamDesc) != null) { streamDesc = origStreamDesc.concat(String.valueOf(++pos)); + } } // TODO: Allocate work to remove the temporary files and make that // dependent on the redTask - if (reducer.getClass() == JoinOperator.class) + if (reducer.getClass() == JoinOperator.class) { cplan.setNeedsTagging(true); + } } // Add the path to alias mapping @@ -705,18 +792,19 @@ // This can be cleaned up as a function table in future if (op instanceof MapJoinOperator) { - MapJoinOperator mjOp = (MapJoinOperator)op; + MapJoinOperator mjOp = (MapJoinOperator) op; opProcCtx.setCurrMapJoinOp(mjOp); GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - if (mjCtx == null) + if (mjCtx == null) { mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null); - else { + } else { mjCtx.setTaskTmpDir(taskTmpDir); mjCtx.setTTDesc(tt_desc); mjCtx.setRootMapJoinOp(ts_op); } opProcCtx.setMapJoinCtx(mjOp, mjCtx); - opProcCtx.getMapCurrCtx().put(parent, new GenMapRedCtx(childTask, null, null)); + opProcCtx.getMapCurrCtx().put(parent, + new GenMapRedCtx(childTask, null, null)); } currTopOp = null; @@ -727,7 +815,8 @@ opProcCtx.setCurrTask(childTask); } - static public void mergeMapJoinUnion(UnionOperator union, GenMRProcContext ctx, int pos) throws SemanticException { + static public void mergeMapJoinUnion(UnionOperator union, + GenMRProcContext ctx, int pos) throws SemanticException { ParseContext parseCtx = ctx.getParseCtx(); UnionProcContext uCtx = parseCtx.getUCtx(); @@ -739,7 +828,7 @@ GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); Task uTask = null; - Operator parent = union.getParentOperators().get(pos); + union.getParentOperators().get(pos); mapredWork uPlan = null; // union is encountered for the first time @@ -749,10 +838,9 @@ uTask = TaskFactory.get(uPlan, parseCtx.getConf()); uCtxTask.setUTask(uTask); ctx.setUnionTask(union, uCtxTask); - } - else { + } else { uTask = uCtxTask.getUTask(); - uPlan = (mapredWork)uTask.getWork(); + uPlan = (mapredWork) uTask.getWork(); } // If there is a mapjoin at position 'pos' @@ -762,30 +850,34 @@ if (uPlan.getPathToAliases().get(taskTmpDir) == null) { uPlan.getPathToAliases().put(taskTmpDir, new ArrayList()); uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - uPlan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(mjCtx.getTTDesc(), null)); + uPlan.getPathToPartitionInfo().put(taskTmpDir, + new partitionDesc(mjCtx.getTTDesc(), null)); uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); } - for (Task t : currTask.getParentTasks()) + for (Task t : currTask.getParentTasks()) { t.addDependentTask(uTask); + } try { boolean notDone = true; while (notDone) { - for (Task t : currTask.getParentTasks()) + for (Task t : currTask.getParentTasks()) { t.removeDependentTask(currTask); + } notDone = false; } } catch (java.util.ConcurrentModificationException e) { } + } else { + setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); } - else - setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); ctx.setCurrTask(uTask); ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); ctx.setCurrMapJoinOp(null); - ctx.getMapCurrCtx().put((Operator)union, new GenMapRedCtx(ctx.getCurrTask(), null, null)); + ctx.getMapCurrCtx().put(union, + new GenMapRedCtx(ctx.getCurrTask(), null, null)); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java 
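// Illustrative sketch (not part of this patch): splitTasks() above cuts an operator tree between a
// parent operator and 'op' by routing the parent's output into a FileSink that writes a temporary
// directory, and re-reading that directory in the child task through a dummy TableScan placed on
// top of 'op'. The Op class below is a simplified stand-in for Hive's Operator hierarchy; the
// "FS:"/"TS:" labels are only for the example.
class SplitSketch {
  static class Op {
    final String name;
    final java.util.List<Op> parents = new java.util.ArrayList<Op>();
    final java.util.List<Op> children = new java.util.ArrayList<Op>();
    Op(String name) { this.name = name; }
  }

  /** Rewire parent -> op into parent -> fileSink(tmpDir), and tableScan(tmpDir) -> op. */
  static void split(Op parent, Op op, int posn, String tmpDir) {
    // the parent now feeds the temporary file instead of op (this stays in the parent task)
    Op fileSink = new Op("FS:" + tmpDir);
    fileSink.parents.add(parent);
    parent.children.set(parent.children.indexOf(op), fileSink);
    // a dummy table scan over the same temporary file becomes op's new parent (child task side)
    Op tableScan = new Op("TS:" + tmpDir);
    tableScan.children.add(op);
    op.parents.set(posn, tableScan);
  }
}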
(revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java (working copy) @@ -23,11 +23,11 @@ import java.util.Stack; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; +import org.apache.hadoop.hive.ql.parse.SemanticException; /** * Processor for the rule - no specific rule fired @@ -38,17 +38,23 @@ } /** - * Reduce Scan encountered - * @param nd the reduce sink operator encountered - * @param procCtx context + * Reduce Scan encountered + * + * @param nd + * the reduce sink operator encountered + * @param procCtx + * context */ - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - GenMRProcContext ctx = (GenMRProcContext)procCtx; + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + GenMRProcContext ctx = (GenMRProcContext) procCtx; - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get((Operator)stack.get(stack.size()-2)); - mapCurrCtx.put((Operator)nd, - new GenMapRedCtx(mapredCtx.getCurrTask(), mapredCtx.getCurrTopOp(), mapredCtx.getCurrAliasId())); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2)); + mapCurrCtx.put((Operator) nd, new GenMapRedCtx( + mapredCtx.getCurrTask(), mapredCtx.getCurrTopOp(), mapredCtx + .getCurrAliasId())); return null; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -18,36 +18,34 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; -import java.util.ArrayList; +import java.util.Map; import java.util.Stack; -import java.util.Map; -import java.util.HashMap; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import 
org.apache.hadoop.hive.ql.parse.ErrorMsg; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.plan.mapJoinDesc; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.fileSinkDesc; import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.fileSinkDesc; -import org.apache.hadoop.hive.conf.HiveConf; /** * Operator factory for MapJoin processing @@ -58,13 +56,14 @@ int pos = 0; int size = stack.size(); assert size >= 2 && stack.get(size - 1) == op; - Operator parent = (Operator)stack.get(size - 2); + Operator parent = (Operator) stack + .get(size - 2); List> parOp = op.getParentOperators(); pos = parOp.indexOf(parent); - assert pos < parOp.size(); + assert pos < parOp.size(); return pos; } - + /** * TableScan followed by MapJoin */ @@ -73,43 +72,49 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - MapJoinOperator mapJoin = (MapJoinOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)procCtx; + MapJoinOperator mapJoin = (MapJoinOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) procCtx; // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(pos)); - Task currTask = mapredCtx.getCurrTask(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( + pos)); + Task currTask = mapredCtx.getCurrTask(); mapredWork currPlan = (mapredWork) currTask.getWork(); - Operator currTopOp = mapredCtx.getCurrTopOp(); + Operator currTopOp = mapredCtx.getCurrTopOp(); String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); - + ctx.setCurrTopOp(currTopOp); ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); - + // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, false, false, pos); } - // The current plan can be thrown away after being merged with the original plan + // The current plan can be thrown away after being merged with the + // original plan else { - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, false, false); + GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, + false, false); currTask = opMapTask; ctx.setCurrTask(currTask); } - - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); + + mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx + .getCurrTopOp(), ctx.getCurrAliasId())); return null; } } - + /** * ReduceSink followed by MapJoin */ @@ -118,37 +123,43 @@ @Override public Object process(Node nd, Stack stack, 
NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - MapJoinOperator mapJoin = (MapJoinOperator)nd; - GenMRProcContext opProcCtx = (GenMRProcContext)procCtx; - + MapJoinOperator mapJoin = (MapJoinOperator) nd; + GenMRProcContext opProcCtx = (GenMRProcContext) procCtx; + mapredWork cplan = GenMapRedUtils.getMapRedWork(); ParseContext parseCtx = opProcCtx.getParseCtx(); - Task redTask = TaskFactory.get(cplan, parseCtx.getConf()); + Task redTask = TaskFactory.get(cplan, parseCtx + .getConf()); Task currTask = opProcCtx.getCurrTask(); // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); - boolean local = (pos == ((mapJoinDesc)mapJoin.getConf()).getPosBigTable()) ? false : true; - - GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, local, pos); + boolean local = (pos == (mapJoin.getConf()).getPosBigTable()) ? false + : true; + GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, + local, pos); + currTask = opProcCtx.getCurrTask(); - HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); + HashMap, Task> opTaskMap = opProcCtx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(mapJoin); - + // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert cplan.getReducer() == null; opTaskMap.put(mapJoin, currTask); opProcCtx.setCurrMapJoinOp(null); } - // The current plan can be thrown away after being merged with the original plan + // The current plan can be thrown away after being merged with the + // original plan else { - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos, false, false, false); + GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos, + false, false, false); currTask = opMapTask; opProcCtx.setCurrTask(currTask); } - + return null; } } @@ -159,87 +170,93 @@ public static class MapJoin implements NodeProcessor { /** - * Create a task by splitting the plan below the join. The reason, we have to do so in the - * processing of Select and not MapJoin is due to the walker. While processing a node, it is not safe - * to alter its children because that will decide the course of the walk. It is perfectly fine to muck around - * with its parents though, since those nodes have already been visited. + * Create a task by splitting the plan below the join. The reason, we have + * to do so in the processing of Select and not MapJoin is due to the + * walker. While processing a node, it is not safe to alter its children + * because that will decide the course of the walk. It is perfectly fine to + * muck around with its parents though, since those nodes have already been + * visited. */ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - - SelectOperator sel = (SelectOperator)nd; - MapJoinOperator mapJoin = (MapJoinOperator)sel.getParentOperators().get(0); + + SelectOperator sel = (SelectOperator) nd; + MapJoinOperator mapJoin = (MapJoinOperator) sel.getParentOperators().get( + 0); assert sel.getParentOperators().size() == 1; - - GenMRProcContext ctx = (GenMRProcContext)procCtx; + + GenMRProcContext ctx = (GenMRProcContext) procCtx; ParseContext parseCtx = ctx.getParseCtx(); - + // is the mapjoin followed by a reducer - List listMapJoinOps = parseCtx.getListMapJoinOpsNoReducer(); - + List listMapJoinOps = parseCtx + .getListMapJoinOpsNoReducer(); + if (listMapJoinOps.contains(mapJoin)) { ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - mapCurrCtx.put((Operator)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null)); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + mapCurrCtx.put((Operator) nd, new GenMapRedCtx( + ctx.getCurrTask(), null, null)); return null; } ctx.setCurrMapJoinOp(mapJoin); - + Task currTask = ctx.getCurrTask(); GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); if (mjCtx == null) { mjCtx = new GenMRMapJoinCtx(); ctx.setMapJoinCtx(mapJoin, mjCtx); } - + mapredWork mjPlan = GenMapRedUtils.getMapRedWork(); - Task mjTask = TaskFactory.get(mjPlan, parseCtx.getConf()); - - tableDesc tt_desc = - PlanUtils.getIntermediateFileTableDesc( - PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); - + Task mjTask = TaskFactory.get(mjPlan, parseCtx + .getConf()); + + tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); + // generate the temporary file Context baseCtx = parseCtx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); - + // Add the path to alias mapping mjCtx.setTaskTmpDir(taskTmpDir); mjCtx.setTTDesc(tt_desc); mjCtx.setRootMapJoinOp(sel); - + sel.setParentOperators(null); - + // Create a file sink operator for this file name - Operator fs_op = - OperatorFactory.get - (new fileSinkDesc(taskTmpDir, tt_desc, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), - mapJoin.getSchema()); - + Operator fs_op = OperatorFactory.get( + new fileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar( + HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema()); + assert mapJoin.getChildOperators().size() == 1; mapJoin.getChildOperators().set(0, fs_op); - + List> parentOpList = new ArrayList>(); parentOpList.add(mapJoin); fs_op.setParentOperators(parentOpList); - + currTask.addDependentTask(mjTask); - + ctx.setCurrTask(mjTask); ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); - - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - mapCurrCtx.put((Operator)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null)); - + + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + mapCurrCtx.put((Operator) nd, new GenMapRedCtx( + ctx.getCurrTask(), null, null)); + return null; } } - + /** * MapJoin followed by MapJoin */ @@ -248,50 +265,57 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
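// Illustrative sketch (not part of this patch): the class comment on the MapJoin processor above —
// split the plan while visiting the Select child, because a processor may rewrite a node's parents
// but not its children while the walker is running — comes down to the walk order being derived
// from the children lists. The toy depth-first walk below uses stand-in types, not Hive's
// GraphWalker: adding or removing entries of n.getChildren() mid-iteration can throw
// ConcurrentModificationException, and even replacing one changes which subtree is walked,
// whereas rewriting n's parent links is invisible to the traversal.
class WalkSketch {
  interface Node { java.util.List<Node> getChildren(); }

  static void walk(Node n, java.util.Set<Node> done) {
    if (!done.add(n)) {
      return;
    }
    // traversal is driven purely by the children list ...
    for (Node child : n.getChildren()) {
      walk(child, done);
    }
    // ... so a processor invoked here may safely rewire n's parents,
    // but must not mutate n.getChildren().
  }
}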
nodeOutputs) throws SemanticException { - MapJoinOperator mapJoin = (MapJoinOperator)nd; - GenMRProcContext ctx = (GenMRProcContext)procCtx; + MapJoinOperator mapJoin = (MapJoinOperator) nd; + GenMRProcContext ctx = (GenMRProcContext) procCtx; - ParseContext parseCtx = ctx.getParseCtx(); + ctx.getParseCtx(); MapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); assert oldMapJoin != null; GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx != null) + if (mjCtx != null) { mjCtx.setOldMapJoin(oldMapJoin); - else - ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null, oldMapJoin)); + } else { + ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null, + oldMapJoin)); + } ctx.setCurrMapJoinOp(mapJoin); // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(pos)); - Task currTask = mapredCtx.getCurrTask(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( + pos)); + Task currTask = mapredCtx.getCurrTask(); mapredWork currPlan = (mapredWork) currTask.getWork(); - String currAliasId = mapredCtx.getCurrAliasId(); + mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); - + ctx.setCurrTask(currTask); - + // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, false, false, pos); } - // The current plan can be thrown away after being merged with the original plan + // The current plan can be thrown away after being merged with the + // original plan else { - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false, true, false); + GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false, + true, false); currTask = opMapTask; ctx.setCurrTask(currTask); } - + mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null)); return null; } } - + /** * Union followed by MapJoin */ @@ -300,36 +324,43 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - GenMRProcContext ctx = (GenMRProcContext)procCtx; + GenMRProcContext ctx = (GenMRProcContext) procCtx; ParseContext parseCtx = ctx.getParseCtx(); UnionProcContext uCtx = parseCtx.getUCtx(); // union was map only - no special processing needed - if (uCtx.isMapOnlySubq()) - return (new TableScanMapJoin()).process(nd, stack, procCtx, nodeOutputs); - + if (uCtx.isMapOnlySubq()) { + return (new TableScanMapJoin()) + .process(nd, stack, procCtx, nodeOutputs); + } + UnionOperator currUnion = ctx.getCurrUnionOp(); assert currUnion != null; - GenMRUnionCtx unionCtx = ctx.getUnionTask(currUnion); - MapJoinOperator mapJoin = (MapJoinOperator)nd; + ctx.getUnionTask(currUnion); + MapJoinOperator mapJoin = (MapJoinOperator) nd; // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); - Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(pos)); - Task currTask = mapredCtx.getCurrTask(); + Map, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( + pos)); + Task currTask = mapredCtx.getCurrTask(); mapredWork currPlan = (mapredWork) currTask.getWork(); Operator reducer = mapJoin; - HashMap, Task> opTaskMap = ctx.getOpTaskMap(); + HashMap, Task> opTaskMap = ctx + .getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); - + // union result cannot be a map table - boolean local = (pos == ((mapJoinDesc)mapJoin.getConf()).getPosBigTable()) ? false : true; - if (local) + boolean local = (pos == (mapJoin.getConf()).getPosBigTable()) ? false + : true; + if (local) { throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg()); - + } + // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; @@ -337,26 +368,32 @@ GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, true, false, pos); ctx.setCurrUnionOp(null); } - // The current plan can be thrown away after being merged with the original plan + // The current plan can be thrown away after being merged with the + // original plan else { - Task uTask = ctx.getUnionTask(ctx.getCurrUnionOp()).getUTask(); - if (uTask.getId().equals(opMapTask.getId())) - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, false, true); - else - GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false, false, true); + Task uTask = ctx.getUnionTask( + ctx.getCurrUnionOp()).getUTask(); + if (uTask.getId().equals(opMapTask.getId())) { + GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, + false, true); + } else { + GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false, + false, true); + } currTask = opMapTask; ctx.setCurrTask(currTask); } - - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId())); + + mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx + .getCurrTopOp(), ctx.getCurrAliasId())); return null; } } - + public static NodeProcessor getTableScanMapJoin() { return new TableScanMapJoin(); } - + public static NodeProcessor getUnionMapJoin() { return new UnionMapJoin(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (revision 901511) +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (working copy) @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; + import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ScriptOperator; @@ -30,28 +31,27 @@ import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Implementation of one of the rule-based optimization steps. ColumnPruner gets the current operator tree. The \ - * tree is traversed to find out the columns used - * for all the base tables. If all the columns for a table are not used, a select is pushed on top of that table - * (to select only those columns). Since this - * changes the row resolver, the tree is built again. This can be optimized later to patch the tree. + * Implementation of one of the rule-based optimization steps. ColumnPruner gets + * the current operator tree. The \ tree is traversed to find out the columns + * used for all the base tables. If all the columns for a table are not used, a + * select is pushed on top of that table (to select only those columns). Since + * this changes the row resolver, the tree is built again. This can be optimized + * later to patch the tree. */ public class ColumnPruner implements Transform { protected ParseContext pGraphContext; private HashMap, OpParseContext> opToParseCtxMap; - /** * empty constructor */ @@ -60,54 +60,55 @@ } /** - * update the map between operator and row resolver - * @param op operator being inserted - * @param rr row resolver of the operator - * @return + * Transform the query tree. For each table under consideration, check if all + * columns are needed. If not, only select the operators needed at the + * beginning and proceed + * + * @param pactx + * the current parse context */ - @SuppressWarnings("nls") - private Operator putOpInsertMap(Operator op, RowResolver rr) { - OpParseContext ctx = new OpParseContext(rr); - pGraphContext.getOpParseCtx().put(op, ctx); - return op; - } - - /** - * Transform the query tree. For each table under consideration, check if all columns are needed. If not, - * only select the operators needed at the beginning and proceed - * @param pactx the current parse context - */ public ParseContext transform(ParseContext pactx) throws SemanticException { - this.pGraphContext = pactx; - this.opToParseCtxMap = pGraphContext.getOpParseCtx(); + pGraphContext = pactx; + opToParseCtxMap = pGraphContext.getOpParseCtx(); // generate pruned column list for all relevant operators ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(opToParseCtxMap); - - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
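// Illustrative sketch (not part of this patch): the class comment above describes what ColumnPruner
// computes — the set of columns each base table actually needs, so that a select of only those
// columns can be pushed on top of the table scan. The stand-in below shows just that bookkeeping
// (merge the columns needed by every child, then project the schema down to that subset); it uses
// plain collections, not Hive's RowResolver/OpParseContext machinery.
class PruneSketch {
  /** Columns a table must produce = union of the columns its consumers reference. */
  static java.util.List<String> neededColumns(java.util.List<java.util.List<String>> usedByEachChild) {
    java.util.LinkedHashSet<String> needed = new java.util.LinkedHashSet<String>();
    for (java.util.List<String> cols : usedByEachChild) {
      needed.addAll(cols);
    }
    return new java.util.ArrayList<String>(needed);
  }

  /** The "select pushed on top of the table": keep only the needed columns of the full schema. */
  static java.util.List<String> pushSelect(java.util.List<String> tableSchema, java.util.List<String> needed) {
    java.util.List<String> projected = new java.util.ArrayList<String>(tableSchema);
    projected.retainAll(needed);
    return projected;
  }
}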
The dispatcher // generates the plan from the operator tree Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", "FIL%"), ColumnPrunerProcFactory.getFilterProc()); - opRules.put(new RuleRegExp("R2", "GBY%"), ColumnPrunerProcFactory.getGroupByProc()); - opRules.put(new RuleRegExp("R3", "RS%"), ColumnPrunerProcFactory.getReduceSinkProc()); - opRules.put(new RuleRegExp("R4", "SEL%"), ColumnPrunerProcFactory.getSelectProc()); - opRules.put(new RuleRegExp("R5", "JOIN%"), ColumnPrunerProcFactory.getJoinProc()); - opRules.put(new RuleRegExp("R6", "MAPJOIN%"), ColumnPrunerProcFactory.getMapJoinProc()); - opRules.put(new RuleRegExp("R7", "TS%"), ColumnPrunerProcFactory.getTableScanProc()); + opRules.put(new RuleRegExp("R1", "FIL%"), ColumnPrunerProcFactory + .getFilterProc()); + opRules.put(new RuleRegExp("R2", "GBY%"), ColumnPrunerProcFactory + .getGroupByProc()); + opRules.put(new RuleRegExp("R3", "RS%"), ColumnPrunerProcFactory + .getReduceSinkProc()); + opRules.put(new RuleRegExp("R4", "SEL%"), ColumnPrunerProcFactory + .getSelectProc()); + opRules.put(new RuleRegExp("R5", "JOIN%"), ColumnPrunerProcFactory + .getJoinProc()); + opRules.put(new RuleRegExp("R6", "MAPJOIN%"), ColumnPrunerProcFactory + .getMapJoinProc()); + opRules.put(new RuleRegExp("R7", "TS%"), ColumnPrunerProcFactory + .getTableScanProc()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory.getDefaultProc(), opRules, cppCtx); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory + .getDefaultProc(), opRules, cppCtx); GraphWalker ogw = new ColumnPrunerWalker(disp); - + // Create a list of topop nodes ArrayList topNodes = new ArrayList(); topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); return pGraphContext; } - + /** - * Walks the op tree in post order fashion (skips selects with file sink or script op children) + * Walks the op tree in post order fashion (skips selects with file sink or + * script op children) */ public static class ColumnPrunerWalker extends DefaultGraphWalker { @@ -123,18 +124,20 @@ boolean walkChildren = true; opStack.push(nd); - // no need to go further down for a select op with a file sink or script child + // no need to go further down for a select op with a file sink or script + // child // since all cols are needed for these ops - if(nd instanceof SelectOperator) { - for(Node child: nd.getChildren()) { - if ((child instanceof FileSinkOperator) || (child instanceof ScriptOperator)) + if (nd instanceof SelectOperator) { + for (Node child : nd.getChildren()) { + if ((child instanceof FileSinkOperator) + || (child instanceof ScriptOperator)) { walkChildren = false; + } } } - if((nd.getChildren() == null) - || getDispatchedList().containsAll(nd.getChildren()) - || !walkChildren) { + if ((nd.getChildren() == null) + || getDispatchedList().containsAll(nd.getChildren()) || !walkChildren) { // all children are done or no need to walk the children dispatch(nd, opStack); opStack.pop(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (working 
copy) @@ -130,13 +130,14 @@ throws SemanticException { // if this is not a HASH groupby, return - if (curr.getConf().getMode() != groupByDesc.Mode.HASH) + if (curr.getConf().getMode() != groupByDesc.Mode.HASH) { return; + } - Set tblNames = this.pGraphContext.getGroupOpToInputTables().get( - curr); - if (tblNames == null || tblNames.size() == 0) + Set tblNames = pGraphContext.getGroupOpToInputTables().get(curr); + if (tblNames == null || tblNames.size() == 0) { return; + } boolean bucketGroupBy = true; groupByDesc desc = curr.getConf(); @@ -144,7 +145,7 @@ groupByKeys.addAll(desc.getKeys()); // compute groupby columns from groupby keys List groupByCols = new ArrayList(); - while (groupByKeys.size() >0) { + while (groupByKeys.size() > 0) { exprNodeDesc node = groupByKeys.remove(0); if (node instanceof exprNodeColumnDesc) { groupByCols.addAll(node.getCols()); @@ -155,22 +156,24 @@ groupByKeys.add(0, ((exprNodeFieldDesc) node).getDesc()); continue; } else if (node instanceof exprNodeGenericFuncDesc) { - exprNodeGenericFuncDesc udfNode = ((exprNodeGenericFuncDesc)node); + exprNodeGenericFuncDesc udfNode = ((exprNodeGenericFuncDesc) node); GenericUDF udf = udfNode.getGenericUDF(); - if(!FunctionRegistry.isDeterministic(udf)) + if (!FunctionRegistry.isDeterministic(udf)) { return; + } groupByKeys.addAll(0, udfNode.getChildExprs()); } else { return; } } - - if(groupByCols.size() == 0) + + if (groupByCols.size() == 0) { return; + } for (String table : tblNames) { - Operator topOp = this.pGraphContext.getTopOps() - .get(table); + Operator topOp = pGraphContext.getTopOps().get( + table); if (topOp == null || (!(topOp instanceof TableScanOperator))) { // this is in a sub-query. // In future, we need to infer subq's columns propery. For example @@ -180,21 +183,25 @@ return; } TableScanOperator ts = (TableScanOperator) topOp; - Table destTable = this.pGraphContext.getTopToTable().get(ts); - if (destTable == null) + Table destTable = pGraphContext.getTopToTable().get(ts); + if (destTable == null) { return; + } if (!destTable.isPartitioned()) { List bucketCols = destTable.getBucketCols(); - List sortCols = Utilities.getColumnNamesFromSortCols(destTable.getSortCols()); - bucketGroupBy = matchBucketOrSortedColumns(groupByCols, bucketCols, sortCols); - if (!bucketGroupBy) + List sortCols = Utilities + .getColumnNamesFromSortCols(destTable.getSortCols()); + bucketGroupBy = matchBucketOrSortedColumns(groupByCols, bucketCols, + sortCols); + if (!bucketGroupBy) { return; + } } else { PrunedPartitionList partsList = null; try { - partsList = PartitionPruner.prune(destTable, this.pGraphContext - .getOpToPartPruner().get(ts), this.pGraphContext.getConf(), - table, this.pGraphContext.getPrunedPartitions()); + partsList = PartitionPruner.prune(destTable, pGraphContext + .getOpToPartPruner().get(ts), pGraphContext.getConf(), table, + pGraphContext.getPrunedPartitions()); } catch (HiveException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils @@ -206,10 +213,13 @@ parts.addAll(partsList.getUnknownPartns()); for (Partition part : parts) { List bucketCols = part.getBucketCols(); - List sortCols = Utilities.getColumnNamesFromSortCols(part.getTPartition().getSd().getSortCols()); - bucketGroupBy = matchBucketOrSortedColumns(groupByCols, bucketCols, sortCols); - if (!bucketGroupBy) + List sortCols = Utilities.getColumnNamesFromSortCols(part + .getTPartition().getSd().getSortCols()); + bucketGroupBy = matchBucketOrSortedColumns(groupByCols, bucketCols, + sortCols); + 
if (!bucketGroupBy) { return; + } } } } @@ -235,26 +245,29 @@ * @throws SemanticException */ private boolean matchBucketOrSortedColumns(List groupByCols, - List bucketCols, List sortCols) throws SemanticException { + List bucketCols, List sortCols) + throws SemanticException { boolean ret = false; - + if (sortCols == null || sortCols.size() == 0) { ret = matchBucketColumns(groupByCols, bucketCols); } - + if (!ret && sortCols != null && sortCols.size() >= groupByCols.size()) { // check sort columns, if groupByCols is a prefix subset of sort // columns, we will use sorted group by. For example, if data is sorted // by column a, b, c, and a query wants to group by b,a, we will use - // sorted group by. But if the query wants to groupby b,c, then sorted group by can not be used. + // sorted group by. But if the query wants to groupby b,c, then sorted + // group by can not be used. int num = groupByCols.size(); - for(int i =0;i (num -1)) + for (int i = 0; i < num; i++) { + if (sortCols.indexOf(groupByCols.get(i)) > (num - 1)) { return false; + } } return true; } - + return ret; } @@ -267,13 +280,15 @@ List tblBucketCols) throws SemanticException { if (tblBucketCols == null || tblBucketCols.size() == 0 - || grpCols.size() == 0 || grpCols.size() != tblBucketCols.size()) + || grpCols.size() == 0 || grpCols.size() != tblBucketCols.size()) { return false; + } for (int i = 0; i < grpCols.size(); i++) { String tblCol = grpCols.get(i); - if (!tblBucketCols.contains(tblCol)) + if (!tblBucketCols.contains(tblCol)) { return false; + } } return true; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (working copy) @@ -18,18 +18,18 @@ package org.apache.hadoop.hive.ql.optimizer.unionproc; +import java.util.HashMap; import java.util.Map; -import java.util.HashMap; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.exec.UnionOperator; public class UnionProcContext implements NodeProcessorCtx { public static class UnionParseContext { - transient private boolean[] mapOnlySubq; - transient private boolean[] rootTask; - transient private boolean[] mapJoinSubq; + transient private final boolean[] mapOnlySubq; + transient private final boolean[] rootTask; + transient private final boolean[] mapJoinSubq; transient private int numInputs; transient private boolean mapJoinQuery; @@ -37,7 +37,7 @@ public UnionParseContext(int numInputs) { this.numInputs = numInputs; mapOnlySubq = new boolean[numInputs]; - rootTask = new boolean[numInputs]; + rootTask = new boolean[numInputs]; mapJoinSubq = new boolean[numInputs]; } @@ -55,8 +55,9 @@ public void setMapJoinSubq(int pos, boolean mapJoinSubq) { this.mapJoinSubq[pos] = mapJoinSubq; - if (mapJoinSubq) + if (mapJoinSubq) { mapJoinQuery = true; + } } public boolean getMapJoinQuery() { @@ -91,13 +92,14 @@ } /** - * @param mapOnlySubq the mapOnlySubq to set + * @param mapOnlySubq + * the mapOnlySubq to set */ public void setMapOnlySubq(boolean mapOnlySubq) { this.mapOnlySubq = mapOnlySubq; } - private Map uCtxMap; + private final Map uCtxMap; public UnionProcContext() { uCtxMap = new HashMap(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java 
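// Illustrative sketch (not part of this patch): the sort-column branch of
// matchBucketOrSortedColumns() above accepts a group-by whose columns form an order-insensitive
// prefix of the table's sort columns — sorted by (a, b, c), grouping by (b, a) qualifies, grouping
// by (b, c) does not, because 'c' lies outside the first two sort positions. Simplified stand-in
// over plain lists instead of Hive table/partition metadata; it spells out the intended check.
class SortedGroupBySketch {
  static boolean isPrefixOfSortCols(java.util.List<String> groupByCols, java.util.List<String> sortCols) {
    int num = groupByCols.size();
    if (sortCols.size() < num) {
      return false;
    }
    for (String col : groupByCols) {
      int idx = sortCols.indexOf(col);
      // every group-by column must appear within the first |groupByCols| sort positions
      if (idx < 0 || idx > num - 1) {
        return false;
      }
    }
    return true;
  }
}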
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (working copy) @@ -21,13 +21,13 @@ import java.util.List; import java.util.Stack; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; /** * Operator factory for union processing @@ -38,8 +38,10 @@ int pos = 0; int size = stack.size(); assert size >= 2 && stack.get(size - 1) == union; - Operator parent = (Operator)stack.get(size - 2); - List> parUnion = union.getParentOperators(); + Operator parent = (Operator) stack + .get(size - 2); + List> parUnion = union + .getParentOperators(); pos = parUnion.indexOf(parent); assert pos < parUnion.size(); return pos; @@ -53,14 +55,15 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator)nd; + UnionOperator union = (UnionOperator) nd; UnionProcContext ctx = (UnionProcContext) procCtx; // find the branch on which this processor was invoked int pos = getPositionParent(union, stack); UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) + if (uCtx == null) { uCtx = new UnionParseContext(union.getConf().getNumInputs()); + } ctx.setMapOnlySubq(false); uCtx.setMapOnlySubq(pos, false); @@ -78,14 +81,15 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator)nd; + UnionOperator union = (UnionOperator) nd; UnionProcContext ctx = (UnionProcContext) procCtx; // find the branch on which this processor was invoked int pos = getPositionParent(union, stack); UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) + if (uCtx == null) { uCtx = new UnionParseContext(union.getConf().getNumInputs()); + } uCtx.setMapOnlySubq(pos, true); uCtx.setRootTask(pos, true); @@ -102,14 +106,15 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator)nd; + UnionOperator union = (UnionOperator) nd; UnionProcContext ctx = (UnionProcContext) procCtx; // find the branch on which this processor was invoked int pos = getPositionParent(union, stack); UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) + if (uCtx == null) { uCtx = new UnionParseContext(union.getConf().getNumInputs()); + } uCtx.setMapJoinSubq(pos, true); ctx.setUnionParseContext(union, uCtx); @@ -125,14 +130,15 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator)nd; + UnionOperator union = (UnionOperator) nd; UnionProcContext ctx = (UnionProcContext) procCtx; // find the branch on which this processor was invoked int pos = getPositionParent(union, stack); UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) + if (uCtx == null) { uCtx = new UnionParseContext(union.getConf().getNumInputs()); + } uCtx.setMapOnlySubq(pos, true); uCtx.setRootTask(pos, false); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy) @@ -21,26 +21,27 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.Map; -import org.apache.hadoop.hive.ql.lib.PreOrderWalker; + import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.PreOrderWalker; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.optimizer.Transform; /** * Implementation of the union processor. This can be enhanced later on. - * Currently, it does the following: - * Identify if both the subqueries of UNION are map-only. - * Store that fact in the unionDesc/UnionOperator. - * If either of the sub-query involves a map-reduce job, a FS is introduced on top of the UNION. - * This can be later optimized to clone all the operators above the UNION. - + * Currently, it does the following: Identify if both the subqueries of UNION + * are map-only. Store that fact in the unionDesc/UnionOperator. If either of + * the sub-query involves a map-reduce job, a FS is introduced on top of the + * UNION. This can be later optimized to clone all the operators above the + * UNION. + * * The parse Context is not changed. */ public class UnionProcessor implements Transform { @@ -48,24 +49,34 @@ /** * empty constructor */ - public UnionProcessor() { } + public UnionProcessor() { + } /** * Transform the query tree. For each union, store the fact whether both the * sub-queries are map-only - * @param pCtx the current parse context + * + * @param pCtx + * the current parse context */ public ParseContext transform(ParseContext pCtx) throws SemanticException { - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
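// Illustrative sketch (not part of this patch): the transform below registers rules of the form
// "operator-stack pattern -> NodeProcessor" (e.g. "RS%.*UNION%" fires when a union is reached
// through a reduce sink) and hands them to a dispatcher that fires the processor of the matching
// rule, falling back to a default processor. The stand-in keeps only that lookup idea — match the
// names on the current operator stack against each registered pattern and run the first processor
// whose pattern matches — rather than Hive's actual Rule/Dispatcher classes.
class RuleDispatchSketch {
  interface Processor { void process(java.util.List<String> operatorStack); }

  static void dispatch(java.util.List<String> operatorStack,
      java.util.Map<java.util.regex.Pattern, Processor> rules, Processor defaultProc) {
    StringBuilder stackDesc = new StringBuilder();
    for (String opName : operatorStack) {
      stackDesc.append(opName).append('%'); // e.g. "TS%RS%UNION%"
    }
    for (java.util.Map.Entry<java.util.regex.Pattern, Processor> rule : rules.entrySet()) {
      if (rule.getKey().matcher(stackDesc).find()) {
        rule.getValue().process(operatorStack);
        return;
      }
    }
    defaultProc.process(operatorStack); // no rule fired
  }
}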
Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp(new String("R1"), "RS%.*UNION%"), UnionProcFactory.getMapRedUnion()); - opRules.put(new RuleRegExp(new String("R2"), "UNION%.*UNION%"), UnionProcFactory.getUnknownUnion()); - opRules.put(new RuleRegExp(new String("R3"), "TS%.*UNION%"), UnionProcFactory.getMapUnion()); - opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*UNION%"), UnionProcFactory.getMapJoinUnion()); + opRules.put(new RuleRegExp(new String("R1"), "RS%.*UNION%"), + UnionProcFactory.getMapRedUnion()); + opRules.put(new RuleRegExp(new String("R2"), "UNION%.*UNION%"), + UnionProcFactory.getUnknownUnion()); + opRules.put(new RuleRegExp(new String("R3"), "TS%.*UNION%"), + UnionProcFactory.getMapUnion()); + opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*UNION%"), + UnionProcFactory.getMapJoinUnion()); - // The dispatcher fires the processor for the matching rule and passes the context along + // The dispatcher fires the processor for the matching rule and passes the + // context along UnionProcContext uCtx = new UnionProcContext(); - Dispatcher disp = new DefaultRuleDispatcher(UnionProcFactory.getNoUnion(), opRules, uCtx); + Dispatcher disp = new DefaultRuleDispatcher(UnionProcFactory.getNoUnion(), + opRules, uCtx); GraphWalker ogw = new PreOrderWalker(disp); // Create a list of topop nodes Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (working copy) @@ -25,8 +25,6 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -40,24 +38,24 @@ * This class implements the processor context for Column Pruner. */ public class ColumnPrunerProcCtx implements NodeProcessorCtx { - - private Map,List> prunedColLists; - - private HashMap, OpParseContext> opToParseCtxMap; - - private Map>> joinPrunedColLists; - - public ColumnPrunerProcCtx(HashMap, OpParseContext> opToParseContextMap) { + private final Map, List> prunedColLists; + + private final HashMap, OpParseContext> opToParseCtxMap; + + private final Map>> joinPrunedColLists; + + public ColumnPrunerProcCtx( + HashMap, OpParseContext> opToParseContextMap) { prunedColLists = new HashMap, List>(); - this.opToParseCtxMap = opToParseContextMap; - joinPrunedColLists = new HashMap>>(); + opToParseCtxMap = opToParseContextMap; + joinPrunedColLists = new HashMap>>(); } public Map>> getJoinPrunedColLists() { return joinPrunedColLists; } - + /** * @return the prunedColLists */ @@ -68,30 +66,31 @@ public HashMap, OpParseContext> getOpToParseCtxMap() { return opToParseCtxMap; } - + public Map, List> getPrunedColLists() { return prunedColLists; } - + /** - * Creates the list of internal column names(these names are used in the RowResolver and - * are different from the external column names) that are needed in the subtree. These columns - * eventually have to be selected from the table scan. + * Creates the list of internal column names(these names are used in the + * RowResolver and are different from the external column names) that are + * needed in the subtree. 
These columns eventually have to be selected from + * the table scan. * - * @param curOp The root of the operator subtree. + * @param curOp + * The root of the operator subtree. * @return List of the internal column names. * @throws SemanticException */ - public List genColLists(Operator curOp) throws SemanticException { + public List genColLists(Operator curOp) + throws SemanticException { List colList = new ArrayList(); - if(curOp.getChildOperators() != null) { + if (curOp.getChildOperators() != null) { for (Operator child : curOp.getChildOperators()) { if (child instanceof CommonJoinOperator) { int tag = child.getParentOperators().indexOf(curOp); - List prunList = joinPrunedColLists.get((CommonJoinOperator) child).get( - (byte) tag); - colList = Utilities - .mergeUniqElems(colList, prunList); + List prunList = joinPrunedColLists.get(child).get((byte) tag); + colList = Utilities.mergeUniqElems(colList, prunList); } else { colList = Utilities .mergeUniqElems(colList, prunedColLists.get(child)); @@ -100,52 +99,60 @@ } return colList; } - + /** - * Creates the list of internal column names from select expressions in a select operator. - * This function is used for the select operator instead of the genColLists function (which is - * used by the rest of the operators). + * Creates the list of internal column names from select expressions in a + * select operator. This function is used for the select operator instead of + * the genColLists function (which is used by the rest of the operators). * - * @param op The select operator. + * @param op + * The select operator. * @return List of the internal column names. */ public List getColsFromSelectExpr(SelectOperator op) { List cols = new ArrayList(); selectDesc conf = op.getConf(); ArrayList exprList = conf.getColList(); - for (exprNodeDesc expr : exprList) + for (exprNodeDesc expr : exprList) { cols = Utilities.mergeUniqElems(cols, expr.getCols()); + } return cols; } /** * Creates the list of internal column names for select * expressions. * - * @param op The select operator. - * @param colList The list of internal column names returned by the children of the select operator. + * @param op + * The select operator. + * @param colList + * The list of internal column names returned by the children of the + * select operator. * @return List of the internal column names. */ - public List getSelectColsFromChildren(SelectOperator op, List colList) { + public List getSelectColsFromChildren(SelectOperator op, + List colList) { List cols = new ArrayList(); selectDesc conf = op.getConf(); - - if(conf.isSelStarNoCompute()){ + + if (conf.isSelStarNoCompute()) { cols.addAll(colList); return cols; } - + ArrayList selectExprs = conf.getColList(); - - // The colList is the output columns used by child operators, they are different - // from input columns of the current operator. we need to find out which input columns are used. + + // The colList is the output columns used by child operators, they are + // different + // from input columns of the current operator. we need to find out which + // input columns are used. 
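// Illustrative sketch (not part of this patch): the loop that follows maps the *output* columns a
// child still needs back to the *input* columns of the select that produce them — for each needed
// output name, find its position in the select's output column list and collect the input columns
// referenced by the expression at that position. Stand-in below uses plain lists in place of
// selectDesc/exprNodeDesc.
class SelectPruneSketch {
  static java.util.List<String> inputColsNeeded(java.util.List<String> neededOutputCols,
      java.util.List<String> outputColumnNames,
      java.util.List<java.util.List<String>> inputColsPerExpr) {
    java.util.LinkedHashSet<String> needed = new java.util.LinkedHashSet<String>();
    for (String out : neededOutputCols) {
      int pos = outputColumnNames.indexOf(out);
      if (pos >= 0) {
        // the expression computing this output column decides which input columns survive pruning
        needed.addAll(inputColsPerExpr.get(pos));
      }
    }
    return new java.util.ArrayList<String>(needed);
  }
}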
ArrayList outputColumnNames = conf.getOutputColumnNames(); - for(int i=0;i(); - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { resolvers.add(new SkewJoinResolver()); + } } /** @@ -57,8 +58,9 @@ * @throws HiveException */ public PhysicalContext optimize() throws SemanticException { - for (PhysicalPlanResolver r : resolvers) + for (PhysicalPlanResolver r : resolvers) { pctx = r.resolve(pctx); + } return pctx; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java (working copy) @@ -30,13 +30,13 @@ * physical context used by physical resolvers. */ public class PhysicalContext { - + protected HiveConf conf; private ParseContext parseContext; private Context context; protected List> rootTasks; protected Task fetchTask; - + public PhysicalContext(HiveConf conf, ParseContext parseContext, Context context, List> rootTasks, Task fetchTask) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (working copy) @@ -100,18 +100,19 @@ public static void processSkewJoin(JoinOperator joinOp, Task currTask, ParseContext parseCtx) throws SemanticException { - + // We are trying to adding map joins to handle skew keys, and map join right // now does not work with outer joins - if (!GenMRSkewJoinProcessor.skewJoinEnabled(parseCtx.getConf(), joinOp)) + if (!GenMRSkewJoinProcessor.skewJoinEnabled(parseCtx.getConf(), joinOp)) { return; - + } + String baseTmpDir = parseCtx.getContext().getMRTmpFileURI(); - + joinDesc joinDescriptor = joinOp.getConf(); Map> joinValues = joinDescriptor.getExprs(); int numAliases = joinValues.size(); - + Map bigKeysDirMap = new HashMap(); Map> smallKeysDirMap = new HashMap>(); Map skewJoinJobResultsDir = new HashMap(); @@ -122,35 +123,46 @@ bigKeysDirMap.put(alias, bigKeysDir); Map smallKeysMap = new HashMap(); smallKeysDirMap.put(alias, smallKeysMap); - for(Byte src2 : tags) { - if(!src2.equals(alias)) + for (Byte src2 : tags) { + if (!src2.equals(alias)) { smallKeysMap.put(src2, getSmallKeysDir(baseTmpDir, alias, src2)); + } } - skewJoinJobResultsDir.put(alias, getBigKeysSkewJoinResultDir(baseTmpDir, alias)); + skewJoinJobResultsDir.put(alias, getBigKeysSkewJoinResultDir(baseTmpDir, + alias)); } - + joinDescriptor.setHandleSkewJoin(true); joinDescriptor.setBigKeysDirMap(bigKeysDirMap); joinDescriptor.setSmallKeysDirMap(smallKeysDirMap); - joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY)); - + joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), + HiveConf.ConfVars.HIVESKEWJOINKEY)); + Map> bigKeysDirToTaskMap = new HashMap>(); List listWorks = new ArrayList(); List> listTasks = new ArrayList>(); mapredWork currPlan = (mapredWork) currTask.getWork(); - tableDesc keyTblDesc = (tableDesc) currPlan.getKeyDesc().clone(); - List joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties()); - List joinKeyTypes = 
Utilities.getColumnTypes(keyTblDesc.getProperties()); - + tableDesc keyTblDesc = (tableDesc) currPlan.getKeyDesc().clone(); + List joinKeys = Utilities + .getColumnNames(keyTblDesc.getProperties()); + List joinKeyTypes = Utilities.getColumnTypes(keyTblDesc + .getProperties()); + Map tableDescList = new HashMap(); Map> newJoinValues = new HashMap>(); Map> newJoinKeys = new HashMap>(); - List newJoinValueTblDesc = new ArrayList();// used for create mapJoinDesc, should be in order - - for (int i = 0; i < tags.length; i++) // fill with null, otherwise we will expect NPE + List newJoinValueTblDesc = new ArrayList();// used for + // create + // mapJoinDesc, + // should + // be in + // order + + for (Byte tag : tags) { newJoinValueTblDesc.add(null); - + } + for (int i = 0; i < numAliases; i++) { Byte alias = tags[i]; List valueCols = joinValues.get(alias); @@ -159,44 +171,47 @@ int columnSize = valueCols.size(); List newValueExpr = new ArrayList(); List newKeyExpr = new ArrayList(); - + boolean first = true; for (int k = 0; k < columnSize; k++) { TypeInfo type = valueCols.get(k).getTypeInfo(); String newColName = i + "_VALUE_" + k; // any name, it does not matter. - newValueExpr.add(new exprNodeColumnDesc(type, newColName, ""+i, false)); - if(!first) { + newValueExpr + .add(new exprNodeColumnDesc(type, newColName, "" + i, false)); + if (!first) { colNames = colNames + ","; - colTypes = colTypes +","; + colTypes = colTypes + ","; } first = false; colNames = colNames + newColName; colTypes = colTypes + valueCols.get(k).getTypeString(); } - - //we are putting join keys at last part of the spilled table + + // we are putting join keys at last part of the spilled table for (int k = 0; k < joinKeys.size(); k++) { - if(!first) { + if (!first) { colNames = colNames + ","; - colTypes = colTypes +","; + colTypes = colTypes + ","; } first = false; - colNames = colNames + joinKeys.get(k); + colNames = colNames + joinKeys.get(k); colTypes = colTypes + joinKeyTypes.get(k); - newKeyExpr.add(new exprNodeColumnDesc(TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), joinKeys.get(k), ""+i, false)); + newKeyExpr.add(new exprNodeColumnDesc(TypeInfoFactory + .getPrimitiveTypeInfo(joinKeyTypes.get(k)), joinKeys.get(k), + "" + i, false)); } - + newJoinValues.put(alias, newValueExpr); newJoinKeys.put(alias, newKeyExpr); tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes)); - - //construct value table Desc - String valueColNames =""; - String valueColTypes =""; + + // construct value table Desc + String valueColNames = ""; + String valueColTypes = ""; first = true; for (int k = 0; k < columnSize; k++) { String newColName = i + "_VALUE_" + k; // any name, it does not matter. 
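// Illustrative sketch (not part of this patch): the surrounding loop builds the schema of the
// spilled skew-key table as comma-separated name/type strings — every value column of alias i is
// renamed "i_VALUE_k" (the original name does not matter for the temporary file), and the join
// keys are appended at the end of the spilled row. Stand-in with plain strings instead of
// exprNodeDesc/tableDesc.
class SkewSchemaSketch {
  static String spilledColumnNames(int aliasTag, int valueColumnCount, java.util.List<String> joinKeys) {
    StringBuilder names = new StringBuilder();
    for (int k = 0; k < valueColumnCount; k++) {
      if (names.length() > 0) {
        names.append(',');
      }
      names.append(aliasTag).append("_VALUE_").append(k); // synthetic value column name
    }
    for (String key : joinKeys) {
      if (names.length() > 0) {
        names.append(',');
      }
      names.append(key); // join keys go at the end of the spilled row
    }
    return names.toString();
  }
}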
- if(!first) { + if (!first) { valueColNames = valueColNames + ","; valueColTypes = valueColTypes + ","; } @@ -204,13 +219,14 @@ valueColTypes = valueColTypes + valueCols.get(k).getTypeString(); first = false; } - newJoinValueTblDesc.set(Byte.valueOf((byte)i), Utilities.getTableDesc(valueColNames, valueColTypes)); + newJoinValueTblDesc.set(Byte.valueOf((byte) i), Utilities.getTableDesc( + valueColNames, valueColTypes)); } - + joinDescriptor.setSkewKeysValuesTables(tableDescList); joinDescriptor.setKeyTableDesc(keyTblDesc); - - for (int i = 0; i < numAliases -1; i++) { + + for (int i = 0; i < numAliases - 1; i++) { Byte src = tags[i]; mapredWork newPlan = PlanUtils.getMapRedWork(); mapredWork clonePlan = null; @@ -226,7 +242,8 @@ Operator[] parentOps = new TableScanOperator[tags.length]; for (int k = 0; k < tags.length; k++) { - Operator ts = OperatorFactory.get(tableScanDesc.class, (RowSchema) null); + Operator ts = OperatorFactory.get( + tableScanDesc.class, (RowSchema) null); parentOps[k] = ts; } Operator tblScan_op = parentOps[i]; @@ -240,26 +257,27 @@ partitionDesc part = new partitionDesc(tableDescList.get(src), null); newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part); newPlan.getAliasToPartnInfo().put(alias, part); - + Operator reducer = clonePlan.getReducer(); assert reducer instanceof JoinOperator; JoinOperator cloneJoinOp = (JoinOperator) reducer; - - mapJoinDesc mapJoinDescriptor = new mapJoinDesc(newJoinKeys, - keyTblDesc, newJoinValues, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), - i, joinDescriptor.getConds()); + + mapJoinDesc mapJoinDescriptor = new mapJoinDesc(newJoinKeys, keyTblDesc, + newJoinValues, newJoinValueTblDesc, joinDescriptor + .getOutputColumnNames(), i, joinDescriptor.getConds()); mapJoinDescriptor.setNoOuterJoin(joinDescriptor.isNoOuterJoin()); mapJoinDescriptor.setTagOrder(tags); mapJoinDescriptor.setHandleSkewJoin(false); - + mapredLocalWork localPlan = new mapredLocalWork( new LinkedHashMap>(), new LinkedHashMap()); Map smallTblDirs = smallKeysDirMap.get(src); - + for (int j = 0; j < numAliases; j++) { - if (j == i) + if (j == i) { continue; + } Byte small_alias = tags[j]; Operator tblScan_op2 = parentOps[j]; localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2); @@ -267,81 +285,96 @@ localPlan.getAliasToFetchWork().put(small_alias.toString(), new fetchWork(tblDir.toString(), tableDescList.get(small_alias))); } - + newPlan.setMapLocalWork(localPlan); - + // construct a map join and set it as the child operator of tblScan_op - MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps); + MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory + .getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps); // change the children of the original join operator to point to the map // join operator - List> childOps = cloneJoinOp.getChildOperators(); - for (Operator childOp : childOps) + List> childOps = cloneJoinOp + .getChildOperators(); + for (Operator childOp : childOps) { childOp.replaceParent(cloneJoinOp, mapJoinOp); + } mapJoinOp.setChildOperators(childOps); - - HiveConf jc = new HiveConf(parseCtx.getConf(), GenMRSkewJoinProcessor.class); + + HiveConf jc = new HiveConf(parseCtx.getConf(), + GenMRSkewJoinProcessor.class); HiveConf.setVar(jc, HiveConf.ConfVars.HIVEINPUTFORMAT, - org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.class.getCanonicalName()); - Task skewJoinMapJoinTask = TaskFactory.get(newPlan, jc); + 
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.class + .getCanonicalName()); + Task skewJoinMapJoinTask = TaskFactory.get( + newPlan, jc); bigKeysDirToTaskMap.put(bigKeyDirPath, skewJoinMapJoinTask); listWorks.add(skewJoinMapJoinTask.getWork()); listTasks.add(skewJoinMapJoinTask); } - + ConditionalWork cndWork = new ConditionalWork(listWorks); - ConditionalTask cndTsk = (ConditionalTask)TaskFactory.get(cndWork, parseCtx.getConf()); + ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, + parseCtx.getConf()); cndTsk.setListTasks(listTasks); cndTsk.setResolver(new ConditionalResolverSkewJoin()); - cndTsk.setResolverCtx(new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap)); + cndTsk + .setResolverCtx(new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx( + bigKeysDirToTaskMap)); List> oldChildTasks = currTask.getChildTasks(); currTask.setChildTasks(new ArrayList>()); currTask.addDependentTask(cndTsk); - + if (oldChildTasks != null) { - for(Task tsk : cndTsk.getListTasks()) - for (Task oldChild : oldChildTasks) + for (Task tsk : cndTsk.getListTasks()) { + for (Task oldChild : oldChildTasks) { tsk.addDependentTask(oldChild); + } + } } return; } - + public static boolean skewJoinEnabled(HiveConf conf, JoinOperator joinOp) { - if (conf != null && !conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) + if (conf != null && !conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { return false; + } - if(!joinOp.getConf().isNoOuterJoin()) + if (!joinOp.getConf().isNoOuterJoin()) { return false; - + } + byte pos = 0; - for(Byte tag: joinOp.getConf().getTagOrder()) { - if(tag != pos) + for (Byte tag : joinOp.getConf().getTagOrder()) { + if (tag != pos) { return false; + } pos++; } - + return true; } - + private static String skewJoinPrefix = "hive_skew_join"; private static String UNDERLINE = "_"; private static String BIGKEYS = "bigkeys"; private static String SMALLKEYS = "smallkeys"; private static String RESULTS = "results"; + static String getBigKeysDir(String baseDir, Byte srcTbl) { return baseDir + File.separator + skewJoinPrefix + UNDERLINE + BIGKEYS + UNDERLINE + srcTbl; } - + static String getBigKeysSkewJoinResultDir(String baseDir, Byte srcTbl) { return baseDir + File.separator + skewJoinPrefix + UNDERLINE + BIGKEYS - + UNDERLINE + RESULTS + UNDERLINE+ srcTbl; + + UNDERLINE + RESULTS + UNDERLINE + srcTbl; } - + static String getSmallKeysDir(String baseDir, Byte srcTblBigTbl, Byte srcTblSmallTbl) { return baseDir + File.separator + skewJoinPrefix + UNDERLINE + SMALLKEYS + UNDERLINE + srcTblBigTbl + UNDERLINE + srcTblSmallTbl; } - + } \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinProcFactory.java (working copy) @@ -42,10 +42,11 @@ public static NodeProcessor getJoinProc() { return new SkewJoinJoinProcessor(); } - - public static class SkewJoinJoinProcessor implements NodeProcessor { - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - SkewJoinProcCtx context = (SkewJoinProcCtx)ctx; + + public static class SkewJoinJoinProcessor implements NodeProcessor { + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... 
nodeOutputs) throws SemanticException { + SkewJoinProcCtx context = (SkewJoinProcCtx) ctx; JoinOperator op = (JoinOperator) nd; ParseContext parseContext = context.getParseCtx(); Task currentTsk = context.getCurrentTask(); @@ -53,8 +54,8 @@ return null; } } - - public static class SkewJoinDefaultProcessor implements NodeProcessor{ + + public static class SkewJoinDefaultProcessor implements NodeProcessor { public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalPlanResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalPlanResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalPlanResolver.java (working copy) @@ -28,6 +28,7 @@ /** * All physical plan resolvers have to implement this entry method. + * * @param pctx * @return */ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java (working copy) @@ -41,10 +41,10 @@ /** * An implementation of PhysicalPlanResolver. It iterator each task with a rule - * dispatcher for its reducer operator tree, for task with join op in reducer, it - * will try to add a conditional task associated a list of skew join tasks. + * dispatcher for its reducer operator tree, for task with join op in reducer, + * it will try to add a conditional task associated a list of skew join tasks. */ -public class SkewJoinResolver implements PhysicalPlanResolver{ +public class SkewJoinResolver implements PhysicalPlanResolver { @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { Dispatcher disp = new SkewJoinTaskDispatcher(pctx); @@ -54,37 +54,42 @@ ogw.startWalking(topNodes, null); return null; } - + /** * Iterator a task with a rule dispatcher for its reducer operator tree, */ - class SkewJoinTaskDispatcher implements Dispatcher{ - + class SkewJoinTaskDispatcher implements Dispatcher { + private PhysicalContext physicalContext; public SkewJoinTaskDispatcher(PhysicalContext context) { super(); - this.physicalContext = context; + physicalContext = context; } @Override public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) throws SemanticException { - Task task = (Task)nd; - - if (!task.isMapRedTask() - || task instanceof ConditionalTask ||((mapredWork) task.getWork()).getReducer() == null) + Task task = (Task) nd; + + if (!task.isMapRedTask() || task instanceof ConditionalTask + || ((mapredWork) task.getWork()).getReducer() == null) { return null; - - SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, this.physicalContext.getParseContext()); - + } + + SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, + physicalContext.getParseContext()); + Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", "JOIN%"), SkewJoinProcFactory.getJoinProc()); + opRules.put(new RuleRegExp("R1", "JOIN%"), SkewJoinProcFactory + .getJoinProc()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(SkewJoinProcFactory.getDefaultProc(), opRules, skewJoinProcContext); + // The dispatcher fires the processor corresponding to the closest + // matching rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(SkewJoinProcFactory + .getDefaultProc(), opRules, skewJoinProcContext); GraphWalker ogw = new DefaultGraphWalker(disp); - + // iterator the reducer operator tree ArrayList topNodes = new ArrayList(); topNodes.add(((mapredWork) task.getWork()).getReducer()); @@ -100,16 +105,17 @@ this.physicalContext = physicalContext; } } - + /** - * A container of current task and parse context. + * A container of current task and parse context. */ public static class SkewJoinProcCtx implements NodeProcessorCtx { private Task currentTask; private ParseContext parseCtx; - - public SkewJoinProcCtx(Task task, ParseContext parseCtx) { - this.currentTask = task; + + public SkewJoinProcCtx(Task task, + ParseContext parseCtx) { + currentTask = task; this.parseCtx = parseCtx; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -33,13 +33,13 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; @@ -51,22 +51,21 @@ import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; +import org.apache.hadoop.hive.ql.plan.joinDesc; import org.apache.hadoop.hive.ql.plan.mapJoinDesc; import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; 
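Editor's note: the SkewJoinTaskDispatcher above registers a single rule, "JOIN%", and lets a rule dispatcher fire the processor whose pattern matches the reducer operator being visited, falling back to a default processor otherwise. Below is a rough, self-contained sketch of that dispatch idea. It is a simplification of Hive's Dispatcher/RuleRegExp contract (which scores every rule against the whole operator stack and picks the cheapest match); all types here are hypothetical stand-ins.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

// Sketch only: fires the first processor whose regular expression matches the
// name of the node being visited, falling back to a default processor.
public class RuleDispatchSketch {

  interface NodeProcessor {
    void process(String nodeName);
  }

  static void dispatch(String nodeName, Map<Pattern, NodeProcessor> rules,
      NodeProcessor defaultProc) {
    for (Map.Entry<Pattern, NodeProcessor> rule : rules.entrySet()) {
      if (rule.getKey().matcher(nodeName).matches()) {
        rule.getValue().process(nodeName);
        return;
      }
    }
    defaultProc.process(nodeName);
  }

  public static void main(String[] args) {
    Map<Pattern, NodeProcessor> rules = new LinkedHashMap<Pattern, NodeProcessor>();
    // "JOIN.*" plays the role of the "JOIN%" rule in the hunk above
    rules.put(Pattern.compile("JOIN.*"), new NodeProcessor() {
      public void process(String nodeName) {
        System.out.println("join processor fired for " + nodeName);
      }
    });
    NodeProcessor defaultProc = new NodeProcessor() {
      public void process(String nodeName) {
        System.out.println("default processor for " + nodeName);
      }
    };
    dispatch("JOIN_1", rules, defaultProc);   // join processor
    dispatch("SELECT_2", rules, defaultProc); // default processor
  }
}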
import org.apache.hadoop.hive.ql.plan.selectDesc; import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.joinDesc; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.ql.lib.Node; /** - * Implementation of one of the rule-based map join optimization. User passes hints to specify map-joins and during this optimization, - * all user specified map joins are converted to MapJoins - the reduce sink operator above the join are converted to map sink operators. - * In future, once statistics are implemented, this transformation can also be done based on costs. + * Implementation of one of the rule-based map join optimization. User passes + * hints to specify map-joins and during this optimization, all user specified + * map joins are converted to MapJoins - the reduce sink operator above the join + * are converted to map sink operators. In future, once statistics are + * implemented, this transformation can also be done based on costs. */ public class MapJoinProcessor implements Transform { private ParseContext pGraphContext; @@ -74,44 +73,56 @@ /** * empty constructor */ - public MapJoinProcessor() { + public MapJoinProcessor() { pGraphContext = null; - } + } @SuppressWarnings("nls") - private Operator putOpInsertMap(Operator op, RowResolver rr) { + private Operator putOpInsertMap( + Operator op, RowResolver rr) { OpParseContext ctx = new OpParseContext(rr); pGraphContext.getOpParseCtx().put(op, ctx); return op; } - - + /** - * convert a regular join to a a map-side join. - * @param op join operator - * @param qbJoin qb join tree - * @param mapJoinPos position of the source to be read as part of map-reduce framework. All other sources are cached in memory + * convert a regular join to a a map-side join. + * + * @param op + * join operator + * @param qbJoin + * qb join tree + * @param mapJoinPos + * position of the source to be read as part of map-reduce framework. + * All other sources are cached in memory */ - private MapJoinOperator convertMapJoin(ParseContext pctx, JoinOperator op, QBJoinTree joinTree, int mapJoinPos) throws SemanticException { + private MapJoinOperator convertMapJoin(ParseContext pctx, JoinOperator op, + QBJoinTree joinTree, int mapJoinPos) throws SemanticException { // outer join cannot be performed on a table which is being cached joinDesc desc = op.getConf(); org.apache.hadoop.hive.ql.plan.joinCond[] condns = desc.getConds(); for (org.apache.hadoop.hive.ql.plan.joinCond condn : condns) { - if (condn.getType() == joinDesc.FULL_OUTER_JOIN) + if (condn.getType() == joinDesc.FULL_OUTER_JOIN) { throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg()); - if ((condn.getType() == joinDesc.LEFT_OUTER_JOIN) && (condn.getLeft() != mapJoinPos)) + } + if ((condn.getType() == joinDesc.LEFT_OUTER_JOIN) + && (condn.getLeft() != mapJoinPos)) { throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg()); - if ((condn.getType() == joinDesc.RIGHT_OUTER_JOIN) && (condn.getRight() != mapJoinPos)) + } + if ((condn.getType() == joinDesc.RIGHT_OUTER_JOIN) + && (condn.getRight() != mapJoinPos)) { throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg()); + } } - + RowResolver oldOutputRS = pctx.getOpParseCtx().get(op).getRR(); RowResolver outputRS = new RowResolver(); ArrayList outputColumnNames = new ArrayList(); - Map> keyExprMap = new HashMap>(); + Map> keyExprMap = new HashMap>(); Map> valueExprMap = new HashMap>(); - // Walk over all the sources (which are guaranteed to be reduce sink operators). 
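Editor's note: convertMapJoin above refuses to cache the preserved side of an outer join: a full outer join is rejected outright, and a left or right outer join is allowed only when the streamed (non-cached) position is the preserved side. A small sketch of that guard follows, using simplified hypothetical types (plain int join-type constants and a toy JoinCond class, not Hive's joinDesc/joinCond).

// Sketch only: mirrors the outer-join guard in convertMapJoin. mapJoinPos is
// the one input that is streamed through map-reduce; every other input is
// cached in memory, which is not legal for the preserved side of an outer join.
public class MapJoinOuterGuardSketch {

  static final int INNER_JOIN = 0;
  static final int LEFT_OUTER_JOIN = 1;
  static final int RIGHT_OUTER_JOIN = 2;
  static final int FULL_OUTER_JOIN = 3;

  static class JoinCond {
    final int type;
    final int left;
    final int right;
    JoinCond(int type, int left, int right) {
      this.type = type;
      this.left = left;
      this.right = right;
    }
  }

  static void checkMapJoinAllowed(JoinCond[] conds, int mapJoinPos) {
    for (JoinCond cond : conds) {
      if (cond.type == FULL_OUTER_JOIN) {
        throw new IllegalArgumentException("map join not allowed with full outer join");
      }
      if (cond.type == LEFT_OUTER_JOIN && cond.left != mapJoinPos) {
        throw new IllegalArgumentException("left outer join: the left input must be streamed");
      }
      if (cond.type == RIGHT_OUTER_JOIN && cond.right != mapJoinPos) {
        throw new IllegalArgumentException("right outer join: the right input must be streamed");
      }
    }
  }

  public static void main(String[] args) {
    // a LEFT OUTER JOIN b, streaming input 0 (a) and caching input 1 (b): allowed
    checkMapJoinAllowed(new JoinCond[] { new JoinCond(LEFT_OUTER_JOIN, 0, 1) }, 0);
    System.out.println("left outer join with streamed left side: ok");
    try {
      // same join but streaming input 1 instead: rejected
      checkMapJoinAllowed(new JoinCond[] { new JoinCond(LEFT_OUTER_JOIN, 0, 1) }, 1);
    } catch (IllegalArgumentException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}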
+ // Walk over all the sources (which are guaranteed to be reduce sink + // operators). // The join outputs a concatenation of all the inputs. QBJoinTree leftSrc = joinTree.getJoinSrc(); @@ -121,10 +132,11 @@ Map colExprMap = new HashMap(); // found a source which is not to be stored in memory if (leftSrc != null) { - // assert mapJoinPos == 0; + // assert mapJoinPos == 0; Operator parentOp = parentOps.get(0); assert parentOp.getParentOperators().size() == 1; - Operator grandParentOp = parentOp.getParentOperators().get(0); + Operator grandParentOp = parentOp + .getParentOperators().get(0); oldReduceSinkParentOps.add(parentOp); grandParentOp.removeChild(parentOp); newParentOps.add(grandParentOp); @@ -136,8 +148,9 @@ if (src != null) { Operator parentOp = parentOps.get(pos); assert parentOp.getParentOperators().size() == 1; - Operator grandParentOp = parentOp.getParentOperators().get(0); - + Operator grandParentOp = parentOp + .getParentOperators().get(0); + grandParentOp.removeChild(parentOp); oldReduceSinkParentOps.add(parentOp); newParentOps.add(grandParentOp); @@ -145,93 +158,102 @@ pos++; } - //get the join keys from old parent ReduceSink operators + // get the join keys from old parent ReduceSink operators for (pos = 0; pos < newParentOps.size(); pos++) { - ReduceSinkOperator oldPar = (ReduceSinkOperator)oldReduceSinkParentOps.get(pos); + ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps + .get(pos); reduceSinkDesc rsconf = oldPar.getConf(); - Byte tag = (byte)rsconf.getTag(); + Byte tag = (byte) rsconf.getTag(); List keys = rsconf.getKeyCols(); keyExprMap.put(tag, keys); } - + // create the map-join operator for (pos = 0; pos < newParentOps.size(); pos++) { - RowResolver inputRS = pGraphContext.getOpParseCtx().get(newParentOps.get(pos)).getRR(); - + RowResolver inputRS = pGraphContext.getOpParseCtx().get( + newParentOps.get(pos)).getRR(); + List values = new ArrayList(); Iterator keysIter = inputRS.getTableNames().iterator(); - while (keysIter.hasNext()) - { + while (keysIter.hasNext()) { String key = keysIter.next(); HashMap rrMap = inputRS.getFieldMap(key); Iterator fNamesIter = rrMap.keySet().iterator(); - while (fNamesIter.hasNext()) - { + while (fNamesIter.hasNext()) { String field = fNamesIter.next(); ColumnInfo valueInfo = inputRS.get(key, field); ColumnInfo oldValueInfo = oldOutputRS.get(key, field); - if(oldValueInfo == null) + if (oldValueInfo == null) { continue; + } String outputCol = oldValueInfo.getInternalName(); if (outputRS.get(key, field) == null) { outputColumnNames.add(outputCol); - exprNodeDesc colDesc = new exprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), - valueInfo.getTabAlias(), valueInfo.getIsPartitionCol()); + exprNodeDesc colDesc = new exprNodeColumnDesc(valueInfo.getType(), + valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo + .getIsPartitionCol()); values.add(colDesc); - outputRS.put(key, field, new ColumnInfo(outputCol, - valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsPartitionCol())); + outputRS.put(key, field, new ColumnInfo(outputCol, valueInfo + .getType(), valueInfo.getTabAlias(), valueInfo + .getIsPartitionCol())); colExprMap.put(outputCol, colDesc); } } } - - valueExprMap.put(new Byte((byte)pos), values); + + valueExprMap.put(new Byte((byte) pos), values); } - org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = op.getConf().getConds(); + org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = op.getConf() + .getConds(); Operator[] newPar = new Operator[newParentOps.size()]; 
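Editor's note: the hunks that follow build the MapJoinOperator from the new (grand)parent operators and then re-point every child of the original join at it via replaceParent, finally swapping the child and parent lists. A minimal sketch of that re-parenting pattern on a toy operator DAG follows; the Op class here is a hypothetical stand-in, not Hive's Operator hierarchy.

import java.util.ArrayList;
import java.util.List;

// Sketch only: replaces one parent with another in every child of the old
// operator, the same re-parenting step used when a JoinOperator is swapped
// for a MapJoinOperator.
public class ReparentSketch {

  static class Op {
    final String name;
    List<Op> parents = new ArrayList<Op>();
    List<Op> children = new ArrayList<Op>();
    Op(String name) { this.name = name; }

    void replaceParent(Op oldParent, Op newParent) {
      int idx = parents.indexOf(oldParent);
      if (idx >= 0) {
        parents.set(idx, newParent);
      }
    }
  }

  static void swapOperator(Op oldOp, Op newOp) {
    for (Op child : oldOp.children) {
      child.replaceParent(oldOp, newOp);
    }
    // hand the child list over to the new operator and detach the old one
    newOp.children = oldOp.children;
    oldOp.children = null;
    oldOp.parents = null;
  }

  public static void main(String[] args) {
    Op join = new Op("JOIN");
    Op select = new Op("SEL");
    join.children.add(select);
    select.parents.add(join);

    Op mapJoin = new Op("MAPJOIN");
    swapOperator(join, mapJoin);
    // prints MAPJOIN: the select operator now hangs off the map join
    System.out.println(select.parents.get(0).name);
  }
}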
pos = 0; - for (Operator o : newParentOps) + for (Operator o : newParentOps) { newPar[pos++] = o; + } - List keyCols = keyExprMap.get(new Byte((byte)0)); + List keyCols = keyExprMap.get(new Byte((byte) 0)); StringBuilder keyOrder = new StringBuilder(); - for (int i=0; i < keyCols.size(); i++) { + for (int i = 0; i < keyCols.size(); i++) { keyOrder.append("+"); } - - tableDesc keyTableDesc = - PlanUtils.getMapJoinKeyTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyCols, "mapjoinkey")); + tableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils + .getFieldSchemasFromColumnList(keyCols, "mapjoinkey")); + List valueTableDescs = new ArrayList(); - + for (pos = 0; pos < newParentOps.size(); pos++) { - List valueCols = valueExprMap.get(new Byte((byte)pos)); + List valueCols = valueExprMap.get(new Byte((byte) pos)); keyOrder = new StringBuilder(); - for (int i=0; i < valueCols.size(); i++) { + for (int i = 0; i < valueCols.size(); i++) { keyOrder.append("+"); } - - tableDesc valueTableDesc = - PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")); - + + tableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils + .getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")); + valueTableDescs.add(valueTableDesc); } - - MapJoinOperator mapJoinOp = (MapJoinOperator)putOpInsertMap(OperatorFactory.getAndMakeChild( - new mapJoinDesc(keyExprMap, keyTableDesc, valueExprMap, valueTableDescs, outputColumnNames, mapJoinPos, joinCondns), - new RowSchema(outputRS.getColumnInfos()), newPar), outputRS); - + + MapJoinOperator mapJoinOp = (MapJoinOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(new mapJoinDesc(keyExprMap, + keyTableDesc, valueExprMap, valueTableDescs, outputColumnNames, + mapJoinPos, joinCondns), new RowSchema(outputRS.getColumnInfos()), + newPar), outputRS); + mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs()); mapJoinOp.setColumnExprMap(colExprMap); - - // change the children of the original join operator to point to the map join operator + + // change the children of the original join operator to point to the map + // join operator List> childOps = op.getChildOperators(); - for (Operator childOp : childOps) + for (Operator childOp : childOps) { childOp.replaceParent(op, mapJoinOp); - + } + mapJoinOp.setChildOperators(childOps); mapJoinOp.setParentOperators(newParentOps); op.setChildOperators(null); @@ -242,20 +264,22 @@ return mapJoinOp; } - private void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException { + private void genSelectPlan(ParseContext pctx, MapJoinOperator input) + throws SemanticException { List> childOps = input.getChildOperators(); input.setChildOperators(null); - // create a dummy select - This select is needed by the walker to split the mapJoin later on - RowResolver inputRR = pctx.getOpParseCtx().get(input).getRR(); - - ArrayList exprs = new ArrayList(); - ArrayList outputs = new ArrayList(); + // create a dummy select - This select is needed by the walker to split the + // mapJoin later on + RowResolver inputRR = pctx.getOpParseCtx().get(input).getRR(); + + ArrayList exprs = new ArrayList(); + ArrayList outputs = new ArrayList(); List outputCols = input.getConf().getOutputColumnNames(); RowResolver outputRS = new RowResolver(); - + Map colExprMap = new HashMap(); - + for (int i = 0; i < outputCols.size(); i++) { String internalName = outputCols.get(i); String[] nm = inputRR.reverseLookup(internalName); @@ -264,88 +288,102 @@ 
valueInfo.getInternalName(), nm[0], valueInfo.getIsPartitionCol()); exprs.add(colDesc); outputs.add(internalName); - outputRS .put(nm[0], nm[1], new ColumnInfo(internalName, - valueInfo.getType(), nm[0], valueInfo.getIsPartitionCol())); + outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo + .getType(), nm[0], valueInfo.getIsPartitionCol())); colExprMap.put(internalName, colDesc); } - - selectDesc select = new selectDesc(exprs, outputs, false); - - SelectOperator sel = - (SelectOperator)putOpInsertMap(OperatorFactory.getAndMakeChild( - select, new RowSchema(inputRR.getColumnInfos()), input), inputRR); - + + selectDesc select = new selectDesc(exprs, outputs, false); + + SelectOperator sel = (SelectOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(select, new RowSchema(inputRR + .getColumnInfos()), input), inputRR); + sel.setColumnExprMap(colExprMap); - - // Insert the select operator in between. + + // Insert the select operator in between. sel.setChildOperators(childOps); - for (Operator ch: childOps) { + for (Operator ch : childOps) { ch.replaceParent(input, sel); } } /** - * Is it a map-side join. - * @param op join operator - * @param qbJoin qb join tree - * @return -1 if it cannot be converted to a map-side join, position of the map join node otherwise + * Is it a map-side join. + * + * @param op + * join operator + * @param qbJoin + * qb join tree + * @return -1 if it cannot be converted to a map-side join, position of the + * map join node otherwise */ - private int mapSideJoin(JoinOperator op, QBJoinTree joinTree) throws SemanticException { + private int mapSideJoin(JoinOperator op, QBJoinTree joinTree) + throws SemanticException { int mapJoinPos = -1; if (joinTree.isMapSideJoin()) { int pos = 0; // In a map-side join, exactly one table is not present in memory. - // The client provides the list of tables which can be cached in memory via a hint. - if (joinTree.getJoinSrc() != null) + // The client provides the list of tables which can be cached in memory + // via a hint. + if (joinTree.getJoinSrc() != null) { mapJoinPos = pos; + } for (String src : joinTree.getBaseSrc()) { if (src != null) { if (!joinTree.getMapAliases().contains(src)) { - if (mapJoinPos >= 0) + if (mapJoinPos >= 0) { return -1; + } mapJoinPos = pos; } } pos++; } - - // All tables are to be cached - this is not possible. In future, we can support this by randomly + + // All tables are to be cached - this is not possible. In future, we can + // support this by randomly // leaving some table from the list of tables to be cached - if (mapJoinPos == -1) - throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_HINT.getMsg(pGraphContext.getQB().getParseInfo().getHints())); + if (mapJoinPos == -1) { + throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_HINT + .getMsg(pGraphContext.getQB().getParseInfo().getHints())); + } } return mapJoinPos; } /** - * Transform the query tree. For each join, check if it is a map-side join (user specified). If yes, - * convert it to a map-side join. - * @param pactx current parse context + * Transform the query tree. For each join, check if it is a map-side join + * (user specified). If yes, convert it to a map-side join. 
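Editor's note: mapSideJoin above decides which single input stays un-cached: every alias named in the MAPJOIN hint is cached, exactly one remaining alias becomes the streamed input, and the conversion is abandoned (or the hint reported as invalid) otherwise. A compact sketch of that selection rule follows, using plain strings for aliases as a simplification of the QBJoinTree bookkeeping; it is not the real mapSideJoin.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch only: returns the position of the one alias that is NOT listed in the
// MAPJOIN hint, -1 if more than one alias is un-hinted (cannot convert), and
// throws if every alias is hinted (nothing left to stream).
public class MapJoinPositionSketch {

  static int streamedPosition(List<String> aliases, Set<String> hintedAliases) {
    int mapJoinPos = -1;
    int pos = 0;
    for (String alias : aliases) {
      if (!hintedAliases.contains(alias)) {
        if (mapJoinPos >= 0) {
          return -1; // two un-cached inputs: not convertible to a map join
        }
        mapJoinPos = pos;
      }
      pos++;
    }
    if (mapJoinPos == -1) {
      throw new IllegalArgumentException("invalid MAPJOIN hint: all inputs would be cached");
    }
    return mapJoinPos;
  }

  public static void main(String[] args) {
    List<String> aliases = Arrays.asList("a", "b", "c");
    Set<String> hinted = new HashSet<String>(Arrays.asList("b", "c"));
    // prints 0: "a" is streamed, "b" and "c" are cached in memory
    System.out.println(streamedPosition(aliases, hinted));
  }
}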
+ * + * @param pactx + * current parse context */ public ParseContext transform(ParseContext pactx) throws SemanticException { - this.pGraphContext = pactx; + pGraphContext = pactx; List listMapJoinOps = new ArrayList(); - + // traverse all the joins and convert them if necessary if (pGraphContext.getJoinContext() != null) { Map joinMap = new HashMap(); - - Set> joinCtx = pGraphContext.getJoinContext().entrySet(); - Iterator> joinCtxIter = joinCtx.iterator(); + + Set> joinCtx = pGraphContext + .getJoinContext().entrySet(); + Iterator> joinCtxIter = joinCtx + .iterator(); while (joinCtxIter.hasNext()) { Map.Entry joinEntry = joinCtxIter.next(); JoinOperator joinOp = joinEntry.getKey(); - QBJoinTree qbJoin = joinEntry.getValue(); + QBJoinTree qbJoin = joinEntry.getValue(); int mapJoinPos = mapSideJoin(joinOp, qbJoin); if (mapJoinPos >= 0) { listMapJoinOps.add(convertMapJoin(pactx, joinOp, qbJoin, mapJoinPos)); - } - else { + } else { joinMap.put(joinOp, qbJoin); } } - + // store the new joinContext pGraphContext.setJoinContext(joinMap); } @@ -353,26 +391,34 @@ // Go over the list and find if a reducer is not needed List listMapJoinOpsNoRed = new ArrayList(); - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. // The dispatcher generates the plan from the operator tree Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp(new String("R0"), "MAPJOIN%"), getCurrentMapJoin()); - opRules.put(new RuleRegExp(new String("R1"), "MAPJOIN%.*FS%"), getMapJoinFS()); - opRules.put(new RuleRegExp(new String("R2"), "MAPJOIN%.*RS%"), getMapJoinDefault()); - opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*MAPJOIN%"), getMapJoinDefault()); - opRules.put(new RuleRegExp(new String("R4"), "MAPJOIN%.*UNION%"), getMapJoinDefault()); + opRules.put(new RuleRegExp(new String("R0"), "MAPJOIN%"), + getCurrentMapJoin()); + opRules.put(new RuleRegExp(new String("R1"), "MAPJOIN%.*FS%"), + getMapJoinFS()); + opRules.put(new RuleRegExp(new String("R2"), "MAPJOIN%.*RS%"), + getMapJoinDefault()); + opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*MAPJOIN%"), + getMapJoinDefault()); + opRules.put(new RuleRegExp(new String("R4"), "MAPJOIN%.*UNION%"), + getMapJoinDefault()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, new MapJoinWalkerCtx(listMapJoinOpsNoRed)); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, + new MapJoinWalkerCtx(listMapJoinOpsNoRed)); GraphWalker ogw = new GenMapRedWalker(disp); ArrayList topNodes = new ArrayList(); topNodes.addAll(listMapJoinOps); ogw.startWalking(topNodes, null); - + pGraphContext.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed); return pGraphContext; - } + } public static class CurrentMapJoin implements NodeProcessor { @@ -382,14 +428,14 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - - MapJoinWalkerCtx ctx = (MapJoinWalkerCtx)procCtx; - MapJoinOperator mapJoin = (MapJoinOperator)nd; + + MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; + MapJoinOperator mapJoin = (MapJoinOperator) nd; ctx.setCurrMapJoinOp(mapJoin); return null; } } - + public static class MapJoinFS implements NodeProcessor { /** @@ -398,25 +444,28 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - - MapJoinWalkerCtx ctx = (MapJoinWalkerCtx)procCtx; + + MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; MapJoinOperator mapJoin = ctx.getCurrMapJoinOp(); - List listRejectedMapJoins = ctx.getListRejectedMapJoins(); - + List listRejectedMapJoins = ctx + .getListRejectedMapJoins(); + // the mapjoin has already been handled - if ((listRejectedMapJoins != null) && - (listRejectedMapJoins.contains(mapJoin))) + if ((listRejectedMapJoins != null) + && (listRejectedMapJoins.contains(mapJoin))) { return null; - + } + List listMapJoinsNoRed = ctx.getListMapJoinsNoRed(); - if (listMapJoinsNoRed == null) + if (listMapJoinsNoRed == null) { listMapJoinsNoRed = new ArrayList(); + } listMapJoinsNoRed.add(mapJoin); ctx.setListMapJoins(listMapJoinsNoRed); return null; } } - + public static class MapJoinDefault implements NodeProcessor { /** @@ -425,17 +474,19 @@ @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - MapJoinWalkerCtx ctx = (MapJoinWalkerCtx)procCtx; + MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; MapJoinOperator mapJoin = ctx.getCurrMapJoinOp(); - List listRejectedMapJoins = ctx.getListRejectedMapJoins(); - if (listRejectedMapJoins == null) + List listRejectedMapJoins = ctx + .getListRejectedMapJoins(); + if (listRejectedMapJoins == null) { listRejectedMapJoins = new ArrayList(); + } listRejectedMapJoins.add(mapJoin); ctx.setListRejectedMapJoins(listRejectedMapJoins); return null; } } - + public static class Default implements NodeProcessor { /** @@ -447,7 +498,7 @@ return null; } } - + public static NodeProcessor getMapJoinFS() { return new MapJoinFS(); } @@ -459,23 +510,23 @@ public static NodeProcessor getDefault() { return new Default(); } - + public static NodeProcessor getCurrentMapJoin() { return new CurrentMapJoin(); } - + public static class MapJoinWalkerCtx implements NodeProcessorCtx { List listMapJoinsNoRed; List listRejectedMapJoins; - MapJoinOperator currMapJoinOp; + MapJoinOperator currMapJoinOp; /** * @param listMapJoinsNoRed */ public MapJoinWalkerCtx(List listMapJoinsNoRed) { this.listMapJoinsNoRed = listMapJoinsNoRed; - this.currMapJoinOp = null; - this.listRejectedMapJoins = new ArrayList(); + currMapJoinOp = null; + listRejectedMapJoins = new ArrayList(); } /** @@ -486,7 +537,8 @@ } /** - * @param listMapJoinsNoRed the listMapJoins to set + * @param listMapJoinsNoRed + * the listMapJoins to set */ public void setListMapJoins(List listMapJoinsNoRed) { this.listMapJoinsNoRed = listMapJoinsNoRed; @@ -500,7 +552,8 @@ } /** - * @param currMapJoinOp the currMapJoinOp to set + * @param currMapJoinOp + * the currMapJoinOp to set */ public void setCurrMapJoinOp(MapJoinOperator currMapJoinOp) { this.currMapJoinOp = currMapJoinOp; @@ -514,9 +567,11 @@ } /** - * @param listRejectedMapJoins the listRejectedMapJoins to set + * @param listRejectedMapJoins + * the listRejectedMapJoins to set */ - public void setListRejectedMapJoins(List listRejectedMapJoins) { + public void setListRejectedMapJoins( 
+ List listRejectedMapJoins) { this.listRejectedMapJoins = listRejectedMapJoins; } } Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java (working copy) @@ -18,21 +18,23 @@ package org.apache.hadoop.hive.ql.hooks; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.metadata.Partition; import java.net.URI; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; + /** - * This class encapsulates an object that is being written to - * by the query. This object may be a table, partition, dfs - * directory or a local directory. + * This class encapsulates an object that is being written to by the query. This + * object may be a table, partition, dfs directory or a local directory. */ public class WriteEntity { /** * The type of the write entity. */ - public static enum Type {TABLE, PARTITION, DFS_DIR, LOCAL_DIR}; + public static enum Type { + TABLE, PARTITION, DFS_DIR, LOCAL_DIR + }; /** * The type. @@ -42,58 +44,61 @@ /** * The table. This is null if this is a directory. */ - private Table t; + private final Table t; /** * The partition.This is null if this object is not a partition. */ - private Partition p; + private final Partition p; /** * The directory if this is a directory. */ - private String d; + private final String d; /** * Constructor for a table. - * - * @param t Table that is written to. + * + * @param t + * Table that is written to. */ public WriteEntity(Table t) { - this.d = null; - this.p = null; + d = null; + p = null; this.t = t; - this.typ = Type.TABLE; + typ = Type.TABLE; } /** * Constructor for a partition. - * - * @param p Partition that is written to. + * + * @param p + * Partition that is written to. */ public WriteEntity(Partition p) { - this.d = null; + d = null; this.p = p; - this.t = p.getTable(); - this.typ = Type.PARTITION; + t = p.getTable(); + typ = Type.PARTITION; } /** * Constructor for a file. - * - * @param d The name of the directory that is being written to. - * @param islocal Flag to decide whether this directory is local or in dfs. + * + * @param d + * The name of the directory that is being written to. + * @param islocal + * Flag to decide whether this directory is local or in dfs. */ public WriteEntity(String d, boolean islocal) { this.d = d; - this.p = null; - this.t = null; + p = null; + t = null; if (islocal) { - this.typ = Type.LOCAL_DIR; + typ = Type.LOCAL_DIR; + } else { + typ = Type.DFS_DIR; } - else { - this.typ = Type.DFS_DIR; - } } /** @@ -107,14 +112,17 @@ * Get the location of the entity. */ public URI getLocation() throws Exception { - if (typ == Type.TABLE) + if (typ == Type.TABLE) { return t.getDataLocation(); + } - if (typ == Type.PARTITION) + if (typ == Type.PARTITION) { return p.getDataLocation(); + } - if (typ == Type.DFS_DIR || typ == Type.LOCAL_DIR) + if (typ == Type.DFS_DIR || typ == Type.LOCAL_DIR) { return new URI(d); + } return null; } @@ -136,8 +144,9 @@ /** * toString function. 
*/ + @Override public String toString() { - switch(typ) { + switch (typ) { case TABLE: return t.getDbName() + "@" + t.getName(); case PARTITION: @@ -152,15 +161,16 @@ */ @Override public boolean equals(Object o) { - if (o == null) + if (o == null) { return false; + } if (o instanceof WriteEntity) { - WriteEntity ore = (WriteEntity)o; + WriteEntity ore = (WriteEntity) o; return (toString().equalsIgnoreCase(ore.toString())); + } else { + return false; } - else - return false; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecute.java (working copy) @@ -19,27 +19,29 @@ package org.apache.hadoop.hive.ql.hooks; import java.util.Set; + +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.hive.ql.session.SessionState; /** - * The post execute hook interface. A list of such hooks can - * be configured to be called after compilation and before - * execution. + * The post execute hook interface. A list of such hooks can be configured to be + * called after compilation and before execution. */ public interface PostExecute { /** - * The run command that is called just before the execution of the - * query. - * - * @param sess The session state. - * @param inputs The set of input tables and partitions. - * @param outputs The set of output tables, partitions, local and hdfs directories. - * @param ugi The user group security information. + * The run command that is called just before the execution of the query. + * + * @param sess + * The session state. + * @param inputs + * The set of input tables and partitions. + * @param outputs + * The set of output tables, partitions, local and hdfs directories. + * @param ugi + * The user group security information. */ public void run(SessionState sess, Set inputs, - Set outputs, UserGroupInformation ugi) - throws Exception; + Set outputs, UserGroupInformation ugi) throws Exception; } Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (working copy) @@ -18,50 +18,56 @@ package org.apache.hadoop.hive.ql.hooks; +import java.net.URI; +import java.util.Map; + import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import java.util.Map; -import java.net.URI; /** - * This class encapsulates the information on the partition and - * tables that are read by the query. + * This class encapsulates the information on the partition and tables that are + * read by the query. */ public class ReadEntity { /** * The partition. This is null for a non partitioned table. */ - private Partition p; + private final Partition p; /** * The table. */ - private Table t; + private final Table t; /** * Constructor. - * - * @param t The Table that the query reads from. + * + * @param t + * The Table that the query reads from. */ public ReadEntity(Table t) { this.t = t; - this.p = null; + p = null; } /** * Constructor given a partiton. - * - * @param p The partition that the query reads from. + * + * @param p + * The partition that the query reads from. 
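Editor's note: both WriteEntity and ReadEntity in these hunks derive equality from the canonical "db@table[@partition]" string produced by toString(). Below is a stripped-down sketch of that identity scheme with plain-string fields standing in for the metastore Table/Partition objects; keeping the identity in the canonical name avoids comparing the heavyweight metadata objects directly.

// Sketch only: an entity whose equality is the case-insensitive comparison of
// its canonical name, mirroring how ReadEntity/WriteEntity compare themselves.
public class EntityIdentitySketch {

  static class Entity {
    final String db;
    final String table;
    final String partition; // null for a non-partitioned read/write

    Entity(String db, String table, String partition) {
      this.db = db;
      this.table = table;
      this.partition = partition;
    }

    @Override
    public String toString() {
      return partition == null ? db + "@" + table : db + "@" + table + "@" + partition;
    }

    @Override
    public boolean equals(Object o) {
      if (!(o instanceof Entity)) {
        return false;
      }
      return toString().equalsIgnoreCase(o.toString());
    }

    @Override
    public int hashCode() {
      // keep hashCode consistent with the case-insensitive equals
      return toString().toLowerCase().hashCode();
    }
  }

  public static void main(String[] args) {
    Entity a = new Entity("default", "SRC", "ds=2009-01-01");
    Entity b = new Entity("default", "src", "ds=2009-01-01");
    System.out.println(a.equals(b)); // true: identity is the canonical name
  }
}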
*/ public ReadEntity(Partition p) { - this.t = p.getTable(); + t = p.getTable(); this.p = p; } + /** * Enum that tells what time of a read entity this is. */ - public static enum Type {TABLE, PARTITION}; + public static enum Type { + TABLE, PARTITION + }; /** * Get the type. @@ -76,8 +82,7 @@ public Map getParameters() { if (p != null) { return p.getTPartition().getParameters(); - } - else { + } else { return t.getTTable().getParameters(); } } @@ -88,8 +93,7 @@ public URI getLocation() { if (p != null) { return p.getDataLocation(); - } - else { + } else { return t.getDataLocation(); } } @@ -114,9 +118,9 @@ @Override public String toString() { if (p != null) { - return p.getTable().getDbName() + "@" + p.getTable().getName() + "@" + p.getName(); - } - else { + return p.getTable().getDbName() + "@" + p.getTable().getName() + "@" + + p.getName(); + } else { return t.getDbName() + "@" + t.getName(); } } @@ -126,15 +130,16 @@ */ @Override public boolean equals(Object o) { - if (o == null) + if (o == null) { return false; + } if (o instanceof ReadEntity) { - ReadEntity ore = (ReadEntity)o; + ReadEntity ore = (ReadEntity) o; return (toString().equalsIgnoreCase(ore.toString())); + } else { + return false; } - else - return false; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/PreExecute.java (working copy) @@ -19,27 +19,29 @@ package org.apache.hadoop.hive.ql.hooks; import java.util.Set; + +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.hive.ql.session.SessionState; /** - * The pre execute hook interface. A list of such hooks can - * be configured to be called after compilation and before - * execution. + * The pre execute hook interface. A list of such hooks can be configured to be + * called after compilation and before execution. */ public interface PreExecute { /** - * The run command that is called just before the execution of the - * query. + * The run command that is called just before the execution of the query. * - * @param sess The session state. - * @param inputs The set of input tables and partitions. - * @param outputs The set of output tables, partitions, local and hdfs directories. - * @param ugi The user group security information. + * @param sess + * The session state. + * @param inputs + * The set of input tables and partitions. + * @param outputs + * The set of output tables, partitions, local and hdfs directories. + * @param ugi + * The user group security information. 
*/ - public void run(SessionState sess, Set inputs, - Set outputs, UserGroupInformation ugi) - throws Exception; - + public void run(SessionState sess, Set inputs, + Set outputs, UserGroupInformation ugi) throws Exception; + } Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java (working copy) @@ -40,51 +40,59 @@ /** Information maintained for an expr while walking an expr tree */ private static class ExprInfo { - /** true if expr rooted at this node doesn't contain more than one table alias */ - public boolean isCandidate = false; + /** + * true if expr rooted at this node doesn't contain more than one table + * alias + */ + public boolean isCandidate = false; /** alias that this expression refers to */ - public String alias = null; - /** new expr for this expression.*/ + public String alias = null; + /** new expr for this expression. */ public exprNodeDesc convertedExpr = null; - public ExprInfo() {} + + public ExprInfo() { + } + public ExprInfo(boolean isCandidate, String alias, exprNodeDesc replacedNode) { - this.isCandidate = isCandidate; - this.alias = alias; - this.convertedExpr = replacedNode; + this.isCandidate = isCandidate; + this.alias = alias; + convertedExpr = replacedNode; } } - - protected static final Log LOG = LogFactory.getLog(OpProcFactory.class.getName());; + + protected static final Log LOG = LogFactory.getLog(OpProcFactory.class + .getName());; private Operator op = null; private RowResolver toRR = null; - + /** - * this map contains a expr infos. Each key is a node in the expression tree and the - * information for each node is the value which is used while walking the tree by - * its parent + * this map contains a expr infos. Each key is a node in the expression tree + * and the information for each node is the value which is used while walking + * the tree by its parent */ - private Map> pushdownPreds; + private final Map> pushdownPreds; /** - * Values the expression sub-trees (predicates) that can be pushed down for root - * expression tree. Since there can be more than one alias in an expression tree, - * this is a map from the alias to predicates. + * Values the expression sub-trees (predicates) that can be pushed down for + * root expression tree. Since there can be more than one alias in an + * expression tree, this is a map from the alias to predicates. */ - private Map exprInfoMap; + private final Map exprInfoMap; private boolean isDeterministic = true; - + public ExprWalkerInfo() { - this.pushdownPreds = new HashMap>(); - this.exprInfoMap = new HashMap(); + pushdownPreds = new HashMap>(); + exprInfoMap = new HashMap(); } - - public ExprWalkerInfo(Operator op, final RowResolver toRR) { + + public ExprWalkerInfo(Operator op, + final RowResolver toRR) { this.op = op; - this.toRR = toRR; - - this.pushdownPreds = new HashMap>(); - this.exprInfoMap = new HashMap(); + this.toRR = toRR; + + pushdownPreds = new HashMap>(); + exprInfoMap = new HashMap(); } - + /** * @return the op of this expression */ @@ -100,22 +108,28 @@ } /** - * @return converted expression for give node. If there is none then returns null. + * @return converted expression for give node. If there is none then returns + * null. 
*/ public exprNodeDesc getConvertedNode(Node nd) { ExprInfo ei = exprInfoMap.get(nd); - if(ei == null) return null; + if (ei == null) { + return null; + } return ei.convertedExpr; } - + /** * adds a replacement node for this expression - * @param oldNode original node - * @param newNode new node + * + * @param oldNode + * original node + * @param newNode + * new node */ public void addConvertedNode(exprNodeDesc oldNode, exprNodeDesc newNode) { ExprInfo ei = exprInfoMap.get(oldNode); - if(ei == null) { + if (ei == null) { ei = new ExprInfo(); exprInfoMap.put(oldNode, ei); } @@ -125,71 +139,85 @@ /** * Returns true if the specified expression is pushdown candidate else false + * * @param expr * @return true or false */ public boolean isCandidate(exprNodeDesc expr) { ExprInfo ei = exprInfoMap.get(expr); - if(ei == null) return false; + if (ei == null) { + return false; + } return ei.isCandidate; } - + /** * Marks the specified expr to the specified value + * * @param expr - * @param b can + * @param b + * can */ public void setIsCandidate(exprNodeDesc expr, boolean b) { ExprInfo ei = exprInfoMap.get(expr); - if(ei == null) { + if (ei == null) { ei = new ExprInfo(); exprInfoMap.put(expr, ei); } ei.isCandidate = b; } - + /** * Returns the alias of the specified expr + * * @param expr * @return The alias of the expression */ public String getAlias(exprNodeDesc expr) { ExprInfo ei = exprInfoMap.get(expr); - if(ei == null) return null; + if (ei == null) { + return null; + } return ei.alias; } - + /** * Adds the specified alias to the specified expr + * * @param expr * @param alias */ public void addAlias(exprNodeDesc expr, String alias) { - if(alias == null) + if (alias == null) { return; + } ExprInfo ei = exprInfoMap.get(expr); - if(ei == null) { + if (ei == null) { ei = new ExprInfo(); exprInfoMap.put(expr, ei); } ei.alias = alias; } - + /** - * Adds the specified expr as the top-most pushdown expr (ie all its children can be pushed) + * Adds the specified expr as the top-most pushdown expr (ie all its children + * can be pushed) + * * @param expr */ public void addFinalCandidate(exprNodeDesc expr) { - String alias = this.getAlias(expr); - if(pushdownPreds.get(alias) == null) { + String alias = getAlias(expr); + if (pushdownPreds.get(alias) == null) { pushdownPreds.put(alias, new ArrayList()); } - pushdownPreds.get(alias).add((exprNodeDesc) expr.clone()); + pushdownPreds.get(alias).add(expr.clone()); } /** - * Returns the list of pushdown expressions for each alias that appear in the current operator's - * RowResolver. The exprs in each list can be combined using conjunction (AND) + * Returns the list of pushdown expressions for each alias that appear in the + * current operator's RowResolver. 
The exprs in each list can be combined + * using conjunction (AND) + * * @return the map of alias to a list of pushdown predicates */ public Map> getFinalCandidates() { @@ -198,14 +226,18 @@ /** * Merges the specified pushdown predicates with the current class - * @param ewi ExpressionWalkerInfo + * + * @param ewi + * ExpressionWalkerInfo */ public void merge(ExprWalkerInfo ewi) { - if(ewi == null) + if (ewi == null) { return; - for(Entry> e : ewi.getFinalCandidates().entrySet()) { + } + for (Entry> e : ewi.getFinalCandidates() + .entrySet()) { List predList = pushdownPreds.get(e.getKey()); - if( predList != null ) { + if (predList != null) { predList.addAll(e.getValue()); } else { pushdownPreds.put(e.getKey(), e.getValue()); @@ -214,13 +246,15 @@ } /** - * sets the deterministic flag for this expression - * @param b deterministic or not + * sets the deterministic flag for this expression + * + * @param b + * deterministic or not */ public void setDeterministic(boolean b) { isDeterministic = b; } - + /** * @return whether this expression is deterministic or not */ Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (working copy) @@ -41,13 +41,11 @@ import org.apache.hadoop.hive.ql.plan.exprNodeDesc; import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.UDFOPAnd; -import org.apache.hadoop.hive.ql.udf.UDFType; /** - * Expression factory for predicate pushdown processing. - * Each processor determines whether the expression is a possible candidate - * for predicate pushdown optimization for the given operator + * Expression factory for predicate pushdown processing. 
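Editor's note: ExprWalkerInfo above keeps the final pushdown candidates in a map from table alias to a list of predicates (addFinalCandidate), and merge() folds another walker's map into the current one. A small sketch of that bookkeeping follows, with a placeholder Pred type standing in for exprNodeDesc; it is illustrative only, not the Hive class.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch only: groups pushdown candidates by the alias they refer to and
// merges two such maps, the way ExprWalkerInfo.addFinalCandidate/merge do.
public class PushdownCandidateMapSketch {

  static class Pred {
    final String alias;
    final String text;
    Pred(String alias, String text) { this.alias = alias; this.text = text; }
  }

  final Map<String, List<Pred>> pushdownPreds = new HashMap<String, List<Pred>>();

  void addFinalCandidate(Pred pred) {
    List<Pred> list = pushdownPreds.get(pred.alias);
    if (list == null) {
      list = new ArrayList<Pred>();
      pushdownPreds.put(pred.alias, list);
    }
    list.add(pred);
  }

  void merge(PushdownCandidateMapSketch other) {
    for (Map.Entry<String, List<Pred>> e : other.pushdownPreds.entrySet()) {
      List<Pred> list = pushdownPreds.get(e.getKey());
      if (list != null) {
        list.addAll(e.getValue());
      } else {
        pushdownPreds.put(e.getKey(), e.getValue());
      }
    }
  }

  public static void main(String[] args) {
    PushdownCandidateMapSketch left = new PushdownCandidateMapSketch();
    left.addFinalCandidate(new Pred("a", "a.col1 > 20"));
    PushdownCandidateMapSketch right = new PushdownCandidateMapSketch();
    right.addFinalCandidate(new Pred("b", "b.col2 > 40"));
    left.merge(right);
    System.out.println(left.pushdownPreds.keySet()); // [a, b] (order may vary)
  }
}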
Each processor + * determines whether the expression is a possible candidate for predicate + * pushdown optimization for the given operator */ public class ExprWalkerProcFactory { @@ -65,12 +63,13 @@ Operator op = ctx.getOp(); String[] colAlias = toRR.reverseLookup(colref.getColumn()); - if(op.getColumnExprMap() != null) { + if (op.getColumnExprMap() != null) { // replace the output expression with the input expression so that // parent op can understand this expression exprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn()); - if(exp == null) { - // means that expression can't be pushed either because it is value in group by + if (exp == null) { + // means that expression can't be pushed either because it is value in + // group by ctx.setIsCandidate(colref, false); return false; } @@ -78,8 +77,9 @@ ctx.setIsCandidate(exp, true); ctx.addAlias(exp, colAlias[0]); } else { - if (colAlias == null) + if (colAlias == null) { assert false; + } ctx.addAlias(colref, colAlias[0]); } ctx.setIsCandidate(colref, true); @@ -88,7 +88,6 @@ } - public static class FieldExprProcessor implements NodeProcessor { @Override @@ -99,7 +98,7 @@ exprNodeFieldDesc expr = (exprNodeFieldDesc) nd; boolean isCandidate = true; - assert(nd.getChildren().size() == 1); + assert (nd.getChildren().size() == 1); exprNodeDesc ch = (exprNodeDesc) nd.getChildren().get(0); exprNodeDesc newCh = ctx.getConvertedNode(ch); if (newCh != null) { @@ -109,7 +108,8 @@ String chAlias = ctx.getAlias(ch); isCandidate = isCandidate && ctx.isCandidate(ch); - // need to iterate through all children even if one is found to be not a candidate + // need to iterate through all children even if one is found to be not a + // candidate // in case if the other children could be individually pushed up if (isCandidate && chAlias != null) { if (alias == null) { @@ -127,8 +127,9 @@ } /** - * If all children are candidates and refer only to one table alias then this expr is a candidate - * else it is not a candidate but its children could be final candidates + * If all children are candidates and refer only to one table alias then this + * expr is a candidate else it is not a candidate but its children could be + * final candidates */ public static class GenericFuncExprProcessor implements NodeProcessor { @@ -139,16 +140,15 @@ String alias = null; exprNodeGenericFuncDesc expr = (exprNodeGenericFuncDesc) nd; - if (!FunctionRegistry.isDeterministic(expr.getGenericUDF())) { // this GenericUDF can't be pushed down ctx.setIsCandidate(expr, false); ctx.setDeterministic(false); return false; } - + boolean isCandidate = true; - for (int i=0; i < nd.getChildren().size(); i++) { + for (int i = 0; i < nd.getChildren().size(); i++) { exprNodeDesc ch = (exprNodeDesc) nd.getChildren().get(i); exprNodeDesc newCh = ctx.getConvertedNode(ch); if (newCh != null) { @@ -156,9 +156,10 @@ ch = newCh; } String chAlias = ctx.getAlias(ch); - + isCandidate = isCandidate && ctx.isCandidate(ch); - // need to iterate through all children even if one is found to be not a candidate + // need to iterate through all children even if one is found to be not a + // candidate // in case if the other children could be individually pushed up if (isCandidate && chAlias != null) { if (alias == null) { @@ -167,9 +168,10 @@ isCandidate = false; } } - - if(!isCandidate) + + if (!isCandidate) { break; + } } ctx.addAlias(expr, alias); ctx.setIsCandidate(expr, isCandidate); @@ -177,7 +179,7 @@ } } - + /** * For constants and null expressions */ @@ -208,48 +210,60 @@ return new 
FieldExprProcessor(); } - public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, - Operator op, - exprNodeDesc pred) throws SemanticException { + public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, + Operator op, exprNodeDesc pred) + throws SemanticException { List preds = new ArrayList(); preds.add(pred); return extractPushdownPreds(opContext, op, preds); } - + /** * Extracts pushdown predicates from the given list of predicate expression - * @param opContext operator context used for resolving column references - * @param op operator of the predicates being processed + * + * @param opContext + * operator context used for resolving column references + * @param op + * operator of the predicates being processed * @param preds * @return The expression walker information * @throws SemanticException */ - public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, - Operator op, - List preds) throws SemanticException { + public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, + Operator op, List preds) + throws SemanticException { // Create the walker, the rules dispatcher and the context. - ExprWalkerInfo exprContext = new ExprWalkerInfo(op, opContext.getRowResolver(op)); - - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + ExprWalkerInfo exprContext = new ExprWalkerInfo(op, opContext + .getRowResolver(op)); + + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher // generates the plan from the operator tree Map exprRules = new LinkedHashMap(); - exprRules.put(new RuleRegExp("R1", exprNodeColumnDesc.class.getName() + "%"), getColumnProcessor()); - exprRules.put(new RuleRegExp("R2", exprNodeFieldDesc.class.getName() + "%"), getFieldProcessor()); - exprRules.put(new RuleRegExp("R3", exprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor()); - - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, exprContext); + exprRules.put( + new RuleRegExp("R1", exprNodeColumnDesc.class.getName() + "%"), + getColumnProcessor()); + exprRules.put( + new RuleRegExp("R2", exprNodeFieldDesc.class.getName() + "%"), + getFieldProcessor()); + exprRules.put(new RuleRegExp("R3", exprNodeGenericFuncDesc.class.getName() + + "%"), getGenericFuncProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), + exprRules, exprContext); GraphWalker egw = new DefaultGraphWalker(disp); - + List startNodes = new ArrayList(); List clonedPreds = new ArrayList(); for (exprNodeDesc node : preds) { - clonedPreds.add((exprNodeDesc) node.clone()); + clonedPreds.add(node.clone()); } startNodes.addAll(clonedPreds); - + egw.startWalking(startNodes, null); - + // check the root expression for final candidates for (exprNodeDesc pred : clonedPreds) { extractFinalCandidates(pred, exprContext); @@ -258,20 +272,23 @@ } /** - * Walks through the top AND nodes and determine which of them are final candidates + * Walks through the top AND nodes and determine which of them are final + * candidates */ - private static void extractFinalCandidates(exprNodeDesc expr, ExprWalkerInfo ctx) { + private static void extractFinalCandidates(exprNodeDesc expr, + 
ExprWalkerInfo ctx) { if (ctx.isCandidate(expr)) { ctx.addFinalCandidate(expr); return; } - + if (FunctionRegistry.isOpAnd(expr)) { - // If the operator is AND, we need to determine if any of the children are final candidates. + // If the operator is AND, we need to determine if any of the children are + // final candidates. for (Node ch : expr.getChildren()) { extractFinalCandidates((exprNodeDesc) ch, ctx); } } - + } } Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java (working copy) @@ -32,17 +32,16 @@ */ public class OpWalkerInfo implements NodeProcessorCtx { /** - * Operator to Pushdown Predicates Map. This keeps track of the final pushdown predicates - * for each operator as you walk the Op Graph from child to parent + * Operator to Pushdown Predicates Map. This keeps track of the final pushdown + * predicates for each operator as you walk the Op Graph from child to parent */ - private HashMap, ExprWalkerInfo> opToPushdownPredMap; - private Map, OpParseContext> opToParseCtxMap; + private final HashMap, ExprWalkerInfo> opToPushdownPredMap; + private final Map, OpParseContext> opToParseCtxMap; - public OpWalkerInfo( HashMap, OpParseContext> opToParseCtxMap) { this.opToParseCtxMap = opToParseCtxMap; - this.opToPushdownPredMap = new HashMap, ExprWalkerInfo>(); + opToPushdownPredMap = new HashMap, ExprWalkerInfo>(); } public ExprWalkerInfo getPrunedPreds(Operator op) { Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (working copy) @@ -38,38 +38,37 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Implements predicate pushdown. Predicate pushdown is a term borrowed from relational - * databases even though for Hive it is predicate pushup. - * The basic idea is to process expressions as early in the plan as possible. The default plan - * generation adds filters where they are seen but in some instances some of the filter expressions - * can be pushed nearer to the operator that sees this particular data for the first time. - * e.g. - * select a.*, b.* - * from a join b on (a.col1 = b.col1) - * where a.col1 > 20 and b.col2 > 40 - * - * For the above query, the predicates (a.col1 > 20) and (b.col2 > 40), without predicate pushdown, - * would be evaluated after the join processing has been done. Suppose the two predicates filter out - * most of the rows from a and b, the join is unnecessarily processing these rows. - * With predicate pushdown, these two predicates will be processed before the join. + * Implements predicate pushdown. Predicate pushdown is a term borrowed from + * relational databases even though for Hive it is predicate pushup. The basic + * idea is to process expressions as early in the plan as possible. The default + * plan generation adds filters where they are seen but in some instances some + * of the filter expressions can be pushed nearer to the operator that sees this + * particular data for the first time. e.g. select a.*, b.* from a join b on + * (a.col1 = b.col1) where a.col1 > 20 and b.col2 > 40 * - * Predicate pushdown is enabled by setting hive.optimize.ppd to true. 
It is disable by default. + * For the above query, the predicates (a.col1 > 20) and (b.col2 > 40), without + * predicate pushdown, would be evaluated after the join processing has been + * done. Suppose the two predicates filter out most of the rows from a and b, + * the join is unnecessarily processing these rows. With predicate pushdown, + * these two predicates will be processed before the join. * - * The high-level algorithm is describe here - * - An operator is processed after all its children have been processed - * - An operator processes its own predicates and then merges (conjunction) with the processed - * predicates of its children. In case of multiple children, there are combined using - * disjunction (OR). - * - A predicate expression is processed for an operator using the following steps - * - If the expr is a constant then it is a candidate for predicate pushdown - * - If the expr is a col reference then it is a candidate and its alias is noted - * - If the expr is an index and both the array and index expr are treated as children - * - If the all child expr are candidates for pushdown and all of the expression reference - * only one alias from the operator's RowResolver then the current expression is also a - * candidate - * One key thing to note is that some operators (Select, ReduceSink, GroupBy, Join etc) change - * the columns as data flows through them. In such cases the column references are replaced by - * the corresponding expression in the input data. + * Predicate pushdown is enabled by setting hive.optimize.ppd to true. It is + * disabled by default. + * + * The high-level algorithm is described here - An operator is processed after + * all its children have been processed - An operator processes its own + * predicates and then merges (conjunction) with the processed predicates of its + * children. In case of multiple children, they are combined using disjunction + * (OR). - A predicate expression is processed for an operator using the + * following steps - If the expr is a constant then it is a candidate for + * predicate pushdown - If the expr is a col reference then it is a candidate + * and its alias is noted - If the expr is an index and both the array and index + * expr are treated as children - If all the child exprs are candidates for + * pushdown and all of the expressions reference only one alias from the + * operator's RowResolver then the current expression is also a candidate. One + * key thing to note is that some operators (Select, ReduceSink, GroupBy, Join + * etc) change the columns as data flows through them. In such cases the column + * references are replaced by the corresponding expression in the input data. 
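 * <p>
 * A rough usage sketch (illustrative only, not part of this patch): "pctx" is
 * assumed to be a ParseContext produced by semantic analysis, and the optimizer
 * wiring around this pass is omitted.
 * <pre>
 *   // hypothetical driver-side helper; error handling omitted
 *   private static ParseContext pushDownPredicates(ParseContext pctx)
 *       throws SemanticException {
 *     Transform ppd = new PredicatePushDown();
 *     // rewrites the operator tree so that qualifying predicates are evaluated
 *     // as close to the table scans as possible
 *     return ppd.transform(pctx);
 *   }
 * </pre>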
*/ public class PredicatePushDown implements Transform { @@ -78,12 +77,12 @@ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { - this.pGraphContext = pctx; - this.opToParseCtxMap = pGraphContext.getOpParseCtx(); + pGraphContext = pctx; + opToParseCtxMap = pGraphContext.getOpParseCtx(); // create a the context for walking operators OpWalkerInfo opWalkerInfo = new OpWalkerInfo(opToParseCtxMap); - + Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", "FIL%"), OpProcFactory.getFilterProc()); opRules.put(new RuleRegExp("R3", "JOIN%"), OpProcFactory.getJoinProc()); @@ -92,10 +91,12 @@ opRules.put(new RuleRegExp("R6", "SCR%"), OpProcFactory.getSCRProc()); opRules.put(new RuleRegExp("R6", "LIM%"), OpProcFactory.getLIMProc()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, opWalkerInfo); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), + opRules, opWalkerInfo); GraphWalker ogw = new DefaultGraphWalker(disp); - + // Create a list of topop nodes ArrayList topNodes = new ArrayList(); topNodes.addAll(pGraphContext.getTopOps().values()); Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (working copy) @@ -50,31 +50,29 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** - * Operator factory for predicate pushdown processing of operator graph - * Each operator determines the pushdown predicates by walking the expression tree. + * Operator factory for predicate pushdown processing of operator graph Each + * operator determines the pushdown predicates by walking the expression tree. * Each operator merges its own pushdown predicates with those of its children - * Finally the TableScan operator gathers all the predicates and inserts a filter operator - * after itself. - * TODO: Further optimizations - * 1) Multi-insert case - * 2) Create a filter operator for those predicates that couldn't be pushed to the previous - * operators in the data flow - * 3) Merge multiple sequential filter predicates into so that plans are more readable - * 4) Remove predicates from filter operators that have been pushed. Currently these pushed - * predicates are evaluated twice. + * Finally the TableScan operator gathers all the predicates and inserts a + * filter operator after itself. TODO: Further optimizations 1) Multi-insert + * case 2) Create a filter operator for those predicates that couldn't be pushed + * to the previous operators in the data flow 3) Merge multiple sequential + * filter predicates into so that plans are more readable 4) Remove predicates + * from filter operators that have been pushed. Currently these pushed + * predicates are evaluated twice. */ public class OpProcFactory { /** - * Processor for Script Operator - * Prevents any predicates being pushed + * Processor for Script Operator Prevents any predicates being pushed */ public static class ScriptPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); // script operator is a black-box to hive so no optimization here // assuming that nothing can be pushed above the script op // same with LIMIT op @@ -84,16 +82,18 @@ } /** - * Combines predicates of its child into a single expression and adds a filter op as new child + * Combines predicates of its child into a single expression and adds a filter + * op as new child */ public static class TableScanPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); - OpWalkerInfo owi = (OpWalkerInfo)procCtx; - TableScanOperator tsOp = (TableScanOperator)nd; + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); + OpWalkerInfo owi = (OpWalkerInfo) procCtx; + TableScanOperator tsOp = (TableScanOperator) nd; mergeWithChildrenPred(tsOp, owi, null, null, false); ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp); return createFilter(tsOp, pushDownPreds, owi); @@ -102,24 +102,29 @@ } /** - * Determines the push down predicates in its where expression and then combines it with - * the push down predicates that are passed from its children + * Determines the push down predicates in its where expression and then + * combines it with the push down predicates that are passed from its children */ public static class FilterPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); - OpWalkerInfo owi = (OpWalkerInfo)procCtx; + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); + OpWalkerInfo owi = (OpWalkerInfo) procCtx; Operator op = (Operator) nd; - exprNodeDesc predicate = (((FilterOperator)nd).getConf()).getPredicate(); + exprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate(); // get pushdown predicates for this operator's predicate - ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate); + ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, + predicate); if (!ewi.isDeterministic()) { /* predicate is not deterministic */ - if (op.getChildren() != null && op.getChildren().size() == 1) - createFilter(op, owi.getPrunedPreds((Operator)(op.getChildren().get(0))), owi); + if (op.getChildren() != null && op.getChildren().size() == 1) { + createFilter(op, owi + .getPrunedPreds((Operator) (op + .getChildren().get(0))), owi); + } return null; } @@ -128,43 +133,48 @@ owi.putPrunedPreds(op, ewi); // merge it with children predicates mergeWithChildrenPred(op, owi, ewi, null, false); - + return null; } } /** - * Determines predicates for which alias can be pushed to it's parents. - * See the comments for getQualifiedAliases function + * Determines predicates for which alias can be pushed to it's parents. See + * the comments for getQualifiedAliases function */ public static class JoinPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); - OpWalkerInfo owi = (OpWalkerInfo)procCtx; - Set aliases = getQualifiedAliases((JoinOperator) nd, owi.getRowResolver(nd)); + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); + OpWalkerInfo owi = (OpWalkerInfo) procCtx; + Set aliases = getQualifiedAliases((JoinOperator) nd, owi + .getRowResolver(nd)); mergeWithChildrenPred(nd, owi, null, aliases, false); return null; } /** - * Figures out the aliases for whom it is safe to push predicates based on ANSI SQL semantics - * For inner join, all predicates for all aliases can be pushed - * For full outer join, none of the predicates can be pushed as that would limit the number of - * rows for join - * For left outer join, all the predicates on the left side aliases can be pushed up - * For right outer join, all the predicates on the right side aliases can be pushed up - * Joins chain containing both left and right outer joins are treated as full outer join. - * TODO: further optimization opportunity for the case a.c1 = b.c1 and b.c2 = c.c2 - * a and b are first joined and then the result with c. But the second join op currently - * treats a and b as separate aliases and thus disallowing predicate expr containing both - * tables a and b (such as a.c3 + a.c4 > 20). Such predicates also can be pushed just above - * the second join and below the first join - * - * @param op Join Operator - * @param rr Row resolver - * @return set of qualified aliases + * Figures out the aliases for which it is safe to push predicates based on + * ANSI SQL semantics. For an inner join, all predicates for all aliases can be + * pushed. For a full outer join, none of the predicates can be pushed as that + * would limit the number of rows for the join. For a left outer join, all the + * predicates on the left side aliases can be pushed up. For a right outer + * join, all the predicates on the right side aliases can be pushed up. A join + * chain containing both left and right outer joins is treated as a full + * outer join. TODO: further optimization opportunity for the case a.c1 = + * b.c1 and b.c2 = c.c2: a and b are first joined and then the result with c. + * But the second join op currently treats a and b as separate aliases, and + * thus disallows predicate exprs containing both tables a and b (such as + * a.c3 + a.c4 > 20). 
Such predicates also can be pushed just above the + * second join and below the first join + * + * @param op + * Join Operator + * @param rr + * Row resolver + * @return set of qualified aliases */ private Set getQualifiedAliases(JoinOperator op, RowResolver rr) { Set aliases = new HashSet(); @@ -173,26 +183,34 @@ boolean oj = false; joinCond[] conds = op.getConf().getConds(); Map> posToAliasMap = op.getPosToAliasMap(); - for(joinCond jc : conds) { - if(jc.getType() == joinDesc.FULL_OUTER_JOIN) { + for (joinCond jc : conds) { + if (jc.getType() == joinDesc.FULL_OUTER_JOIN) { oj = true; break; - } else if(jc.getType() == joinDesc.LEFT_OUTER_JOIN) { - if(jc.getLeft() < loj) loj = jc.getLeft(); - } else if(jc.getType() == joinDesc.RIGHT_OUTER_JOIN) { - if(jc.getRight() > roj) roj = jc.getRight(); + } else if (jc.getType() == joinDesc.LEFT_OUTER_JOIN) { + if (jc.getLeft() < loj) { + loj = jc.getLeft(); + } + } else if (jc.getType() == joinDesc.RIGHT_OUTER_JOIN) { + if (jc.getRight() > roj) { + roj = jc.getRight(); + } } } - if(oj || (loj != Integer.MAX_VALUE && roj != -1)) return aliases; + if (oj || (loj != Integer.MAX_VALUE && roj != -1)) { + return aliases; + } for (Entry> pa : posToAliasMap.entrySet()) { - if(loj != Integer.MAX_VALUE) { - if(pa.getKey() <= loj) + if (loj != Integer.MAX_VALUE) { + if (pa.getKey() <= loj) { aliases.addAll(pa.getValue()); - } else if(roj != -1) { - if(pa.getKey() >= roj) + } + } else if (roj != -1) { + if (pa.getKey() >= roj) { aliases.addAll(pa.getValue()); + } } else { - aliases.addAll(pa.getValue()); + aliases.addAll(pa.getValue()); } } Set aliases2 = rr.getTableNames(); @@ -203,19 +221,20 @@ /** * Processor for ReduceSink operator. - * + * */ public static class ReduceSinkPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); - OpWalkerInfo owi = (OpWalkerInfo)procCtx; + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); + OpWalkerInfo owi = (OpWalkerInfo) procCtx; Set aliases = owi.getRowResolver(nd).getTableNames(); boolean ignoreAliases = false; - if(aliases.size() == 1 && aliases.contains("")) { + if (aliases.size() == 1 && aliases.contains("")) { // Reduce sink of group by operator - ignoreAliases = true; + ignoreAliases = true; } mergeWithChildrenPred(nd, owi, null, aliases, ignoreAliases); return null; @@ -228,13 +247,15 @@ */ public static class DefaultPPD implements NodeProcessor { - protected static final Log LOG = LogFactory.getLog(OpProcFactory.class.getName()); - + protected static final Log LOG = LogFactory.getLog(OpProcFactory.class + .getName()); + @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" + ((Operator)nd).getIdentifier() + ")"); - mergeWithChildrenPred(nd, (OpWalkerInfo)procCtx, null, null, false); + LOG.info("Processing for " + nd.getName() + "(" + + ((Operator) nd).getIdentifier() + ")"); + mergeWithChildrenPred(nd, (OpWalkerInfo) procCtx, null, null, false); return null; } @@ -243,41 +264,58 @@ * @param ewi */ protected void logExpr(Node nd, ExprWalkerInfo ewi) { - for (Entry> e : ewi.getFinalCandidates().entrySet()) { - LOG.info("Pushdown Predicates of " + nd.getName() + " For Alias : " + e.getKey() ); + for (Entry> e : ewi.getFinalCandidates() + .entrySet()) { + LOG.info("Pushdown Predicates of " + nd.getName() + " For Alias : " + + e.getKey()); for (exprNodeDesc n : e.getValue()) { - LOG.info("\t" + n.getExprString()); + LOG.info("\t" + n.getExprString()); } } } /** - * Take current operators pushdown predicates and merges them with children's pushdown predicates - * @param nd current operator - * @param owi operator context during this walk - * @param ewi pushdown predicates (part of expression walker info) - * @param aliases aliases that this operator can pushdown. null means that all aliases can be pushed down - * @param ignoreAliases - * @throws SemanticException + * Take current operators pushdown predicates and merges them with + * children's pushdown predicates + * + * @param nd + * current operator + * @param owi + * operator context during this walk + * @param ewi + * pushdown predicates (part of expression walker info) + * @param aliases + * aliases that this operator can pushdown. null means that all + * aliases can be pushed down + * @param ignoreAliases + * @throws SemanticException */ - protected void mergeWithChildrenPred(Node nd, OpWalkerInfo owi, ExprWalkerInfo ewi, Set aliases, boolean ignoreAliases) throws SemanticException { - if(nd.getChildren() == null || nd.getChildren().size() > 1) { + protected void mergeWithChildrenPred(Node nd, OpWalkerInfo owi, + ExprWalkerInfo ewi, Set aliases, boolean ignoreAliases) + throws SemanticException { + if (nd.getChildren() == null || nd.getChildren().size() > 1) { // ppd for multi-insert query is not yet implemented // no-op for leafs return; } Operator op = (Operator) nd; - ExprWalkerInfo childPreds = owi.getPrunedPreds((Operator) nd.getChildren().get(0)); - if(childPreds == null) { + ExprWalkerInfo childPreds = owi + .getPrunedPreds((Operator) nd.getChildren() + .get(0)); + if (childPreds == null) { return; } - if(ewi == null) { + if (ewi == null) { ewi = new ExprWalkerInfo(); } - for (Entry> e : childPreds.getFinalCandidates().entrySet()) { - if(ignoreAliases || aliases == null || aliases.contains(e.getKey()) || e.getKey() == null) { - // e.getKey() (alias) can be null in case of constant expressions. see input8.q - ExprWalkerInfo extractPushdownPreds = ExprWalkerProcFactory.extractPushdownPreds(owi, op, e.getValue()); + for (Entry> e : childPreds + .getFinalCandidates().entrySet()) { + if (ignoreAliases || aliases == null || aliases.contains(e.getKey()) + || e.getKey() == null) { + // e.getKey() (alias) can be null in case of constant expressions. 
see + // input8.q + ExprWalkerInfo extractPushdownPreds = ExprWalkerProcFactory + .extractPushdownPreds(owi, op, e.getValue()); ewi.merge(extractPushdownPreds); logExpr(nd, extractPushdownPreds); } @@ -286,19 +324,20 @@ } } - protected static Object createFilter(Operator op, ExprWalkerInfo pushDownPreds, OpWalkerInfo owi) { - if (pushDownPreds == null - || pushDownPreds.getFinalCandidates() == null + protected static Object createFilter(Operator op, + ExprWalkerInfo pushDownPreds, OpWalkerInfo owi) { + if (pushDownPreds == null || pushDownPreds.getFinalCandidates() == null || pushDownPreds.getFinalCandidates().size() == 0) { return null; } - + RowResolver inputRR = owi.getRowResolver(op); // combine all predicates into a single expression List preds = null; - exprNodeDesc condn = null; - Iterator> iterator = pushDownPreds.getFinalCandidates().values().iterator(); + exprNodeDesc condn = null; + Iterator> iterator = pushDownPreds.getFinalCandidates() + .values().iterator(); while (iterator.hasNext()) { preds = iterator.next(); int i = 0; @@ -307,31 +346,30 @@ i++; } - for(; i < preds.size(); i++) { + for (; i < preds.size(); i++) { List children = new ArrayList(2); children.add(condn); - children.add((exprNodeDesc) preds.get(i)); - condn = new exprNodeGenericFuncDesc( - TypeInfoFactory.booleanTypeInfo, - FunctionRegistry.getGenericUDFForAnd(), - children - ); + children.add(preds.get(i)); + condn = new exprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), children); } } - if(condn == null) + if (condn == null) { return null; + } // add new filter op - List> originalChilren = op.getChildOperators(); + List> originalChilren = op + .getChildOperators(); op.setChildOperators(null); - Operator output = - OperatorFactory.getAndMakeChild(new filterDesc(condn, false), - new RowSchema(inputRR.getColumnInfos()), - op); + Operator output = OperatorFactory.getAndMakeChild( + new filterDesc(condn, false), new RowSchema(inputRR.getColumnInfos()), + op); output.setChildOperators(originalChilren); for (Operator ch : originalChilren) { - List> parentOperators = ch.getParentOperators(); + List> parentOperators = ch + .getParentOperators(); int pos = parentOperators.indexOf(op); assert pos != -1; parentOperators.remove(pos); @@ -341,7 +379,7 @@ owi.put(output, ctx); return output; } - + public static NodeProcessor getFilterProc() { return new FilterPPD(); } Index: ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (working copy) @@ -18,29 +18,33 @@ package org.apache.hadoop.hive.ql.session; -import java.io.*; -import java.util.*; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.net.URL; +import java.util.Calendar; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.log4j.*; -import java.net.URL; - import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Utilities; import 
org.apache.hadoop.hive.ql.history.HiveHistory; +import org.apache.log4j.LogManager; +import org.apache.log4j.PropertyConfigurator; -import org.apache.commons.lang.StringUtils; - /** * SessionState encapsulates common data associated with a session - * - * Also provides support for a thread static session object that can - * be accessed from any point in the code to interact with the user - * and to retrieve configuration information + * + * Also provides support for a thread static session object that can be accessed + * from any point in the code to interact with the user and to retrieve + * configuration information */ public class SessionState { @@ -55,7 +59,7 @@ protected boolean isSilent; /* - * HiveHistory Object + * HiveHistory Object */ protected HiveHistory hiveHist; /** @@ -70,9 +74,10 @@ */ private String commandType; + public HiveConf getConf() { + return conf; + } - public HiveConf getConf() { return conf; } - public void setConf(HiveConf conf) { this.conf = conf; } @@ -89,7 +94,7 @@ this(null); } - public SessionState (HiveConf conf) { + public SessionState(HiveConf conf) { this.conf = conf; } @@ -102,7 +107,6 @@ return (conf.getVar(HiveConf.ConfVars.HIVEQUERYSTRING)); } - public String getQueryId() { return (conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); } @@ -113,15 +117,15 @@ /** * Singleton Session object per thread. - * + * **/ - private static ThreadLocal tss = new ThreadLocal (); + private static ThreadLocal tss = new ThreadLocal(); /** * start a new session and set it to current session */ public static SessionState start(HiveConf conf) { - SessionState ss = new SessionState (conf); + SessionState ss = new SessionState(conf); ss.getConf().setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId()); ss.hiveHist = new HiveHistory(ss); tss.set(ss); @@ -129,18 +133,20 @@ } /** - * set current session to existing session object - * if a thread is running multiple sessions - it must call this method with the new - * session object when switching from one session to another + * set current session to existing session object if a thread is running + * multiple sessions - it must call this method with the new session object + * when switching from one session to another */ public static SessionState start(SessionState startSs) { tss.set(startSs); - if(StringUtils.isEmpty(startSs.getConf().getVar(HiveConf.ConfVars.HIVESESSIONID))) { - startSs.getConf().setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId()); + if (StringUtils.isEmpty(startSs.getConf().getVar( + HiveConf.ConfVars.HIVESESSIONID))) { + startSs.getConf() + .setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId()); } - if (startSs.hiveHist == null){ + if (startSs.hiveHist == null) { startSs.hiveHist = new HiveHistory(startSs); } return startSs; @@ -153,34 +159,32 @@ return tss.get(); } - /** * get hiveHitsory object which does structured logging + * * @return The hive history object */ - public HiveHistory getHiveHistory(){ + public HiveHistory getHiveHistory() { return hiveHist; } - private static String makeSessionId() { GregorianCalendar gc = new GregorianCalendar(); String userid = System.getProperty("user.name"); - return userid + "_" + - String.format("%1$4d%2$02d%3$02d%4$02d%5$02d", gc.get(Calendar.YEAR), - gc.get(Calendar.MONTH) + 1, - gc.get(Calendar.DAY_OF_MONTH), - gc.get(Calendar.HOUR_OF_DAY), - gc.get(Calendar.MINUTE)); + return userid + + "_" + + String.format("%1$4d%2$02d%3$02d%4$02d%5$02d", gc.get(Calendar.YEAR), + gc.get(Calendar.MONTH) + 1, gc.get(Calendar.DAY_OF_MONTH), gc + 
.get(Calendar.HOUR_OF_DAY), gc.get(Calendar.MINUTE)); } public static final String HIVE_L4J = "hive-log4j.properties"; - public static void initHiveLog4j () { + public static void initHiveLog4j() { // allow hive log4j to override any normal initialized one URL hive_l4j = SessionState.class.getClassLoader().getResource(HIVE_L4J); - if(hive_l4j == null) { + if (hive_l4j == null) { System.out.println(HIVE_L4J + " not found"); } else { LogManager.resetConfiguration(); @@ -189,14 +193,17 @@ } /** - * This class provides helper routines to emit informational and error messages to the user - * and log4j files while obeying the current session's verbosity levels. - * - * NEVER write directly to the SessionStates standard output other than to emit result data - * DO use printInfo and printError provided by LogHelper to emit non result data strings - * - * It is perfectly acceptable to have global static LogHelper objects (for example - once per module) - * LogHelper always emits info/error to current session as required. + * This class provides helper routines to emit informational and error + * messages to the user and log4j files while obeying the current session's + * verbosity levels. + * + * NEVER write directly to the SessionStates standard output other than to + * emit result data DO use printInfo and printError provided by LogHelper to + * emit non result data strings + * + * It is perfectly acceptable to have global static LogHelper objects (for + * example - once per module) LogHelper always emits info/error to current + * session as required. */ public static class LogHelper { @@ -233,7 +240,7 @@ } public void printInfo(String info, String detail) { - if(!getIsSilent()) { + if (!getIsSilent()) { getErrStream().println(info); } LOG.info(info + StringUtils.defaultString(detail)); @@ -250,11 +257,12 @@ } private static LogHelper _console; + /** * initialize or retrieve console object for SessionState */ public static LogHelper getConsole() { - if(_console == null) { + if (_console == null) { Log LOG = LogFactory.getLog("SessionState"); _console = new LogHelper(LOG); } @@ -267,15 +275,16 @@ Configuration conf = (ss == null) ? 
new Configuration() : ss.getConf(); try { - if(Utilities.realFile(newFile, conf) != null) + if (Utilities.realFile(newFile, conf) != null) { return newFile; - else { + } else { console.printError(newFile + " does not exist"); return null; } } catch (IOException e) { - console.printError("Unable to validate " + newFile + "\nException: " + e.getMessage(), - "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + console.printError("Unable to validate " + newFile + "\nException: " + + e.getMessage(), "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); return null; } } @@ -289,8 +298,9 @@ console.printInfo("Added " + newJar + " to class path"); return true; } catch (Exception e) { - console.printError("Unable to register " + newJar + "\nException: " + e.getMessage(), - "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + console.printError("Unable to register " + newJar + "\nException: " + + e.getMessage(), "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); return false; } } @@ -302,40 +312,55 @@ console.printInfo("Deleted " + jarsToUnregister + " from class path"); return true; } catch (Exception e) { - console.printError("Unable to unregister " + jarsToUnregister + "\nException: " + e.getMessage(), - "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + console.printError("Unable to unregister " + jarsToUnregister + + "\nException: " + e.getMessage(), "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); return false; } } public static interface ResourceHook { public String preHook(Set cur, String s); + public boolean postHook(Set cur, String s); } public static enum ResourceType { - FILE(new ResourceHook () { - public String preHook(Set cur, String s) { return validateFile(cur, s); } - public boolean postHook(Set cur, String s) { return true; } - }), + FILE(new ResourceHook() { + public String preHook(Set cur, String s) { + return validateFile(cur, s); + } - JAR(new ResourceHook () { - public String preHook(Set cur, String s) { - String newJar = validateFile(cur, s); - if(newJar != null) { - return (registerJar(newJar) ? newJar : null); - } else { - return null; - } + public boolean postHook(Set cur, String s) { + return true; + } + }), + + JAR(new ResourceHook() { + public String preHook(Set cur, String s) { + String newJar = validateFile(cur, s); + if (newJar != null) { + return (registerJar(newJar) ? 
newJar : null); + } else { + return null; } - public boolean postHook(Set cur, String s) { return unregisterJar(s); } - }), + } - ARCHIVE(new ResourceHook () { - public String preHook(Set cur, String s) { return validateFile(cur, s); } - public boolean postHook(Set cur, String s) { return true; } - }); + public boolean postHook(Set cur, String s) { + return unregisterJar(s); + } + }), + ARCHIVE(new ResourceHook() { + public String preHook(Set cur, String s) { + return validateFile(cur, s); + } + + public boolean postHook(Set cur, String s) { + return true; + } + }); + public ResourceHook hook; ResourceType(ResourceHook hook) { @@ -353,8 +378,8 @@ } // try singular - if(s.endsWith("S")) { - s = s.substring(0, s.length()-1); + if (s.endsWith("S")) { + s = s.substring(0, s.length() - 1); } else { return null; } @@ -366,44 +391,46 @@ return null; } - private HashMap> resource_map = new HashMap> (); + private final HashMap> resource_map = new HashMap>(); public void add_resource(ResourceType t, String value) { - if(resource_map.get(t) == null) { - resource_map.put(t, new HashSet ()); + if (resource_map.get(t) == null) { + resource_map.put(t, new HashSet()); } String fnlVal = value; - if(t.hook != null) { + if (t.hook != null) { fnlVal = t.hook.preHook(resource_map.get(t), value); - if(fnlVal == null) + if (fnlVal == null) { return; + } } resource_map.get(t).add(fnlVal); } public boolean delete_resource(ResourceType t, String value) { - if(resource_map.get(t) == null) { + if (resource_map.get(t) == null) { return false; } - if(t.hook != null) { - if(!t.hook.postHook(resource_map.get(t), value)) + if (t.hook != null) { + if (!t.hook.postHook(resource_map.get(t), value)) { return false; + } } return (resource_map.get(t).remove(value)); } public Set list_resource(ResourceType t, List filter) { - if(resource_map.get(t) == null) { + if (resource_map.get(t) == null) { return null; } Set orig = resource_map.get(t); - if(filter == null) { + if (filter == null) { return orig; } else { - Set fnl = new HashSet (); - for(String one: orig) { - if(filter.contains(one)) { + Set fnl = new HashSet(); + for (String one : orig) { + if (filter.contains(one)) { fnl.add(one); } } @@ -412,11 +439,11 @@ } public void delete_resource(ResourceType t) { - if(resource_map.get(t) != null) { - for(String value : resource_map.get(t)) { + if (resource_map.get(t) != null) { + for (String value : resource_map.get(t)) { delete_resource(t, value); } - resource_map.remove (t); + resource_map.remove(t); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAF.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAF.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAF.java (working copy) @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.ql.exec; /** - * Base class of numeric UDAFs like sum and avg which need a NumericUDAFEvaluatorResolver. + * Base class of numeric UDAFs like sum and avg which need a + * NumericUDAFEvaluatorResolver. 
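 * <p>
 * Illustrative sketch only (not part of this patch): the class and evaluator
 * names below are hypothetical, following the usual reflective UDAF evaluator
 * convention (init/iterate/terminatePartial/merge/terminate).
 * <pre>
 *   public class UDAFExampleSum extends NumericUDAF {
 *     public static class SumDoubleEvaluator implements UDAFEvaluator {
 *       private double sum;
 *       private boolean empty;
 *       public void init() { sum = 0; empty = true; }
 *       public boolean iterate(Double value) {
 *         if (value != null) { sum += value; empty = false; }
 *         return true;
 *       }
 *       public Double terminatePartial() { return empty ? null : Double.valueOf(sum); }
 *       public boolean merge(Double other) { return iterate(other); }
 *       public Double terminate() { return empty ? null : Double.valueOf(sum); }
 *     }
 *   }
 * </pre>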
*/ public class NumericUDAF extends UDAF { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java (working copy) @@ -35,7 +35,7 @@ @Override public ObjectInspector initialize(ObjectInspector rowInspector) - throws HiveException { + throws HiveException { return PrimitiveObjectInspectorFactory.writableVoidObjectInspector; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluator.java (working copy) @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.ql.exec; /** - * Interface that encapsulates the evaluation logic of a UDAF. One evaluator is needed - * for every overloaded form of a UDAF .e.g max and min UDAFs would have evaluators for - * integer, string and other types. On the other hand avg would have an evaluator only - * for the double type. + * Interface that encapsulates the evaluation logic of a UDAF. One evaluator is + * needed for every overloaded form of a UDAF .e.g max and min UDAFs would have + * evaluators for integer, string and other types. On the other hand avg would + * have an evaluator only for the double type. */ public interface UDAFEvaluator { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentTypeException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentTypeException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentTypeException.java (working copy) @@ -18,19 +18,18 @@ package org.apache.hadoop.hive.ql.exec; - -/** +/** * exception class, thrown when udf arguments have wrong types. 
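 * <p>
 * Illustrative sketch only (not part of this patch): a type-checking UDF or
 * resolver might report a bad argument roughly as below, assuming an
 * ObjectInspector[] named "arguments" such as the one passed to
 * GenericUDF.initialize.
 * <pre>
 *   if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
 *     throw new UDFArgumentTypeException(0,
 *         "Only primitive type arguments are accepted but "
 *             + arguments[0].getTypeName() + " was passed.");
 *   }
 * </pre>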
*/ public class UDFArgumentTypeException extends UDFArgumentException { - + int argumentId; - + public UDFArgumentTypeException() { super(); } - + public UDFArgumentTypeException(int argumentId, String message) { super(message); this.argumentId = argumentId; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionTask.java (working copy) @@ -40,17 +40,17 @@ private static final Log LOG = LogFactory.getLog("hive.ql.exec.FunctionTask"); transient HiveConf conf; - + public FunctionTask() { super(); } - + @Override public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { super.initialize(conf, queryPlan, ctx); this.conf = conf; } - + @Override public int execute() { createFunctionDesc createFunctionDesc = work.getCreateFunctionDesc(); @@ -68,25 +68,26 @@ private int createFunction(createFunctionDesc createFunctionDesc) { try { Class udfClass = getUdfClass(createFunctionDesc); - if(UDF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryUDF(createFunctionDesc.getFunctionName(), - (Class) udfClass, false); + if (UDF.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTemporaryUDF(createFunctionDesc + .getFunctionName(), (Class) udfClass, false); return 0; - } else if(GenericUDF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDF(createFunctionDesc.getFunctionName(), - (Class) udfClass); + } else if (GenericUDF.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTemporaryGenericUDF(createFunctionDesc + .getFunctionName(), (Class) udfClass); return 0; - } else if(GenericUDTF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDTF(createFunctionDesc.getFunctionName(), - (Class) udfClass); + } else if (GenericUDTF.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTemporaryGenericUDTF(createFunctionDesc + .getFunctionName(), (Class) udfClass); return 0; - } else if(UDAF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryUDAF(createFunctionDesc.getFunctionName(), - (Class) udfClass); + } else if (UDAF.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTemporaryUDAF(createFunctionDesc + .getFunctionName(), (Class) udfClass); return 0; - } else if(GenericUDAFResolver.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDAF(createFunctionDesc.getFunctionName(), - (GenericUDAFResolver)ReflectionUtils.newInstance(udfClass, null)); + } else if (GenericUDAFResolver.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTemporaryGenericUDAF(createFunctionDesc + .getFunctionName(), (GenericUDAFResolver) ReflectionUtils + .newInstance(udfClass, null)); return 0; } return 1; @@ -99,7 +100,8 @@ private int dropFunction(dropFunctionDesc dropFunctionDesc) { try { - FunctionRegistry.unregisterTemporaryUDF(dropFunctionDesc.getFunctionName()); + FunctionRegistry.unregisterTemporaryUDF(dropFunctionDesc + .getFunctionName()); return 0; } catch (HiveException e) { LOG.info("drop function: " + StringUtils.stringifyException(e)); @@ -112,7 +114,8 @@ throws ClassNotFoundException { return Class.forName(desc.getClassName(), true, JavaUtils.getClassLoader()); } - + + @Override public int getType() { return StageType.FUNC; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -33,132 +33,148 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; - /** * Join operator implementation. */ -public class JoinOperator extends CommonJoinOperator implements Serializable { +public class JoinOperator extends CommonJoinOperator implements + Serializable { private static final long serialVersionUID = 1L; - + private transient SkewJoinHandler skewJoinKeyContext = null; - + @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); initializeChildren(hconf); - if(this.handleSkewJoin) { + if (handleSkewJoin) { skewJoinKeyContext = new SkewJoinHandler(this); skewJoinKeyContext.initiliaze(hconf); } } - - public void processOp(Object row, int tag) - throws HiveException { + + @Override + public void processOp(Object row, int tag) throws HiveException { try { - + // get alias - alias = (byte)tag; - - if ((lastAlias == null) || (!lastAlias.equals(alias))) + alias = (byte) tag; + + if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; - - ArrayList nr = computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias)); - - if(this.handleSkewJoin) + } + + ArrayList nr = computeValues(row, joinValues.get(alias), + joinValuesObjectInspectors.get(alias)); + + if (handleSkewJoin) { skewJoinKeyContext.handleSkew(tag); - + } + // number of rows for the key in the given table int sz = storage.get(alias).size(); - + // Are we consuming too much memory if (alias == numAliases - 1) { if (sz == joinEmitInterval) { - // The input is sorted by alias, so if we are already in the last join operand, + // The input is sorted by alias, so if we are already in the last join + // operand, // we can emit some results now. - // Note this has to be done before adding the current row to the storage, + // Note this has to be done before adding the current row to the + // storage, // to preserve the correctness for outer joins. checkAndGenObject(); storage.get(alias).clear(); } } else { if (sz == nextSz) { - // Output a warning if we reached at least 1000 rows for a join operand + // Output a warning if we reached at least 1000 rows for a join + // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. 
- StructObjectInspector soi = (StructObjectInspector)inputObjInspectors[tag]; - StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString()); + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; + StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY + .toString()); Object keyObject = soi.getStructFieldData(row, sf); - LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); + LOG.warn("table " + alias + " has " + sz + " rows for join key " + + keyObject); nextSz = getNextSize(nextSz); } } - + // Add the value to the vector storage.get(alias).add(nr); - + } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); } } - + + @Override public int getType() { return OperatorType.JOIN; } /** * All done - * + * */ + @Override public void closeOp(boolean abort) throws HiveException { - if (this.handleSkewJoin) { + if (handleSkewJoin) { skewJoinKeyContext.close(abort); } super.closeOp(abort); } - + @Override - public void jobClose(Configuration hconf, boolean success) throws HiveException { - if(this.handleSkewJoin) { + public void jobClose(Configuration hconf, boolean success) + throws HiveException { + if (handleSkewJoin) { try { for (int i = 0; i < numAliases; i++) { - String specPath = this.conf.getBigKeysDirMap().get((byte)i); + String specPath = conf.getBigKeysDirMap().get((byte) i); FileSinkOperator.mvFileToFinalPath(specPath, hconf, success, LOG); for (int j = 0; j < numAliases; j++) { - if(j == i) continue; - specPath = getConf().getSmallKeysDirMap().get((byte)i).get((byte)j); + if (j == i) { + continue; + } + specPath = getConf().getSmallKeysDirMap().get((byte) i).get( + (byte) j); FileSinkOperator.mvFileToFinalPath(specPath, hconf, success, LOG); } } - - if(success) { - //move up files + + if (success) { + // move up files for (int i = 0; i < numAliases; i++) { - String specPath = this.conf.getBigKeysDirMap().get((byte)i); + String specPath = conf.getBigKeysDirMap().get((byte) i); moveUpFiles(specPath, hconf, LOG); for (int j = 0; j < numAliases; j++) { - if(j == i) continue; - specPath = getConf().getSmallKeysDirMap().get((byte)i).get((byte)j); + if (j == i) { + continue; + } + specPath = getConf().getSmallKeysDirMap().get((byte) i).get( + (byte) j); moveUpFiles(specPath, hconf, LOG); } } } } catch (IOException e) { - throw new HiveException (e); + throw new HiveException(e); } } super.jobClose(hconf, success); } - - - - private void moveUpFiles(String specPath, Configuration hconf, Log log) throws IOException, HiveException { + + private void moveUpFiles(String specPath, Configuration hconf, Log log) + throws IOException, HiveException { FileSystem fs = (new Path(specPath)).getFileSystem(hconf); Path finalPath = new Path(specPath); - - if(fs.exists(finalPath)) { + + if (fs.exists(finalPath)) { FileStatus[] taskOutputDirs = fs.listStatus(finalPath); - if(taskOutputDirs != null ) { + if (taskOutputDirs != null) { for (FileStatus dir : taskOutputDirs) { Utilities.renameOrMoveFiles(fs, dir.getPath(), finalPath); fs.delete(dir.getPath(), true); @@ -166,27 +182,26 @@ } } } - + /** * Forward a record of join results. - * + * * @throws HiveException */ + @Override public void endGroup() throws HiveException { - //if this is a skew key, we need to handle it in a separate map reduce job. - if(this.handleSkewJoin && skewJoinKeyContext.currBigKeyTag >=0) { + // if this is a skew key, we need to handle it in a separate map reduce job. 
+ if (handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) { try { skewJoinKeyContext.endGroup(); } catch (IOException e) { - LOG.error(e.getMessage(),e); + LOG.error(e.getMessage(), e); throw new HiveException(e); } return; - } - else { + } else { checkAndGenObject(); } } - + } - Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java (working copy) @@ -44,41 +44,44 @@ private int maxRows = 100; private FetchOperator ftOp; - private LazySimpleSerDe mSerde; - private int totalRows; - - public FetchTask() { - super(); - } - - @Override - public void initialize (HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { + private LazySimpleSerDe mSerde; + private int totalRows; + + public FetchTask() { + super(); + } + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { super.initialize(conf, queryPlan, ctx); - - try { - // Create a file system handle - JobConf job = new JobConf(conf, ExecDriver.class); - mSerde = new LazySimpleSerDe(); - Properties mSerdeProp = new Properties(); - mSerdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode); - mSerdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, ((fetchWork)work).getSerializationNullFormat()); - mSerde.initialize(job, mSerdeProp); - mSerde.setUseJSONSerialize(true); - - ftOp = new FetchOperator(work, job); + try { + // Create a file system handle + JobConf job = new JobConf(conf, ExecDriver.class); + + mSerde = new LazySimpleSerDe(); + Properties mSerdeProp = new Properties(); + mSerdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode); + mSerdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, (work) + .getSerializationNullFormat()); + mSerde.initialize(job, mSerdeProp); + mSerde.setUseJSONSerialize(true); + + ftOp = new FetchOperator(work, job); } catch (Exception e) { // Bail out ungracefully - we should never hit // this here - but would have hit it in SemanticAnalyzer LOG.error(StringUtils.stringifyException(e)); - throw new RuntimeException (e); + throw new RuntimeException(e); } } - + + @Override public int execute() { assert false; return 0; } + /** * Return the tableDesc of the fetchWork */ @@ -99,41 +102,43 @@ public void setMaxRows(int maxRows) { this.maxRows = maxRows; } - + + @Override public boolean fetch(Vector res) throws IOException { try { int numRows = 0; int rowsRet = maxRows; - if ((work.getLimit() >= 0) && ((work.getLimit() - totalRows) < rowsRet)) + if ((work.getLimit() >= 0) && ((work.getLimit() - totalRows) < rowsRet)) { rowsRet = work.getLimit() - totalRows; + } if (rowsRet <= 0) { ftOp.clearFetchContext(); return false; } - while (numRows < rowsRet) { - InspectableObject io = ftOp.getNextRow(); - if (io == null) { - if (numRows == 0) + while (numRows < rowsRet) { + InspectableObject io = ftOp.getNextRow(); + if (io == null) { + if (numRows == 0) { return false; + } totalRows += numRows; return true; - } - - res.add(((Text)mSerde.serialize(io.o, io.oi)).toString()); - numRows++; - } + } + + res.add(((Text) mSerde.serialize(io.o, io.oi)).toString()); + numRows++; + } totalRows += numRows; return true; - } - catch (IOException e) { + } catch (IOException e) { throw e; - } - catch (Exception e) { + } catch (Exception e) { throw new IOException(e); } } - + + @Override public int getType() { return StageType.FETCH; } Index: 
ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java (working copy) @@ -25,19 +25,17 @@ * * New UDF classes need to inherit from this UDF class. * - * Required for all UDF classes: - * 1. Implement one or more methods named "evaluate" which will be called by Hive. - * The following are some examples: - * public int evaluate(); - * public int evaluate(int a); - * public double evaluate(int a, double b); - * public String evaluate(String a, int b, String c); + * Required for all UDF classes: 1. Implement one or more methods named + * "evaluate" which will be called by Hive. The following are some examples: + * public int evaluate(); public int evaluate(int a); public double evaluate(int + * a, double b); public String evaluate(String a, int b, String c); * - * "evaluate" should never be a void method. However it can return "null" if needed. + * "evaluate" should never be a void method. However it can return "null" if + * needed. */ -@UDFType(deterministic=true) +@UDFType(deterministic = true) public class UDF { - + /** * The resolver to use for method resolution. */ @@ -49,23 +47,24 @@ public UDF() { rslv = new DefaultUDFMethodResolver(this.getClass()); } - + /** * The constructor with user-provided UDFMethodResolver. */ protected UDF(UDFMethodResolver rslv) { this.rslv = rslv; } - + /** * Sets the resolver * - * @param rslv The method resolver to use for method resolution. + * @param rslv + * The method resolver to use for method resolution. */ public void setResolver(UDFMethodResolver rslv) { this.rslv = rslv; } - + /** * Get the method resolver. */ Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java (working copy) @@ -20,7 +20,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -31,25 +30,25 @@ protected exprNodeConstantDesc expr; transient ObjectInspector writableObjectInspector; transient Object writableValue; - + public ExprNodeConstantEvaluator(exprNodeConstantDesc expr) { this.expr = expr; - PrimitiveCategory pc = ((PrimitiveTypeInfo)expr.getTypeInfo()) + PrimitiveCategory pc = ((PrimitiveTypeInfo) expr.getTypeInfo()) .getPrimitiveCategory(); writableObjectInspector = PrimitiveObjectInspectorFactory .getPrimitiveWritableObjectInspector(pc); - // Convert from Java to Writable + // Convert from Java to Writable writableValue = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(pc) - .getPrimitiveWritableObject(expr.getValue()); + .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject( + expr.getValue()); } @Override public ObjectInspector initialize(ObjectInspector rowInspector) - throws HiveException { + throws HiveException { return writableObjectInspector; } - + @Override public Object 
evaluate(Object row) throws HiveException { return writableValue; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluatorResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluatorResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDAFEvaluatorResolver.java (working copy) @@ -23,22 +23,24 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * The UDF Method resolver interface. A user can plugin a resolver to their UDF by implementing the - * functions in this interface. Note that the resolver is stored in the UDF class as an instance - * variable. We did not use a static variable because many resolvers maintain the class of the - * enclosing UDF as state and are called from a base class e.g. UDFBaseCompare. This makes it very - * easy to write UDFs that want to do resolution similar to the comparison operators. Such UDFs - * just need to extend UDFBaseCompare and do not have to care about the UDFMethodResolver interface. - * Same is true for UDFs that want to do resolution similar to that done by the numeric operators. - * Such UDFs simply have to extend UDFBaseNumericOp class. For the default resolution the UDF - * implementation simply needs to extend the UDF class. + * The UDF Method resolver interface. A user can plugin a resolver to their UDF + * by implementing the functions in this interface. Note that the resolver is + * stored in the UDF class as an instance variable. We did not use a static + * variable because many resolvers maintain the class of the enclosing UDF as + * state and are called from a base class e.g. UDFBaseCompare. This makes it + * very easy to write UDFs that want to do resolution similar to the comparison + * operators. Such UDFs just need to extend UDFBaseCompare and do not have to + * care about the UDFMethodResolver interface. Same is true for UDFs that want + * to do resolution similar to that done by the numeric operators. Such UDFs + * simply have to extend UDFBaseNumericOp class. For the default resolution the + * UDF implementation simply needs to extend the UDF class. */ public interface UDAFEvaluatorResolver { - + /** * Gets the evaluator class corresponding to the passed parameter list. 
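 * <p>
 * Illustrative sketch only (not part of this patch): resolving the evaluator
 * for a single DOUBLE argument, where "resolver" is assumed to be the
 * UDAFEvaluatorResolver obtained from the UDAF instance.
 * <pre>
 *   List<TypeInfo> argTypes = new ArrayList<TypeInfo>();
 *   argTypes.add(TypeInfoFactory.doubleTypeInfo);
 *   // may throw AmbiguousMethodException if more than one evaluator matches
 *   Class<? extends UDAFEvaluator> evaluatorClass = resolver.getEvaluatorClass(argTypes);
 * </pre>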
*/ Class getEvaluatorClass(List argClasses) - throws AmbiguousMethodException; - + throws AmbiguousMethodException; + } \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java (working copy) @@ -18,13 +18,10 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** @@ -33,10 +30,10 @@ public class ExprNodeColumnEvaluator extends ExprNodeEvaluator { protected exprNodeColumnDesc expr; - + transient StructObjectInspector[] inspectors; transient StructField[] fields; - + public ExprNodeColumnEvaluator(exprNodeColumnDesc expr) { this.expr = expr; } @@ -45,27 +42,28 @@ public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException { - // We need to support field names like KEY.0, VALUE.1 between + // We need to support field names like KEY.0, VALUE.1 between // map-reduce boundary. String[] names = expr.getColumn().split("\\."); inspectors = new StructObjectInspector[names.length]; fields = new StructField[names.length]; - - for(int i=0; i extends Operator - implements Serializable { +public abstract class TerminalOperator extends + Operator implements Serializable { private static final long serialVersionUID = 1L; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java (working copy) @@ -66,40 +66,40 @@ * Right now, we use one file per skew key. * *

- * For more info, please see - * https://issues.apache.org/jira/browse/HIVE-964. + * For more info, please see https://issues.apache.org/jira/browse/HIVE-964. * */ public class SkewJoinHandler { - - static final protected Log LOG = LogFactory.getLog(SkewJoinHandler.class.getName()); - + + static final protected Log LOG = LogFactory.getLog(SkewJoinHandler.class + .getName()); + public int currBigKeyTag = -1; - + private int rowNumber = 0; private int currTag = -1; - + private int skewKeyDefinition = -1; - private Map skewKeysTableObjectInspector = null; - private Map tblSerializers = null; + private Map skewKeysTableObjectInspector = null; + private Map tblSerializers = null; private Map tblDesc = null; - + private Map bigKeysExistingMap = null; - + Configuration hconf = null; List dummyKey = null; String taskId; - - private CommonJoinOperator joinOp; - private int numAliases; - private joinDesc conf; - + + private final CommonJoinOperator joinOp; + private final int numAliases; + private final joinDesc conf; + public SkewJoinHandler(CommonJoinOperator joinOp) { this.joinOp = joinOp; - this.numAliases = joinOp.numAliases; - this.conf = joinOp.getConf(); + numAliases = joinOp.numAliases; + conf = joinOp.getConf(); } - + public void initiliaze(Configuration hconf) { this.hconf = hconf; joinDesc desc = joinOp.getConf(); @@ -114,7 +114,7 @@ for (int i = 0; i < numAliases; i++) { Byte alias = conf.getTagOrder()[i]; List skewTableKeyInspectors = new ArrayList(); - StructObjectInspector soi = (StructObjectInspector) this.joinOp.inputObjInspectors[alias]; + StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias]; StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY .toString()); List keyFields = ((StructObjectInspector) sf @@ -124,7 +124,8 @@ skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector()); } tableDesc joinKeyDesc = desc.getKeyTableDesc(); - List keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties()); + List keyColNames = Utilities.getColumnNames(joinKeyDesc + .getProperties()); StructObjectInspector structTblKeyInpector = ObjectInspectorFactory .getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors); @@ -135,21 +136,23 @@ tblSerializers.put((byte) i, serializer); } catch (SerDeException e) { LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e); - this.joinOp.handleSkewJoin = false; + joinOp.handleSkewJoin = false; break; } - tableDesc valTblDesc = this.joinOp.getSpillTableDesc(alias); + tableDesc valTblDesc = joinOp.getSpillTableDesc(alias); List valColNames = new ArrayList(); - if (valTblDesc != null) + if (valTblDesc != null) { valColNames = Utilities.getColumnNames(valTblDesc.getProperties()); + } StructObjectInspector structTblValInpector = ObjectInspectorFactory .getStandardStructObjectInspector(valColNames, - this.joinOp.joinValuesStandardObjectInspectors.get((byte) i)); + joinOp.joinValuesStandardObjectInspectors.get((byte) i)); StructObjectInspector structTblInpector = ObjectInspectorFactory - .getUnionStructObjectInspector(Arrays.asList(new StructObjectInspector[] { - structTblValInpector,structTblKeyInpector })); + .getUnionStructObjectInspector(Arrays + .asList(new StructObjectInspector[] { structTblValInpector, + structTblKeyInpector })); skewKeysTableObjectInspector.put((byte) i, structTblInpector); } @@ -165,50 +168,57 @@ } } } - + void endGroup() throws IOException, HiveException { - if(skewKeyInCurrentGroup) { - - String specPath = 
conf.getBigKeysDirMap().get((byte)currBigKeyTag); - RowContainer> bigKey = joinOp.storage.get(Byte.valueOf((byte)currBigKeyTag)); - Path outputPath = getOperatorOutputPath(specPath); + if (skewKeyInCurrentGroup) { + + String specPath = conf.getBigKeysDirMap().get((byte) currBigKeyTag); + RowContainer> bigKey = joinOp.storage.get(Byte + .valueOf((byte) currBigKeyTag)); + Path outputPath = getOperatorOutputPath(specPath); FileSystem destFs = outputPath.getFileSystem(hconf); bigKey.copyToDFSDirecory(destFs, outputPath); - + for (int i = 0; i < numAliases; i++) { - if (((byte)i) == currBigKeyTag) continue; - RowContainer> values = joinOp.storage.get(Byte.valueOf((byte)i)); - if(values != null) { - specPath = conf.getSmallKeysDirMap().get((byte)currBigKeyTag).get((byte)i); + if (((byte) i) == currBigKeyTag) { + continue; + } + RowContainer> values = joinOp.storage.get(Byte + .valueOf((byte) i)); + if (values != null) { + specPath = conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get( + (byte) i); values.copyToDFSDirecory(destFs, getOperatorOutputPath(specPath)); } } } skewKeyInCurrentGroup = false; } - + boolean skewKeyInCurrentGroup = false; + public void handleSkew(int tag) throws HiveException { - if(joinOp.newGroupStarted || tag != currTag) { + if (joinOp.newGroupStarted || tag != currTag) { rowNumber = 0; currTag = tag; } - - if(joinOp.newGroupStarted) { + + if (joinOp.newGroupStarted) { currBigKeyTag = -1; joinOp.newGroupStarted = false; - dummyKey = (List)joinOp.getGroupKeyObject(); + dummyKey = (List) joinOp.getGroupKeyObject(); skewKeyInCurrentGroup = false; - + for (int i = 0; i < numAliases; i++) { - RowContainer> rc = joinOp.storage.get(Byte.valueOf((byte)i)); - if(rc != null) { + RowContainer> rc = joinOp.storage.get(Byte + .valueOf((byte) i)); + if (rc != null) { rc.setKeyObject(dummyKey); } } } - + rowNumber++; if (currBigKeyTag == -1 && (tag < numAliases - 1) && rowNumber >= skewKeyDefinition) { @@ -216,14 +226,15 @@ // table (the last table can always be streamed), we define that we get // a skew key now. currBigKeyTag = tag; - + // right now we assume that the group by is an ArrayList object. It may // change in future. - if(! 
(dummyKey instanceof List)) + if (!(dummyKey instanceof List)) { throw new RuntimeException("Bug in handle skew key in a seperate job."); - + } + skewKeyInCurrentGroup = true; - bigKeysExistingMap.put(Byte.valueOf((byte)currBigKeyTag), Boolean.TRUE); + bigKeysExistingMap.put(Byte.valueOf((byte) currBigKeyTag), Boolean.TRUE); } } @@ -240,8 +251,9 @@ // if we did not see a skew key in this table, continue to next // table - if (!bigKeysExistingMap.get((byte) bigKeyTbl)) + if (!bigKeysExistingMap.get((byte) bigKeyTbl)) { continue; + } try { String specPath = conf.getBigKeysDirMap().get((byte) bigKeyTbl); @@ -249,8 +261,9 @@ FileSystem fs = bigKeyPath.getFileSystem(hconf); delete(bigKeyPath, fs); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { - if (((byte) smallKeyTbl) == bigKeyTbl) + if (((byte) smallKeyTbl) == bigKeyTbl) { continue; + } specPath = conf.getSmallKeysDirMap().get((byte) bigKeyTbl).get( (byte) smallKeyTbl); delete(getOperatorOutputPath(specPath), fs); @@ -272,26 +285,30 @@ private void commit() throws IOException { for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) { - + // if we did not see a skew key in this table, continue to next table // we are trying to avoid an extra call of FileSystem.exists() - Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte)bigKeyTbl)); - if (existing == null || !existing) + Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl)); + if (existing == null || !existing) { continue; - - String specPath = conf.getBigKeysDirMap().get(Byte.valueOf((byte) bigKeyTbl)); + } + + String specPath = conf.getBigKeysDirMap().get( + Byte.valueOf((byte) bigKeyTbl)); commitOutputPathToFinalPath(specPath, false); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { - if ( smallKeyTbl == bigKeyTbl) + if (smallKeyTbl == bigKeyTbl) { continue; - specPath = conf.getSmallKeysDirMap().get(Byte.valueOf((byte) bigKeyTbl)).get( - Byte.valueOf((byte) smallKeyTbl)); + } + specPath = conf.getSmallKeysDirMap() + .get(Byte.valueOf((byte) bigKeyTbl)).get( + Byte.valueOf((byte) smallKeyTbl)); // the file may not exist, and we just ignore this commitOutputPathToFinalPath(specPath, true); } } } - + private void commitOutputPathToFinalPath(String specPath, boolean ignoreNonExisting) throws IOException { Path outPath = getOperatorOutputPath(specPath); @@ -304,23 +321,25 @@ throw new IOException("Unable to rename output to: " + finalPath); } } catch (FileNotFoundException e) { - if (!ignoreNonExisting) + if (!ignoreNonExisting) { throw e; + } } catch (IOException e) { - if (!fs.exists(outPath) && ignoreNonExisting) + if (!fs.exists(outPath) && ignoreNonExisting) { return; + } throw e; } } - + private Path getOperatorOutputPath(String specPath) throws IOException { Path tmpPath = Utilities.toTempPath(specPath); return new Path(tmpPath, Utilities.toTempPath(taskId)); } - + private Path getOperatorFinalPath(String specPath) throws IOException { Path tmpPath = Utilities.toTempPath(specPath); return new Path(tmpPath, taskId); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (working copy) @@ -34,7 +34,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.plan.partitionDesc; -import 
org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -46,16 +45,19 @@ import org.apache.hadoop.io.Writable; /** - * Map operator. This triggers overall map side processing. - * This is a little different from regular operators in that - * it starts off by processing a Writable data structure from - * a Table (instead of a Hive Object). + * Map operator. This triggers overall map side processing. This is a little + * different from regular operators in that it starts off by processing a + * Writable data structure from a Table (instead of a Hive Object). **/ -public class MapOperator extends Operator implements Serializable { +public class MapOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; - public static enum Counter {DESERIALIZE_ERRORS} - transient private LongWritable deserialize_error_count = new LongWritable (); + + public static enum Counter { + DESERIALIZE_ERRORS + } + + transient private final LongWritable deserialize_error_count = new LongWritable(); transient private Deserializer deserializer; transient private Object[] rowWithPart; @@ -65,7 +67,7 @@ private Map, java.util.ArrayList> operatorToPaths; - private java.util.ArrayList childrenPaths = new ArrayList(); + private final java.util.ArrayList childrenPaths = new ArrayList(); private ArrayList> extraChildrenToClose = null; @@ -86,27 +88,31 @@ this.op = op; } + @Override public boolean equals(Object o) { if (o instanceof MapInputPath) { - MapInputPath mObj = (MapInputPath)o; - if (mObj == null) + MapInputPath mObj = (MapInputPath) o; + if (mObj == null) { return false; - return path.equals(mObj.path) && alias.equals(mObj.alias) && op.equals(mObj.op); + } + return path.equals(mObj.path) && alias.equals(mObj.alias) + && op.equals(mObj.op); } return false; } + @Override public int hashCode() { return (op == null) ? 0 : op.hashCode(); } } private static class MapOpCtx { - boolean isPartitioned; + boolean isPartitioned; StructObjectInspector rowObjectInspector; - Object[] rowWithPart; - Deserializer deserializer; + Object[] rowWithPart; + Deserializer deserializer; public String tableName; public String partName; @@ -116,7 +122,8 @@ * @param rowWithPart */ public MapOpCtx(boolean isPartitioned, - StructObjectInspector rowObjectInspector, Object[] rowWithPart, Deserializer deserializer) { + StructObjectInspector rowObjectInspector, Object[] rowWithPart, + Deserializer deserializer) { this.isPartitioned = isPartitioned; this.rowObjectInspector = rowObjectInspector; this.rowWithPart = rowWithPart; @@ -153,78 +160,89 @@ } /** - * Initializes this map op as the root of the tree. It sets JobConf & MapRedWork - * and starts initialization of the operator tree rooted at this op. + * Initializes this map op as the root of the tree. It sets JobConf & + * MapRedWork and starts initialization of the operator tree rooted at this + * op. 
+ * * @param hconf * @param mrwork * @throws HiveException */ - public void initializeAsRoot(Configuration hconf, mapredWork mrwork) throws HiveException { + public void initializeAsRoot(Configuration hconf, mapredWork mrwork) + throws HiveException { setConf(mrwork); setChildren(hconf); initialize(hconf, null); } - private static MapOpCtx initObjectInspector(mapredWork conf, Configuration hconf, String onefile) - throws HiveException, ClassNotFoundException, InstantiationException, IllegalAccessException, SerDeException { + private static MapOpCtx initObjectInspector(mapredWork conf, + Configuration hconf, String onefile) throws HiveException, + ClassNotFoundException, InstantiationException, IllegalAccessException, + SerDeException { partitionDesc td = conf.getPathToPartitionInfo().get(onefile); LinkedHashMap partSpec = td.getPartSpec(); Properties tblProps = td.getProperties(); Class sdclass = td.getDeserializerClass(); - if(sdclass == null) { + if (sdclass == null) { String className = td.getSerdeClassName(); if ((className == "") || (className == null)) { - throw new HiveException("SerDe class or the SerDe class name is not set for table: " - + td.getProperties().getProperty("name")); + throw new HiveException( + "SerDe class or the SerDe class name is not set for table: " + + td.getProperties().getProperty("name")); } sdclass = hconf.getClassByName(className); } String tableName = String.valueOf(tblProps.getProperty("name")); String partName = String.valueOf(partSpec); - //HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, tableName); - //HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, partName); + // HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, tableName); + // HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, partName); Deserializer deserializer = (Deserializer) sdclass.newInstance(); deserializer.initialize(hconf, tblProps); - StructObjectInspector rowObjectInspector = (StructObjectInspector)deserializer.getObjectInspector(); + StructObjectInspector rowObjectInspector = (StructObjectInspector) deserializer + .getObjectInspector(); MapOpCtx opCtx = null; // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns - String pcols = tblProps.getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); - //Log LOG = LogFactory.getLog(MapOperator.class.getName()); + String pcols = tblProps + .getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); + // Log LOG = LogFactory.getLog(MapOperator.class.getName()); if (pcols != null && pcols.length() > 0) { String[] partKeys = pcols.trim().split("/"); List partNames = new ArrayList(partKeys.length); Object[] partValues = new Object[partKeys.length]; - List partObjectInspectors = new ArrayList(partKeys.length); - for(int i = 0; i < partKeys.length; i++ ) { + List partObjectInspectors = new ArrayList( + partKeys.length); + for (int i = 0; i < partKeys.length; i++) { String key = partKeys[i]; partNames.add(key); // Partitions do not exist for this table - if (partSpec == null) + if (partSpec == null) { partValues[i] = new Text(); - else + } else { partValues[i] = new Text(partSpec.get(key)); - partObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + } + partObjectInspectors + .add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); } StructObjectInspector partObjectInspector = ObjectInspectorFactory - 
.getStandardStructObjectInspector(partNames, partObjectInspectors); + .getStandardStructObjectInspector(partNames, partObjectInspectors); Object[] rowWithPart = new Object[2]; rowWithPart[1] = partValues; rowObjectInspector = ObjectInspectorFactory - .getUnionStructObjectInspector( - Arrays.asList(new StructObjectInspector[]{ - rowObjectInspector, - partObjectInspector})); - //LOG.info("dump " + tableName + " " + partName + " " + rowObjectInspector.getTypeName()); + .getUnionStructObjectInspector(Arrays + .asList(new StructObjectInspector[] { rowObjectInspector, + partObjectInspector })); + // LOG.info("dump " + tableName + " " + partName + " " + + // rowObjectInspector.getTypeName()); opCtx = new MapOpCtx(true, rowObjectInspector, rowWithPart, deserializer); - } - else { - //LOG.info("dump2 " + tableName + " " + partName + " " + rowObjectInspector.getTypeName()); + } else { + // LOG.info("dump2 " + tableName + " " + partName + " " + + // rowObjectInspector.getTypeName()); opCtx = new MapOpCtx(false, rowObjectInspector, null, deserializer); } opCtx.tableName = tableName; @@ -236,10 +254,9 @@ Path fpath = new Path((new Path(HiveConf.getVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME))).toUri().getPath()); - ArrayList> children = - new ArrayList>(); + ArrayList> children = new ArrayList>(); opCtxMap = new HashMap(); - operatorToPaths = new HashMap, java.util.ArrayList> (); + operatorToPaths = new HashMap, java.util.ArrayList>(); statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count); @@ -256,17 +273,21 @@ + fpath.toUri().getPath()); MapInputPath inp = new MapInputPath(onefile, onealias, op); opCtxMap.put(inp, opCtx); - if(operatorToPaths.get(op) == null) - operatorToPaths.put(op, new java.util.ArrayList()); + if (operatorToPaths.get(op) == null) { + operatorToPaths.put(op, new java.util.ArrayList()); + } operatorToPaths.get(op).add(onefile); - op.setParentOperators(new ArrayList>()); + op + .setParentOperators(new ArrayList>()); op.getParentOperators().add(this); - // check for the operators who will process rows coming to this Map Operator + // check for the operators who will process rows coming to this Map + // Operator if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { children.add(op); childrenPaths.add(onefile); - LOG.info("dump " + op.getName() + " " + opCtxMap.get(inp).getRowObjectInspector().getTypeName()); + LOG.info("dump " + op.getName() + " " + + opCtxMap.get(inp).getRowObjectInspector().getTypeName()); if (!done) { deserializer = opCtxMap.get(inp).getDeserializer(); isPartitioned = opCtxMap.get(inp).isPartitioned(); @@ -292,48 +313,55 @@ } } - + @Override public void initializeOp(Configuration hconf) throws HiveException { // set that parent initialization is done and call initialize on children state = State.INIT; List> children = getChildOperators(); for (Entry entry : opCtxMap.entrySet()) { - // Add alias, table name, and partitions to hadoop conf so that their children will + // Add alias, table name, and partitions to hadoop conf so that their + // children will // inherit these - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, entry.getValue().tableName); - HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, entry.getValue().partName); - MapInputPath input = entry.getKey(); + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, + entry.getValue().tableName); + HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, entry + .getValue().partName); + MapInputPath input = entry.getKey(); Operator op = input.op; - // op is 
not in the children list, so need to remember it and close it afterwards - if ( children.indexOf(op) == -1 ) { - if ( extraChildrenToClose == null ) { + // op is not in the children list, so need to remember it and close it + // afterwards + if (children.indexOf(op) == -1) { + if (extraChildrenToClose == null) { extraChildrenToClose = new ArrayList>(); } extraChildrenToClose.add(op); } - // multiple input paths may corresponding the same operator (tree). The - // below logic is to avoid initialize one operator multiple times if there - // is one input path in this mapper's input paths. + // multiple input paths may corresponding the same operator (tree). The + // below logic is to avoid initialize one operator multiple times if there + // is one input path in this mapper's input paths. boolean shouldInit = true; List paths = operatorToPaths.get(op); - for(String path: paths) { - if(childrenPaths.contains(path) && !path.equals(input.path)) { - shouldInit = false; - break; - } + for (String path : paths) { + if (childrenPaths.contains(path) && !path.equals(input.path)) { + shouldInit = false; + break; + } } - if(shouldInit) - op.initialize(hconf, new ObjectInspector[]{entry.getValue().getRowObjectInspector()}); + if (shouldInit) { + op.initialize(hconf, new ObjectInspector[] { entry.getValue() + .getRowObjectInspector() }); + } } } /** * close extra child operators that are initialized but are not executed. */ + @Override public void closeOp(boolean abort) throws HiveException { - if ( extraChildrenToClose != null ) { + if (extraChildrenToClose != null) { for (Operator op : extraChildrenToClose) { op.close(abort); } @@ -351,16 +379,17 @@ } } catch (SerDeException e) { // TODO: policy on deserialization errors - deserialize_error_count.set(deserialize_error_count.get()+1); - throw new HiveException (e); + deserialize_error_count.set(deserialize_error_count.get() + 1); + throw new HiveException(e); } } - public void processOp(Object row, int tag) - throws HiveException { + @Override + public void processOp(Object row, int tag) throws HiveException { throw new HiveException("Hive 2 Internal error: should not be called!"); } + @Override public String getName() { return "MAP"; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JobTrackerURLResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JobTrackerURLResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JobTrackerURLResolver.java (working copy) @@ -34,10 +34,9 @@ InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr); int infoPort = infoSocAddr.getPort(); - String tracker = "http://" + - JobTracker.getAddress(conf).getHostName() + ":" + - infoPort; - + String tracker = "http://" + JobTracker.getAddress(conf).getHostName() + + ":" + infoPort; + return tracker; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java (working copy) @@ -21,36 +21,38 @@ import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; import 
org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; public class ExprNodeEvaluatorFactory { - - public ExprNodeEvaluatorFactory() {} + public ExprNodeEvaluatorFactory() { + } + public static ExprNodeEvaluator get(exprNodeDesc desc) { // Constant node if (desc instanceof exprNodeConstantDesc) { - return new ExprNodeConstantEvaluator((exprNodeConstantDesc)desc); + return new ExprNodeConstantEvaluator((exprNodeConstantDesc) desc); } // Column-reference node, e.g. a column in the input row if (desc instanceof exprNodeColumnDesc) { - return new ExprNodeColumnEvaluator((exprNodeColumnDesc)desc); + return new ExprNodeColumnEvaluator((exprNodeColumnDesc) desc); } // Generic Function node, e.g. CASE, an operator or a UDF node if (desc instanceof exprNodeGenericFuncDesc) { - return new ExprNodeGenericFuncEvaluator((exprNodeGenericFuncDesc)desc); + return new ExprNodeGenericFuncEvaluator((exprNodeGenericFuncDesc) desc); } // Field node, e.g. get a.myfield1 from a if (desc instanceof exprNodeFieldDesc) { - return new ExprNodeFieldEvaluator((exprNodeFieldDesc)desc); + return new ExprNodeFieldEvaluator((exprNodeFieldDesc) desc); } - // Null node, a constant node with value NULL and no type information + // Null node, a constant node with value NULL and no type information if (desc instanceof exprNodeNullDesc) { - return new ExprNodeNullEvaluator((exprNodeNullDesc)desc); + return new ExprNodeNullEvaluator((exprNodeNullDesc) desc); } - throw new RuntimeException("Cannot find ExprNodeEvaluator for the exprNodeDesc = " + desc); + throw new RuntimeException( + "Cannot find ExprNodeEvaluator for the exprNodeDesc = " + desc); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java (working copy) @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; -/** +/** * exception class, thrown when udf argument have something wrong. 
*/ public class UDFArgumentException extends SemanticException { @@ -28,13 +28,13 @@ public UDFArgumentException() { super(); } - + public UDFArgumentException(String message) { super(message); } - + public UDFArgumentException(Throwable cause) { super(cause); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java (working copy) @@ -18,42 +18,46 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import java.util.ArrayList; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.collectDesc; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.conf.Configuration; /** * Buffers rows emitted by other operators **/ -public class CollectOperator extends Operator implements Serializable { +public class CollectOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; transient protected ArrayList rowList; transient protected ObjectInspector standardRowInspector; transient int maxSize; + @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - rowList = new ArrayList (); + rowList = new ArrayList(); maxSize = conf.getBufferSize().intValue(); } boolean firstRow = true; - public void processOp(Object row, int tag) - throws HiveException { + + @Override + public void processOp(Object row, int tag) throws HiveException { ObjectInspector rowInspector = inputObjInspectors[tag]; if (firstRow) { firstRow = false; // Get the standard ObjectInspector of the row - this.standardRowInspector = ObjectInspectorUtils.getStandardObjectInspector(rowInspector); + standardRowInspector = ObjectInspectorUtils + .getStandardObjectInspector(rowInspector); } - + if (rowList.size() < maxSize) { // Create a standard copy of the object. 
Object o = ObjectInspectorUtils.copyToStandardObject(row, rowInspector); @@ -61,9 +65,9 @@ } forward(row, rowInspector); } - + public void retrieve(InspectableObject result) { - assert(result != null); + assert (result != null); if (rowList.isEmpty()) { result.o = null; result.oi = null; @@ -73,5 +77,4 @@ } } - } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java (working copy) @@ -25,37 +25,40 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; public class FunctionInfo { - - private boolean isNative; - private String displayName; + private final boolean isNative; + private final String displayName; + private GenericUDF genericUDF; private GenericUDTF genericUDTF; - + private GenericUDAFResolver genericUDAFResolver; - - public FunctionInfo(boolean isNative, String displayName, GenericUDF genericUDF) { + + public FunctionInfo(boolean isNative, String displayName, + GenericUDF genericUDF) { this.isNative = isNative; this.displayName = displayName; this.genericUDF = genericUDF; } - public FunctionInfo(boolean isNative, String displayName, GenericUDAFResolver genericUDAFResolver) { + public FunctionInfo(boolean isNative, String displayName, + GenericUDAFResolver genericUDAFResolver) { this.isNative = isNative; this.displayName = displayName; this.genericUDAFResolver = genericUDAFResolver; } - public FunctionInfo(boolean isNative, String displayName, GenericUDTF genericUDTF) { + public FunctionInfo(boolean isNative, String displayName, + GenericUDTF genericUDTF) { this.isNative = isNative; this.displayName = displayName; this.genericUDTF = genericUDTF; } - + /** - * Get a new GenericUDF object for the function. + * Get a new GenericUDF object for the function. */ public GenericUDF getGenericUDF() { // GenericUDF is stateful - we have to make a copy here @@ -64,9 +67,9 @@ } return FunctionRegistry.cloneGenericUDF(genericUDF); } - + /** - * Get a new GenericUDTF object for the function. + * Get a new GenericUDTF object for the function. */ public GenericUDTF getGenericUDTF() { // GenericUDTF is stateful too, copy @@ -75,27 +78,27 @@ } return FunctionRegistry.cloneGenericUDTF(genericUDTF); } - + /** - * Get the GenericUDAFResolver object for the function. + * Get the GenericUDAFResolver object for the function. */ public GenericUDAFResolver getGenericUDAFResolver() { return genericUDAFResolver; } - + /** * Get the Class of the UDF */ public Class getFunctionClass() { if (isGenericUDF()) { if (genericUDF instanceof GenericUDFBridge) { - return ((GenericUDFBridge)genericUDF).getUdfClass(); + return ((GenericUDFBridge) genericUDF).getUdfClass(); } else { return genericUDF.getClass(); } } else if (isGenericUDAF()) { if (genericUDAFResolver instanceof GenericUDAFBridge) { - return ((GenericUDAFBridge)genericUDAFResolver).getUDAFClass(); + return ((GenericUDAFBridge) genericUDAFResolver).getUDAFClass(); } else { return genericUDAFResolver.getClass(); } @@ -104,39 +107,39 @@ } return null; } - + /** - * Get the display name for this function. - * This should be transfered into exprNodeGenericUDFDesc, and will be - * used as the first parameter to GenericUDF.getDisplayName() call, instead - * of hard-coding the function name. This will solve the problem of - * displaying only one name when a udf is registered under 2 names. + * Get the display name for this function. 
This should be transfered into + * exprNodeGenericUDFDesc, and will be used as the first parameter to + * GenericUDF.getDisplayName() call, instead of hard-coding the function name. + * This will solve the problem of displaying only one name when a udf is + * registered under 2 names. */ public String getDisplayName() { return displayName; } - + /** * Native functions cannot be unregistered. */ public boolean isNative() { return isNative; } - + /** * @return TRUE if the function is a GenericUDF */ public boolean isGenericUDF() { return null != genericUDF; } - + /** * @return TRUE if the function is a GenericUDAF */ public boolean isGenericUDAF() { return null != genericUDAFResolver; } - + /** * @return TRUE if the function is a GenericUDTF */ Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy) @@ -18,9 +18,16 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.*; -import java.util.*; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Vector; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.QueryPlan; @@ -31,15 +38,12 @@ import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.util.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - - /** * Task implementation **/ -public abstract class Task implements Serializable, Node { +public abstract class Task implements Serializable, + Node { private static final long serialVersionUID = 1L; transient protected boolean started; @@ -68,7 +72,8 @@ this.taskCounters = new HashMap(); } - public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) { + public void initialize(HiveConf conf, QueryPlan queryPlan, + DriverContext driverContext) { this.queryPlan = queryPlan; isdone = false; started = false; @@ -76,21 +81,22 @@ this.conf = conf; try { - db = Hive.get(conf); + db = Hive.get(conf); } catch (HiveException e) { // Bail out ungracefully - we should never hit // this here - but would have hit it in SemanticAnalyzer LOG.error(StringUtils.stringifyException(e)); - throw new RuntimeException (e); + throw new RuntimeException(e); } this.driverContext = driverContext; - + console = new LogHelper(LOG); } /** - * This method is called in the Driver on every task. It updates counters - * and calls execute(), which is overridden in each task + * This method is called in the Driver on every task. It updates counters and + * calls execute(), which is overridden in each task + * * @return return value of execute() */ public int executeTask() { @@ -112,32 +118,35 @@ } /** - * This method is overridden in each Task. - * TODO execute should return a TaskHandle. + * This method is overridden in each Task. TODO execute should return a + * TaskHandle. 
+ * * @return status of executing the task */ protected abstract int execute(); - + /** - * Update the progress of the task within taskHandle and also - * dump the progress information to the history file - * @param taskHandle task handle returned by execute - * @throws IOException + * Update the progress of the task within taskHandle and also dump the + * progress information to the history file + * + * @param taskHandle + * task handle returned by execute + * @throws IOException */ public void progress(TaskHandle taskHandle) throws IOException { // do nothing by default } - + // dummy method - FetchTask overwrites this - public boolean fetch(Vector res) throws IOException { + public boolean fetch(Vector res) throws IOException { assert false; - return false; + return false; } public void setChildTasks(List> childTasks) { this.childTasks = childTasks; } - + public List getChildren() { return getChildTasks(); } @@ -155,7 +164,9 @@ } /** - * Add a dependent task on the current task. Return if the dependency already existed or is this a new one + * Add a dependent task on the current task. Return if the dependency already + * existed or is this a new one + * * @return true if the task got added false if it already existed */ public boolean addDependentTask(Task dependent) { @@ -178,13 +189,17 @@ /** * remove the dependent task - * @param dependent the task to remove + * + * @param dependent + * the task to remove */ public void removeDependentTask(Task dependent) { if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) { getChildTasks().remove(dependent); - if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) + if ((dependent.getParentTasks() != null) + && (dependent.getParentTasks().contains(this))) { dependent.getParentTasks().remove(this); + } } } @@ -223,7 +238,7 @@ public boolean isRunnable() { boolean isrunnable = true; if (parentTasks != null) { - for(Task parent: parentTasks) { + for (Task parent : parentTasks) { if (!parent.done()) { isrunnable = false; break; @@ -269,8 +284,8 @@ } /** - * Should be overridden to return the type of the specific task among - * the types in TaskType + * Should be overridden to return the type of the specific task among the + * types in TaskType * * @return TaskTypeType.* or -1 if not overridden */ Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDFMethodResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDFMethodResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDFMethodResolver.java (working copy) @@ -24,38 +24,41 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * The default UDF Method resolver. This resolver is used for resolving the UDF method that is to be - * used for evaluation given the list of the argument types. The getEvalMethod goes through all the - * evaluate methods and returns the one that matches the argument signature or is the closest match. - * Closest match is defined as the one that requires the least number of arguments to be converted. - * In case more than one matches are found, the method throws an ambiguous method exception. + * The default UDF Method resolver. This resolver is used for resolving the UDF + * method that is to be used for evaluation given the list of the argument + * types. The getEvalMethod goes through all the evaluate methods and returns + * the one that matches the argument signature or is the closest match. 
Closest + * match is defined as the one that requires the least number of arguments to be + * converted. In case more than one matches are found, the method throws an + * ambiguous method exception. */ public class DefaultUDFMethodResolver implements UDFMethodResolver { /** * The class of the UDF. */ - private Class udfClass; - + private final Class udfClass; + /** - * Constructor. - * This constructor sets the resolver to be used for comparison operators. - * See {@link UDFMethodResolver} + * Constructor. This constructor sets the resolver to be used for comparison + * operators. See {@link UDFMethodResolver} */ public DefaultUDFMethodResolver(Class udfClass) { this.udfClass = udfClass; } - + /** * Gets the evaluate method for the UDF given the parameter types. * - * @param argClasses The list of the argument types that need to matched with the evaluate - * function signature. + * @param argClasses + * The list of the argument types that need to matched with the + * evaluate function signature. */ @Override - public Method getEvalMethod(List argClasses) - throws AmbiguousMethodException { - Method m = FunctionRegistry.getMethodInternal(udfClass, "evaluate", false, argClasses); + public Method getEvalMethod(List argClasses) + throws AmbiguousMethodException { + Method m = FunctionRegistry.getMethodInternal(udfClass, "evaluate", false, + argClasses); if (m == null) { throw new AmbiguousMethodException(udfClass, argClasses); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java (working copy) @@ -24,16 +24,16 @@ public abstract class ExprNodeEvaluator { /** - * Initialize should be called once and only once. - * Return the ObjectInspector for the return value, given the rowInspector. + * Initialize should be called once and only once. Return the ObjectInspector + * for the return value, given the rowInspector. */ - public abstract ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException; + public abstract ObjectInspector initialize(ObjectInspector rowInspector) + throws HiveException; /** - * Evaluate the expression given the row. - * This method should use the rowInspector passed in from initialize to - * inspect the row object. - * The return value will be inspected by the return value of initialize. + * Evaluate the expression given the row. This method should use the + * rowInspector passed in from initialize to inspect the row object. The + * return value will be inspected by the return value of initialize. */ public abstract Object evaluate(Object row) throws HiveException; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (working copy) @@ -18,16 +18,15 @@ package org.apache.hadoop.hive.ql.exec; -import java.lang.Class; -import java.io.*; +import java.io.Serializable; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** - * Implementation for ColumnInfo which contains the internal name for the - * column (the one that is used by the operator to access the column) and - * the type (identified by a java class). 
+ * Implementation for ColumnInfo which contains the internal name for the column + * (the one that is used by the operator to access the column) and the type + * (identified by a java class). **/ public class ColumnInfo implements Serializable { @@ -35,40 +34,41 @@ private static final long serialVersionUID = 1L; private String internalName; - - private String alias = null; // [optional] alias of the column (external name as seen by the users) + private String alias = null; // [optional] alias of the column (external name + // as seen by the users) + /** * Store the alias of the table where available. */ private String tabAlias; - + /** * Indicates whether the column is a partition column. */ private boolean isPartitionCol; - + transient private TypeInfo type; public ColumnInfo() { } - public ColumnInfo(String internalName, TypeInfo type, - String tabAlias, boolean isPartitionCol) { + public ColumnInfo(String internalName, TypeInfo type, String tabAlias, + boolean isPartitionCol) { this.internalName = internalName; this.type = type; this.tabAlias = tabAlias; this.isPartitionCol = isPartitionCol; } - - public ColumnInfo(String internalName, Class type, - String tabAlias, boolean isPartitionCol) { + + public ColumnInfo(String internalName, Class type, String tabAlias, + boolean isPartitionCol) { this.internalName = internalName; this.type = TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(type); this.tabAlias = tabAlias; this.isPartitionCol = isPartitionCol; } - + public TypeInfo getType() { return type; } @@ -76,7 +76,7 @@ public String getInternalName() { return internalName; } - + public void setType(TypeInfo type) { this.type = type; } @@ -86,25 +86,27 @@ } public String getTabAlias() { - return this.tabAlias; + return tabAlias; } - + public boolean getIsPartitionCol() { - return this.isPartitionCol; + return isPartitionCol; } + /** * Returns the string representation of the ColumnInfo. */ + @Override public String toString() { return internalName + ": " + type; } - + public void setAlias(String col_alias) { alias = col_alias; } - + public String getAlias() { return alias; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java (working copy) @@ -18,13 +18,9 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.HashMap; -import java.util.List; import java.util.ArrayList; -import java.util.Map; +import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.lateralViewJoinDesc; @@ -33,39 +29,29 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; - /** * The lateral view join operator is used to implement the lateral view - * functionality. This operator was implemented with the following - * operator DAG in mind. For a query such as + * functionality. This operator was implemented with the following operator DAG + * in mind. 
For a query such as * - * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS adid + * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS + * adid * * The top of the operator tree will look similar to * - * [Table Scan] - * / \ - * [Select](*) [Select](adid_list) - * | | - * | [UDTF] (explode) - * \ / - * [Lateral View Join] - * | - * | - * [Select] (pageid, adid.*) - * | - * .... + * [Table Scan] / \ [Select](*) [Select](adid_list) | | | [UDTF] (explode) \ / + * [Lateral View Join] | | [Select] (pageid, adid.*) | .... * - * Rows from the table scan operator are first sent to two select operators. - * The select operator on the left picks all the columns while the select - * operator on the right picks only the columns needed by the UDTF. + * Rows from the table scan operator are first sent to two select operators. The + * select operator on the left picks all the columns while the select operator + * on the right picks only the columns needed by the UDTF. * - * The output of select in the left branch and output of the UDTF in the right + * The output of select in the left branch and output of the UDTF in the right * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF * will generate > 1 row for every row received from the TS, while the left - * select operator will generate only one. For each row output from the TS, - * the LVJ outputs all possible rows that can be created by joining the row from - * the left select and one of the rows output from the UDTF. + * select operator will generate only one. For each row output from the TS, the + * LVJ outputs all possible rows that can be created by joining the row from the + * left select and one of the rows output from the UDTF. * * Additional lateral views can be supported by adding a similar DAG after the * previous LVJ operator. @@ -78,41 +64,41 @@ // The expected tags from the parent operators. See processOp() before // changing the tags. static final int SELECT_TAG = 0; - static final int UDTF_TAG = 1; - + static final int UDTF_TAG = 1; + @Override protected void initializeOp(Configuration hconf) throws HiveException { - + ArrayList ois = new ArrayList(); ArrayList fieldNames = conf.getOutputInternalColNames(); // The output of the lateral view join will be the columns from the select // parent, followed by the column from the UDTF parent - StructObjectInspector soi = - (StructObjectInspector) inputObjInspectors[SELECT_TAG]; + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[SELECT_TAG]; List sfs = soi.getAllStructFieldRefs(); for (StructField sf : sfs) { ois.add(sf.getFieldObjectInspector()); } - + soi = (StructObjectInspector) inputObjInspectors[UDTF_TAG]; sfs = soi.getAllStructFieldRefs(); for (StructField sf : sfs) { ois.add(sf.getFieldObjectInspector()); } - outputObjInspector = - ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, ois); + outputObjInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(fieldNames, ois); // Initialize the rest of the operator DAG super.initializeOp(hconf); } - + // acc is short for accumulator. 
It's used to build the row before forwarding ArrayList acc = new ArrayList(); // selectObjs hold the row from the select op, until receiving a row from // the udtf op ArrayList selectObjs = new ArrayList(); + /** * An important assumption for processOp() is that for a given row from the * TS, the LVJ will first get the row from the left select operator, followed @@ -120,7 +106,7 @@ */ @Override public void processOp(Object row, int tag) throws HiveException { - StructObjectInspector soi = (StructObjectInspector)inputObjInspectors[tag]; + StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; if (tag == SELECT_TAG) { selectObjs.clear(); selectObjs.addAll(soi.getStructFieldsDataAsList(row)); @@ -132,7 +118,7 @@ } else { throw new HiveException("Invalid tag"); } - + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AmbiguousMethodException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/AmbiguousMethodException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AmbiguousMethodException.java (working copy) @@ -23,8 +23,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * Exception thrown by the UDF and UDAF method resolvers in case a unique method is not found. - * + * Exception thrown by the UDF and UDAF method resolvers in case a unique method + * is not found. + * */ public class AmbiguousMethodException extends UDFArgumentException { @@ -37,28 +38,31 @@ * The UDF or UDAF class that has the ambiguity. */ Class funcClass; - + /** * The list of parameter types. */ List argTypeInfos; - + /** * Constructor. * - * @param funcClass The UDF or UDAF class. - * @param argTypeInfos The list of argument types that lead to an ambiguity. + * @param funcClass + * The UDF or UDAF class. + * @param argTypeInfos + * The list of argument types that lead to an ambiguity. 
*/ - public AmbiguousMethodException(Class funcClass, List argTypeInfos) { + public AmbiguousMethodException(Class funcClass, + List argTypeInfos) { super("Ambiguous method for " + funcClass + " with " + argTypeInfos); this.funcClass = funcClass; this.argTypeInfos = argTypeInfos; } - + Class getFunctionClass() { return funcClass; } - + List getArgTypeList() { return argTypeInfos; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; @@ -34,6 +31,8 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; @@ -129,10 +128,10 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStdSample; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -153,8 +152,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -167,7 +166,8 @@ public class FunctionRegistry { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.exec.FunctionRegistry"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.exec.FunctionRegistry"); /** * The mapping from expression function names to expression classes. 
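(Illustration only, not part of the patch.) The registration calls in the FunctionRegistry hunks that follow pair a lower-cased function name with either a UDF subclass or a GenericUDF/GenericUDAFResolver. Combined with the contract spelled out in the UDF javadoc earlier in this patch (one or more non-void "evaluate" methods, resolved through the UDFMethodResolver held by the UDF base class), a minimal user-defined function and its session-scoped registration could look roughly like the sketch below; the class name UDFExampleAbs and the function name "example_abs" are invented for illustration.

package org.apache.hadoop.hive.ql.udf;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;

// Hypothetical UDF: returns the absolute value of an int column.
public class UDFExampleAbs extends UDF {

  // Hive selects this overload through the resolver's getEvalMethod().
  public IntWritable evaluate(IntWritable n) {
    if (n == null) {
      return null; // "evaluate" may return null, but must never be void
    }
    return new IntWritable(Math.abs(n.get()));
  }

  public static void main(String[] args) {
    // Session-scoped registration, using the registerTemporaryUDF overload
    // visible in this file's diff (functionName, UDF class, isOperator).
    FunctionRegistry.registerTemporaryUDF("example_abs", UDFExampleAbs.class,
        false);
  }
}

After such a registration, a query could invoke example_abs(col) by name, assuming the class is on the session classpath.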
@@ -182,7 +182,7 @@ registerUDF("ascii", UDFAscii.class, false); registerUDF("lpad", UDFLpad.class, false); registerUDF("rpad", UDFRpad.class, false); - + registerGenericUDF("size", GenericUDFSize.class); registerUDF("round", UDFRound.class, false); @@ -196,10 +196,10 @@ registerUDF("ln", UDFLn.class, false); registerUDF("log2", UDFLog2.class, false); - registerUDF("sin",UDFSin.class, false); - registerUDF("asin",UDFAsin.class, false); - registerUDF("cos",UDFCos.class, false); - registerUDF("acos",UDFAcos.class, false); + registerUDF("sin", UDFSin.class, false); + registerUDF("asin", UDFAsin.class, false); + registerUDF("cos", UDFCos.class, false); + registerUDF("acos", UDFAcos.class, false); registerUDF("log10", UDFLog10.class, false); registerUDF("log", UDFLog.class, false); registerUDF("exp", UDFExp.class, false); @@ -210,7 +210,7 @@ registerUDF("bin", UDFBin.class, false); registerUDF("hex", UDFHex.class, false); registerUDF("unhex", UDFUnhex.class, false); - + registerUDF("upper", UDFUpper.class, false); registerUDF("lower", UDFLower.class, false); registerUDF("ucase", UDFUpper.class, false); @@ -222,7 +222,7 @@ registerUDF("reverse", UDFReverse.class, false); registerGenericUDF("field", GenericUDFField.class); registerUDF("find_in_set", UDFFindInSet.class, false); - + registerUDF("like", UDFLike.class, true); registerUDF("rlike", UDFRegExp.class, true); registerUDF("regexp", UDFRegExp.class, true); @@ -245,7 +245,7 @@ registerUDF("unix_timestamp", UDFUnixTimeStamp.class, false); registerUDF("to_date", UDFDate.class, false); registerUDF("weekofyear", UDFWeekOfYear.class, false); - + registerUDF("date_add", UDFDateAdd.class, false); registerUDF("date_sub", UDFDateSub.class, false); registerUDF("datediff", UDFDateDiff.class, false); @@ -282,31 +282,31 @@ registerGenericUDF("isnotnull", GenericUDFOPNotNull.class); registerGenericUDF("if", GenericUDFIf.class); - + // Aliases for Java Class Names // These are used in getImplicitConvertUDFMethod registerUDF(Constants.BOOLEAN_TYPE_NAME, UDFToBoolean.class, false, - UDFToBoolean.class.getSimpleName()); + UDFToBoolean.class.getSimpleName()); registerUDF(Constants.TINYINT_TYPE_NAME, UDFToByte.class, false, - UDFToByte.class.getSimpleName()); + UDFToByte.class.getSimpleName()); registerUDF(Constants.SMALLINT_TYPE_NAME, UDFToShort.class, false, - UDFToShort.class.getSimpleName()); + UDFToShort.class.getSimpleName()); registerUDF(Constants.INT_TYPE_NAME, UDFToInteger.class, false, - UDFToInteger.class.getSimpleName()); + UDFToInteger.class.getSimpleName()); registerUDF(Constants.BIGINT_TYPE_NAME, UDFToLong.class, false, - UDFToLong.class.getSimpleName()); + UDFToLong.class.getSimpleName()); registerUDF(Constants.FLOAT_TYPE_NAME, UDFToFloat.class, false, - UDFToFloat.class.getSimpleName()); + UDFToFloat.class.getSimpleName()); registerUDF(Constants.DOUBLE_TYPE_NAME, UDFToDouble.class, false, - UDFToDouble.class.getSimpleName()); + UDFToDouble.class.getSimpleName()); registerUDF(Constants.STRING_TYPE_NAME, UDFToString.class, false, - UDFToString.class.getSimpleName()); + UDFToString.class.getSimpleName()); // Aggregate functions registerGenericUDAF("sum", new GenericUDAFSum()); registerGenericUDAF("count", new GenericUDAFCount()); registerGenericUDAF("avg", new GenericUDAFAverage()); - + registerGenericUDAF("std", new GenericUDAFStd()); registerGenericUDAF("stddev", new GenericUDAFStd()); registerGenericUDAF("stddev_pop", new GenericUDAFStd()); @@ -314,10 +314,10 @@ registerGenericUDAF("variance", new GenericUDAFVariance()); 
registerGenericUDAF("var_pop", new GenericUDAFVariance()); registerGenericUDAF("var_samp", new GenericUDAFVarianceSample()); - + registerUDAF("max", UDAFMax.class); registerUDAF("min", UDAFMin.class); - + // Generic UDFs registerGenericUDF("array", GenericUDFArray.class); registerGenericUDF("map", GenericUDFMap.class); @@ -331,98 +331,110 @@ registerGenericUDF("locate", GenericUDFLocate.class); registerGenericUDF("elt", GenericUDFElt.class); registerGenericUDF("concat_ws", GenericUDFConcatWS.class); - + // Generic UDTF's registerGenericUDTF("explode", GenericUDTFExplode.class); } - public static void registerTemporaryUDF(String functionName, Class UDFClass, - boolean isOperator) { + public static void registerTemporaryUDF(String functionName, + Class UDFClass, boolean isOperator) { registerUDF(false, functionName, UDFClass, isOperator); } static void registerUDF(String functionName, Class UDFClass, - boolean isOperator) { + boolean isOperator) { registerUDF(true, functionName, UDFClass, isOperator); } - public static void registerUDF(boolean isNative, String functionName, Class UDFClass, - boolean isOperator) { - registerUDF(isNative, functionName, UDFClass, isOperator, functionName.toLowerCase()); + public static void registerUDF(boolean isNative, String functionName, + Class UDFClass, boolean isOperator) { + registerUDF(isNative, functionName, UDFClass, isOperator, functionName + .toLowerCase()); } - public static void registerUDF(String functionName, Class UDFClass, - boolean isOperator, String displayName) { + public static void registerUDF(String functionName, + Class UDFClass, boolean isOperator, String displayName) { registerUDF(true, functionName, UDFClass, isOperator, displayName); } - - public static void registerUDF(boolean isNative, String functionName, Class UDFClass, - boolean isOperator, String displayName) { + + public static void registerUDF(boolean isNative, String functionName, + Class UDFClass, boolean isOperator, String displayName) { if (UDF.class.isAssignableFrom(UDFClass)) { - FunctionInfo fI = new FunctionInfo(isNative, displayName, + FunctionInfo fI = new FunctionInfo(isNative, displayName, new GenericUDFBridge(displayName, isOperator, UDFClass)); mFunctions.put(functionName.toLowerCase(), fI); } else { - throw new RuntimeException("Registering UDF Class " + UDFClass + " which does not extend " + UDF.class); + throw new RuntimeException("Registering UDF Class " + UDFClass + + " which does not extend " + UDF.class); } } - public static void registerTemporaryGenericUDF(String functionName, Class genericUDFClass) { + public static void registerTemporaryGenericUDF(String functionName, + Class genericUDFClass) { registerGenericUDF(false, functionName, genericUDFClass); } - static void registerGenericUDF(String functionName, Class genericUDFClass) { + static void registerGenericUDF(String functionName, + Class genericUDFClass) { registerGenericUDF(true, functionName, genericUDFClass); } - public static void registerGenericUDF(boolean isNative, String functionName, Class genericUDFClass) { + public static void registerGenericUDF(boolean isNative, String functionName, + Class genericUDFClass) { if (GenericUDF.class.isAssignableFrom(genericUDFClass)) { - FunctionInfo fI = new FunctionInfo(isNative, functionName, - (GenericUDF)ReflectionUtils.newInstance(genericUDFClass, null)); + FunctionInfo fI = new FunctionInfo(isNative, functionName, + (GenericUDF) ReflectionUtils.newInstance(genericUDFClass, null)); mFunctions.put(functionName.toLowerCase(), fI); } else { - throw new 
RuntimeException("Registering GenericUDF Class " + genericUDFClass - + " which does not extend " + GenericUDF.class); + throw new RuntimeException("Registering GenericUDF Class " + + genericUDFClass + " which does not extend " + GenericUDF.class); } } - public static void registerTemporaryGenericUDTF(String functionName, Class genericUDTFClass) { + public static void registerTemporaryGenericUDTF(String functionName, + Class genericUDTFClass) { registerGenericUDTF(false, functionName, genericUDTFClass); } - static void registerGenericUDTF(String functionName, Class genericUDTFClass) { + + static void registerGenericUDTF(String functionName, + Class genericUDTFClass) { registerGenericUDTF(true, functionName, genericUDTFClass); } - public static void registerGenericUDTF(boolean isNative, String functionName, Class genericUDTFClass) { + public static void registerGenericUDTF(boolean isNative, String functionName, + Class genericUDTFClass) { if (GenericUDTF.class.isAssignableFrom(genericUDTFClass)) { - FunctionInfo fI = new FunctionInfo(isNative, functionName, - (GenericUDTF)ReflectionUtils.newInstance(genericUDTFClass, null)); + FunctionInfo fI = new FunctionInfo(isNative, functionName, + (GenericUDTF) ReflectionUtils.newInstance(genericUDTFClass, null)); mFunctions.put(functionName.toLowerCase(), fI); } else { - throw new RuntimeException("Registering GenericUDTF Class " + genericUDTFClass - + " which does not extend " + GenericUDTF.class); + throw new RuntimeException("Registering GenericUDTF Class " + + genericUDTFClass + " which does not extend " + GenericUDTF.class); } } - + public static FunctionInfo getFunctionInfo(String functionName) { return mFunctions.get(functionName.toLowerCase()); } /** - * Returns a set of registered function names. - * This is used for the CLI command "SHOW FUNCTIONS;" - * @return set of strings contains function names + * Returns a set of registered function names. This is used for the CLI + * command "SHOW FUNCTIONS;" + * + * @return set of strings contains function names */ public static Set getFunctionNames() { return mFunctions.keySet(); } /** - * Returns a set of registered function names. - * This is used for the CLI command "SHOW FUNCTIONS 'regular expression';" - * Returns an empty set when the regular expression is not valid. - * @param funcPatternStr regular expression of the interested function names - * @return set of strings contains function names + * Returns a set of registered function names. This is used for the CLI + * command "SHOW FUNCTIONS 'regular expression';" Returns an empty set when + * the regular expression is not valid. + * + * @param funcPatternStr + * regular expression of the interested function names + * @return set of strings contains function names */ public static Set getFunctionNames(String funcPatternStr) { Set funcNames = new TreeSet(); @@ -442,17 +454,19 @@ /** * Returns the set of synonyms of the supplied function. 
- * @param funcName the name of the function + * + * @param funcName + * the name of the function * @return Set of synonyms for funcName */ public static Set getFunctionSynonyms(String funcName) { Set synonyms = new HashSet(); - + FunctionInfo funcInfo = getFunctionInfo(funcName); if (null == funcInfo) { return synonyms; } - + Class funcClass = funcInfo.getFunctionClass(); for (String name : mFunctions.keySet()) { if (name.equals(funcName)) { @@ -462,18 +476,19 @@ synonyms.add(name); } } - + return synonyms; } - static Map numericTypes = new HashMap(); static List numericTypeList = new ArrayList(); + static void registerNumericType(String typeName, int level) { TypeInfo t = TypeInfoFactory.getPrimitiveTypeInfo(typeName); numericTypeList.add(t); - numericTypes.put(t, level); + numericTypes.put(t, level); } + static { registerNumericType(Constants.TINYINT_TYPE_NAME, 1); registerNumericType(Constants.SMALLINT_TYPE_NAME, 2); @@ -485,21 +500,23 @@ } /** - * Find a common class that objects of both TypeInfo a and TypeInfo b can convert to. - * This is used for comparing objects of type a and type b. + * Find a common class that objects of both TypeInfo a and TypeInfo b can + * convert to. This is used for comparing objects of type a and type b. * - * When we are comparing string and double, we will always convert both of them - * to double and then compare. + * When we are comparing string and double, we will always convert both of + * them to double and then compare. * * @return null if no common class could be found. */ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { // If same return one of them - if (a.equals(b)) return a; - - for (TypeInfo t: numericTypeList) { - if (FunctionRegistry.implicitConvertable(a, t) && - FunctionRegistry.implicitConvertable(b, t)) { + if (a.equals(b)) { + return a; + } + + for (TypeInfo t : numericTypeList) { + if (FunctionRegistry.implicitConvertable(a, t) + && FunctionRegistry.implicitConvertable(b, t)) { return t; } } @@ -507,8 +524,8 @@ } /** - * Find a common class that objects of both TypeInfo a and TypeInfo b can convert to. - * This is used for places other than comparison. + * Find a common class that objects of both TypeInfo a and TypeInfo b can + * convert to. This is used for places other than comparison. * * The common class of string and double is string. * @@ -524,7 +541,9 @@ return (ai > bi) ? a : b; } - /** Returns whether it is possible to implicitly convert an object of Class from to Class to. + /** + * Returns whether it is possible to implicitly convert an object of Class + * from to Class to. */ public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { if (from.equals(to)) { @@ -540,42 +559,52 @@ return true; } - // Allow implicit conversion from Byte -> Integer -> Long -> Float -> Double -> String + // Allow implicit conversion from Byte -> Integer -> Long -> Float -> Double + // -> String Integer f = numericTypes.get(from); Integer t = numericTypes.get(to); - if (f == null || t == null) return false; - if (f.intValue() > t.intValue()) return false; + if (f == null || t == null) { + return false; + } + if (f.intValue() > t.intValue()) { + return false; + } return true; } /** * Get the GenericUDAF evaluator for the name and argumentClasses. 
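The registerNumericType calls above give each numeric type an increasing level, and getCommonClassForComparison / implicitConvertable then reduce type compatibility to integer comparisons over those levels: a type converts implicitly only to types with an equal or higher level, and the common comparison type is the first type both sides can reach. A self-contained sketch of that idea; the type names and levels below are assumed for illustration, since only part of the ordering is visible in this hunk:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch: implicit conversion decided by precedence levels.
public class NumericPrecedence {
  private static final Map<String, Integer> LEVELS = new LinkedHashMap<String, Integer>();
  private static final List<String> ORDERED = new ArrayList<String>();

  static void register(String type, int level) {
    LEVELS.put(type, level);
    ORDERED.add(type);
  }

  static {
    // Assumed ordering, for illustration only.
    register("tinyint", 1);
    register("smallint", 2);
    register("int", 3);
    register("bigint", 4);
    register("double", 5);
    register("string", 6);
  }

  // A type converts implicitly only "upwards" in the ordering.
  static boolean implicitlyConvertible(String from, String to) {
    Integer f = LEVELS.get(from);
    Integer t = LEVELS.get(to);
    return f != null && t != null && f.intValue() <= t.intValue();
  }

  // First type both arguments can convert to, or null if none exists.
  static String commonTypeForComparison(String a, String b) {
    if (a.equals(b)) {
      return a;
    }
    for (String t : ORDERED) {
      if (implicitlyConvertible(a, t) && implicitlyConvertible(b, t)) {
        return t;
      }
    }
    return null;
  }

  public static void main(String[] args) {
    System.out.println(commonTypeForComparison("int", "double")); // prints: double
  }
}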
- * @param name the name of the UDAF + * + * @param name + * the name of the UDAF * @param argumentTypeInfos * @return The UDAF evaluator */ - public static GenericUDAFEvaluator getGenericUDAFEvaluator(String name, List argumentTypeInfos) - throws SemanticException { + public static GenericUDAFEvaluator getGenericUDAFEvaluator(String name, + List argumentTypeInfos) throws SemanticException { GenericUDAFResolver udaf = getGenericUDAFResolver(name); - if (udaf == null) return null; + if (udaf == null) { + return null; + } TypeInfo[] parameters = new TypeInfo[argumentTypeInfos.size()]; - for(int i=0; i Method getMethodInternal(Class udfClass, String methodName, boolean exact, - List argumentClasses) { + public static Method getMethodInternal(Class udfClass, + String methodName, boolean exact, List argumentClasses) { List mlist = new ArrayList(); - for(Method m: Arrays.asList(udfClass.getMethods())) { + for (Method m : Arrays.asList(udfClass.getMethods())) { if (m.getName().equals(methodName)) { mlist.add(m); } @@ -584,20 +613,24 @@ return getMethodInternal(mlist, exact, argumentClasses); } - public static void registerTemporaryGenericUDAF(String functionName, GenericUDAFResolver genericUDAFResolver) { + public static void registerTemporaryGenericUDAF(String functionName, + GenericUDAFResolver genericUDAFResolver) { registerGenericUDAF(false, functionName, genericUDAFResolver); } - static void registerGenericUDAF(String functionName, GenericUDAFResolver genericUDAFResolver) { + static void registerGenericUDAF(String functionName, + GenericUDAFResolver genericUDAFResolver) { registerGenericUDAF(true, functionName, genericUDAFResolver); } - public static void registerGenericUDAF(boolean isNative, String functionName, GenericUDAFResolver genericUDAFResolver) { - mFunctions.put(functionName.toLowerCase(), - new FunctionInfo(isNative, functionName.toLowerCase(), genericUDAFResolver)); + public static void registerGenericUDAF(boolean isNative, String functionName, + GenericUDAFResolver genericUDAFResolver) { + mFunctions.put(functionName.toLowerCase(), new FunctionInfo(isNative, + functionName.toLowerCase(), genericUDAFResolver)); } - public static void registerTemporaryUDAF(String functionName, Class udafClass) { + public static void registerTemporaryUDAF(String functionName, + Class udafClass) { registerUDAF(false, functionName, udafClass); } @@ -605,20 +638,23 @@ registerUDAF(true, functionName, udafClass); } - public static void registerUDAF(boolean isNative, String functionName, Class udafClass) { - mFunctions.put(functionName.toLowerCase(), - new FunctionInfo(isNative, functionName.toLowerCase(), - new GenericUDAFBridge((UDAF)ReflectionUtils.newInstance(udafClass, null)))); + public static void registerUDAF(boolean isNative, String functionName, + Class udafClass) { + mFunctions.put(functionName.toLowerCase(), new FunctionInfo(isNative, + functionName.toLowerCase(), new GenericUDAFBridge( + (UDAF) ReflectionUtils.newInstance(udafClass, null)))); } - public static void unregisterTemporaryUDF(String functionName) throws HiveException { + public static void unregisterTemporaryUDF(String functionName) + throws HiveException { FunctionInfo fi = mFunctions.get(functionName.toLowerCase()); - if(fi != null) { - if(!fi.isNative()) + if (fi != null) { + if (!fi.isNative()) { mFunctions.remove(functionName.toLowerCase()); - else - throw new HiveException("Function " + functionName + } else { + throw new HiveException("Function " + functionName + " is hive native, it can't be dropped"); + } } } @@ -632,44 
+668,47 @@ return result; } - public static Object invoke(Method m, Object thisObject, Object... arguments) throws HiveException { + public static Object invoke(Method m, Object thisObject, Object... arguments) + throws HiveException { Object o; try { o = m.invoke(thisObject, arguments); } catch (Exception e) { - String thisObjectString = "" + thisObject + " of class " + - (thisObject == null? "null" : thisObject.getClass().getName()); + String thisObjectString = "" + thisObject + " of class " + + (thisObject == null ? "null" : thisObject.getClass().getName()); StringBuilder argumentString = new StringBuilder(); if (arguments == null) { argumentString.append("null"); } else { argumentString.append("{"); - for (int i=0; i0) { + for (int i = 0; i < arguments.length; i++) { + if (i > 0) { argumentString.append(", "); } if (arguments[i] == null) { argumentString.append("null"); } else { - argumentString.append("" + arguments[i] + ":" + arguments[i].getClass().getName()); + argumentString.append("" + arguments[i] + ":" + + arguments[i].getClass().getName()); } } argumentString.append("} of size " + arguments.length); } throw new HiveException("Unable to execute method " + m + " " - + " on object " + thisObjectString - + " with arguments " + argumentString.toString(), e); + + " on object " + thisObjectString + " with arguments " + + argumentString.toString(), e); } return o; } /** - * Returns -1 if passed does not match accepted. - * Otherwise return the cost (usually 0 for no conversion and 1 for conversion). + * Returns -1 if passed does not match accepted. Otherwise return the cost + * (usually 0 for no conversion and 1 for conversion). */ - public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) { + public static int matchCost(TypeInfo argumentPassed, + TypeInfo argumentAccepted, boolean exact) { if (argumentAccepted.equals(argumentPassed)) { // matches return 0; @@ -678,26 +717,34 @@ // passing null matches everything return 0; } - if (argumentPassed.getCategory().equals(Category.LIST) + if (argumentPassed.getCategory().equals(Category.LIST) && argumentAccepted.getCategory().equals(Category.LIST)) { // lists are compatible if and only-if the elements are compatible - TypeInfo argumentPassedElement = ((ListTypeInfo)argumentPassed).getListElementTypeInfo(); - TypeInfo argumentAcceptedElement = ((ListTypeInfo)argumentAccepted).getListElementTypeInfo(); + TypeInfo argumentPassedElement = ((ListTypeInfo) argumentPassed) + .getListElementTypeInfo(); + TypeInfo argumentAcceptedElement = ((ListTypeInfo) argumentAccepted) + .getListElementTypeInfo(); return matchCost(argumentPassedElement, argumentAcceptedElement, exact); } - if (argumentPassed.getCategory().equals(Category.MAP) + if (argumentPassed.getCategory().equals(Category.MAP) && argumentAccepted.getCategory().equals(Category.MAP)) { // lists are compatible if and only-if the elements are compatible - TypeInfo argumentPassedKey = ((MapTypeInfo)argumentPassed).getMapKeyTypeInfo(); - TypeInfo argumentAcceptedKey = ((MapTypeInfo)argumentAccepted).getMapKeyTypeInfo(); - TypeInfo argumentPassedValue = ((MapTypeInfo)argumentPassed).getMapValueTypeInfo(); - TypeInfo argumentAcceptedValue = ((MapTypeInfo)argumentAccepted).getMapValueTypeInfo(); + TypeInfo argumentPassedKey = ((MapTypeInfo) argumentPassed) + .getMapKeyTypeInfo(); + TypeInfo argumentAcceptedKey = ((MapTypeInfo) argumentAccepted) + .getMapKeyTypeInfo(); + TypeInfo argumentPassedValue = ((MapTypeInfo) argumentPassed) + .getMapValueTypeInfo(); + 
TypeInfo argumentAcceptedValue = ((MapTypeInfo) argumentAccepted) + .getMapValueTypeInfo(); int cost1 = matchCost(argumentPassedKey, argumentAcceptedKey, exact); int cost2 = matchCost(argumentPassedValue, argumentAcceptedValue, exact); - if (cost1 < 0 || cost2 < 0) return -1; + if (cost1 < 0 || cost2 < 0) { + return -1; + } return Math.max(cost1, cost2); } - + if (argumentAccepted.equals(TypeInfoFactory.unknownTypeInfo)) { // accepting Object means accepting everything, // but there is a conversion cost. @@ -706,16 +753,20 @@ if (!exact && implicitConvertable(argumentPassed, argumentAccepted)) { return 1; } - + return -1; } - + /** - * Gets the closest matching method corresponding to the argument list from a list of methods. - * - * @param mlist The list of methods to inspect. - * @param exact Boolean to indicate whether this is an exact match or not. - * @param argumentsPassed The classes for the argument. + * Gets the closest matching method corresponding to the argument list from a + * list of methods. + * + * @param mlist + * The list of methods to inspect. + * @param exact + * Boolean to indicate whether this is an exact match or not. + * @param argumentsPassed + * The classes for the argument. * @return The matching method. */ public static Method getMethodInternal(List mlist, boolean exact, @@ -723,40 +774,45 @@ int leastConversionCost = Integer.MAX_VALUE; Method udfMethod = null; - for (Method m: mlist) { + for (Method m : mlist) { List argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size()); if (argumentsAccepted == null) { // null means the method does not accept number of arguments passed. continue; } - + boolean match = (argumentsAccepted.size() == argumentsPassed.size()); int conversionCost = 0; - for(int i=0; i getUDFClassFromExprDesc(exprNodeDesc desc) { if (!(desc instanceof exprNodeGenericFuncDesc)) { return null; } - exprNodeGenericFuncDesc genericFuncDesc = (exprNodeGenericFuncDesc)desc; + exprNodeGenericFuncDesc genericFuncDesc = (exprNodeGenericFuncDesc) desc; if (!(genericFuncDesc.getGenericUDF() instanceof GenericUDFBridge)) { return null; } - GenericUDFBridge bridge = (GenericUDFBridge)(genericFuncDesc.getGenericUDF()); + GenericUDFBridge bridge = (GenericUDFBridge) (genericFuncDesc + .getGenericUDF()); return bridge.getUdfClass(); } - + /** * Returns whether a GenericUDF is deterministic or not. */ @@ -830,42 +890,41 @@ if (genericUDFType != null && genericUDFType.deterministic() == false) { return false; } - + if (genericUDF instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge)(genericUDF); + GenericUDFBridge bridge = (GenericUDFBridge) (genericUDF); UDFType bridgeUDFType = bridge.getUdfClass().getAnnotation(UDFType.class); if (bridgeUDFType != null && bridgeUDFType.deterministic() == false) { return false; } } - + return true; } - + /** - * Returns whether the exprNodeDesc is a node of "and", "or", "not". + * Returns whether the exprNodeDesc is a node of "and", "or", "not". */ public static boolean isOpAndOrNot(exprNodeDesc desc) { Class udfClass = getUDFClassFromExprDesc(desc); - return UDFOPAnd.class == udfClass - || UDFOPOr.class == udfClass + return UDFOPAnd.class == udfClass || UDFOPOr.class == udfClass || UDFOPNot.class == udfClass; } - + /** - * Returns whether the exprNodeDesc is a node of "and". + * Returns whether the exprNodeDesc is a node of "and". 
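getMethodInternal above picks the overload with the smallest summed conversion cost (0 for an exact argument match, 1 for an implicit conversion, -1 for no match) and clears the choice when two candidates tie, which is what later surfaces as an AmbiguousMethodException. A self-contained sketch of that selection rule over plain cost arrays; the Candidate/resolve names are hypothetical and stand in for the real Method and TypeInfo machinery:

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of "closest match" selection by summed conversion cost.
public class ClosestMatch {

  // A candidate is a name plus the per-argument conversion costs it needs:
  // 0 = exact, 1 = implicit conversion, -1 = not acceptable.
  static class Candidate {
    final String name;
    final int[] costs;
    Candidate(String name, int... costs) { this.name = name; this.costs = costs; }
  }

  static String resolve(List<Candidate> candidates) {
    int leastCost = Integer.MAX_VALUE;
    String best = null;
    for (Candidate c : candidates) {
      int total = 0;
      boolean match = true;
      for (int cost : c.costs) {
        if (cost < 0) {
          match = false;   // one argument cannot be converted at all
          break;
        }
        total += cost;
      }
      if (!match) {
        continue;
      }
      if (total < leastCost) {
        leastCost = total;
        best = c.name;     // strictly better candidate
      } else if (total == leastCost) {
        best = null;       // tie: treat as ambiguous
      }
    }
    return best;           // null means no match, or an ambiguous match
  }

  public static void main(String[] args) {
    System.out.println(resolve(Arrays.asList(
        new Candidate("evaluate(double,double)", 1, 1),
        new Candidate("evaluate(int,int)", 0, 1)))); // prints: evaluate(int,int)
  }
}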
*/ public static boolean isOpAnd(exprNodeDesc desc) { Class udfClass = getUDFClassFromExprDesc(desc); return UDFOPAnd.class == udfClass; } - + /** - * Returns whether the exprNodeDesc is a node of "positive". + * Returns whether the exprNodeDesc is a node of "positive". */ public static boolean isOpPositive(exprNodeDesc desc) { Class udfClass = getUDFClassFromExprDesc(desc); return UDFOPPositive.class == udfClass; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java (working copy) @@ -18,46 +18,36 @@ package org.apache.hadoop.hive.ql.exec; -import org.apache.hadoop.hive.ql.udf.UDFType; -//import org.apache.hadoop.hive.serde.ReflectionSerDe; - /** * Base class for all User-defined Aggregation Function (UDAF) classes. - * + * * UDAF classes are REQUIRED to inherit from this class. - * - * Required for a UDAF class: - * 1. Implement the init() method, which reset the status of the aggregation function. - * 2. Implement a single method called "aggregate" that returns boolean. The method should - * always return "true" on valid inputs, or the framework will throw an Exception. - * Following are some examples: - * public boolean aggregate(double a); - * public boolean aggregate(int b); - * public boolean aggregate(double c, double d); - * 3. Implement a single method called "evaluate" that returns the FINAL aggregation result. - * "evaluate" should never return "null" or an Exception will be thrown. - * Following are some examples. - * public int evaluate(); - * public long evaluate(); - * public double evaluate(); - * public Double evaluate(); - * public String evaluate(); - * - * Optional for a UDAF class (by implementing these 2 methods, the user declares that the - * UDAF support partial aggregations): - * 1. Implement a single method called "evaluatePartial" that returns the PARTIAL aggregation - * result. "evaluatePartial" should never return "null" or an Exception will be thrown. - * 2. Implement a single method called "aggregatePartial" that takes a PARTIAL aggregation - * result and returns a boolean. The method should always return "true" on valid inputs, - * or the framework will throw an Exception. - * - * Following are some examples: - * public int evaluatePartial(); - * public boolean aggregatePartial(int partial); - * - * public String evaluatePartial(); - * public boolean aggregatePartial(String partial); - * + * + * Required for a UDAF class: 1. Implement the init() method, which reset the + * status of the aggregation function. 2. Implement a single method called + * "aggregate" that returns boolean. The method should always return "true" on + * valid inputs, or the framework will throw an Exception. Following are some + * examples: public boolean aggregate(double a); public boolean aggregate(int + * b); public boolean aggregate(double c, double d); 3. Implement a single + * method called "evaluate" that returns the FINAL aggregation result. + * "evaluate" should never return "null" or an Exception will be thrown. + * Following are some examples. public int evaluate(); public long evaluate(); + * public double evaluate(); public Double evaluate(); public String evaluate(); + * + * Optional for a UDAF class (by implementing these 2 methods, the user declares + * that the UDAF support partial aggregations): 1. 
Implement a single method + * called "evaluatePartial" that returns the PARTIAL aggregation result. + * "evaluatePartial" should never return "null" or an Exception will be thrown. + * 2. Implement a single method called "aggregatePartial" that takes a PARTIAL + * aggregation result and returns a boolean. The method should always return + * "true" on valid inputs, or the framework will throw an Exception. + * + * Following are some examples: public int evaluatePartial(); public boolean + * aggregatePartial(int partial); + * + * public String evaluatePartial(); public boolean aggregatePartial(String + * partial); + * */ public class UDAF { @@ -65,11 +55,11 @@ * The resolver used for method resolution. */ UDAFEvaluatorResolver rslv; - + /** * The default constructor. */ - public UDAF() { + public UDAF() { rslv = new DefaultUDAFEvaluatorResolver(this.getClass()); } @@ -79,16 +69,17 @@ public UDAF(UDAFEvaluatorResolver rslv) { this.rslv = rslv; } - + /** * Sets the resolver * - * @param rslv The method resolver to use for method resolution. + * @param rslv + * The method resolver to use for method resolution. */ public void setResolver(UDAFEvaluatorResolver rslv) { this.rslv = rslv; } - + /** * Gets the resolver. */ Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java (working copy) @@ -24,42 +24,43 @@ import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.util.ReflectionUtils; public class ExprNodeGenericFuncEvaluator extends ExprNodeEvaluator { - private static final Log LOG = LogFactory.getLog(ExprNodeGenericFuncEvaluator.class.getName()); - + private static final Log LOG = LogFactory + .getLog(ExprNodeGenericFuncEvaluator.class.getName()); + protected exprNodeGenericFuncDesc expr; - + transient GenericUDF genericUDF; transient Object rowObject; transient ExprNodeEvaluator[] children; transient DeferredExprObject[] deferredChildren; - + /** * Class to allow deferred evaluation for GenericUDF. 
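The DeferredObject wrapper defined next lets a generic UDF pull each argument's value only when it actually asks for it, by routing get() through the child evaluator. A standalone sketch of the same lazy-argument idea; DeferredValue and ifThenElse are made-up names, not the real GenericUDF interfaces:

// Hypothetical sketch of deferred (lazy) argument evaluation.
public class DeferredEvaluationDemo {

  // Mirrors the role of a deferred object: the value is only computed
  // when get() is called.
  interface DeferredValue {
    Object get();
  }

  // A "conditional" that touches only the branch it needs, so the other
  // deferred argument is never evaluated.
  static Object ifThenElse(boolean condition, DeferredValue whenTrue,
      DeferredValue whenFalse) {
    return condition ? whenTrue.get() : whenFalse.get();
  }

  public static void main(String[] args) {
    DeferredValue cheap = new DeferredValue() {
      public Object get() { return "cheap"; }
    };
    DeferredValue expensive = new DeferredValue() {
      public Object get() { throw new IllegalStateException("never evaluated"); }
    };
    System.out.println(ifThenElse(true, cheap, expensive)); // prints: cheap
  }
}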
*/ class DeferredExprObject implements GenericUDF.DeferredObject { - - ExprNodeEvaluator eval; + + ExprNodeEvaluator eval; + DeferredExprObject(ExprNodeEvaluator eval) { this.eval = eval; } - + public Object get() throws HiveException { return eval.evaluate(rowObject); } }; - + public ExprNodeGenericFuncEvaluator(exprNodeGenericFuncDesc expr) { this.expr = expr; children = new ExprNodeEvaluator[expr.getChildExprs().size()]; - for(int i=0; i implements Serializable { +public class FileSinkOperator extends TerminalOperator implements + Serializable { public static interface RecordWriter { public void write(Writable w) throws IOException; + public void close(boolean abort) throws IOException; } @@ -56,53 +58,57 @@ transient protected Serializer serializer; transient protected BytesWritable commonKey = new BytesWritable(); transient protected TableIdEnum tabIdEnum = null; - transient private LongWritable row_count; + transient private LongWritable row_count; + public static enum TableIdEnum { TABLE_ID_1_ROWCOUNT, TABLE_ID_2_ROWCOUNT, TABLE_ID_3_ROWCOUNT, TABLE_ID_4_ROWCOUNT, TABLE_ID_5_ROWCOUNT, TABLE_ID_6_ROWCOUNT, TABLE_ID_7_ROWCOUNT, TABLE_ID_8_ROWCOUNT, TABLE_ID_9_ROWCOUNT, TABLE_ID_10_ROWCOUNT, TABLE_ID_11_ROWCOUNT, TABLE_ID_12_ROWCOUNT, TABLE_ID_13_ROWCOUNT, TABLE_ID_14_ROWCOUNT, TABLE_ID_15_ROWCOUNT; } + transient protected boolean autoDelete = false; private void commit() throws IOException { - if (!fs.rename(outPath, finalPath)) { - throw new IOException ("Unable to rename output to: " + finalPath); + if (!fs.rename(outPath, finalPath)) { + throw new IOException("Unable to rename output to: " + finalPath); } LOG.info("Committed to output file: " + finalPath); } + @Override protected void initializeOp(Configuration hconf) throws HiveException { try { - serializer = (Serializer)conf.getTableInfo().getDeserializerClass().newInstance(); + serializer = (Serializer) conf.getTableInfo().getDeserializerClass() + .newInstance(); serializer.initialize(null, conf.getTableInfo().getProperties()); - - + JobConf jc; - if(hconf instanceof JobConf) { - jc = (JobConf)hconf; + if (hconf instanceof JobConf) { + jc = (JobConf) hconf; } else { // test code path jc = new JobConf(hconf, ExecDriver.class); } int id = conf.getDestTableId(); - if ((id != 0) && (id <= TableIdEnum.values().length)){ - String enumName = "TABLE_ID_"+String.valueOf(id)+"_ROWCOUNT"; + if ((id != 0) && (id <= TableIdEnum.values().length)) { + String enumName = "TABLE_ID_" + String.valueOf(id) + "_ROWCOUNT"; tabIdEnum = TableIdEnum.valueOf(enumName); row_count = new LongWritable(); statsMap.put(tabIdEnum, row_count); - + } String specPath = conf.getDirName(); Path tmpPath = Utilities.toTempPath(specPath); - String taskId = Utilities.getTaskId(hconf); - fs =(new Path(specPath)).getFileSystem(hconf); + String taskId = Utilities.getTaskId(hconf); + fs = (new Path(specPath)).getFileSystem(hconf); finalPath = new Path(tmpPath, taskId); outPath = new Path(tmpPath, Utilities.toTempPath(taskId)); LOG.info("Writing to temp file: FS " + outPath); - HiveOutputFormat hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance(); + HiveOutputFormat hiveOutputFormat = conf.getTableInfo() + .getOutputFileFormatClass().newInstance(); boolean isCompressed = conf.getCompressed(); // The reason to keep these instead of using @@ -110,12 +116,16 @@ // getRecordWriter does not give us enough control over the file name that // we create. 
Path parent = Utilities.toTempPath(specPath); - finalPath = HiveFileFormatUtils.getOutputFormatFinalPath(parent, jc, hiveOutputFormat, isCompressed, finalPath); - final Class outputClass = serializer.getSerializedClass(); - outWriter = HiveFileFormatUtils.getHiveRecordWriter(jc, conf.getTableInfo(), outputClass, conf, outPath); + finalPath = HiveFileFormatUtils.getOutputFormatFinalPath(parent, jc, + hiveOutputFormat, isCompressed, finalPath); + final Class outputClass = serializer + .getSerializedClass(); + outWriter = HiveFileFormatUtils.getHiveRecordWriter(jc, conf + .getTableInfo(), outputClass, conf, outPath); // in recent hadoop versions, use deleteOnExit to clean tmp files. - autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, outPath); + autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, + outPath); initializeChildren(hconf); } catch (HiveException e) { @@ -126,31 +136,35 @@ } } - Writable recordValue; + Writable recordValue; + + @Override public void processOp(Object row, int tag) throws HiveException { - // Since File Sink is a terminal operator, forward is not called - so, maintain the number of output rows explicitly + // Since File Sink is a terminal operator, forward is not called - so, + // maintain the number of output rows explicitly if (counterNameToEnum != null) { - ++this.outputRows; - if (this.outputRows % 1000 == 0) { + ++outputRows; + if (outputRows % 1000 == 0) { incrCounter(numOutputRowsCntr, outputRows); - this.outputRows = 0; + outputRows = 0; } } try { - if (reporter != null) + if (reporter != null) { reporter.progress(); + } // user SerDe to serialize r, and write it out recordValue = serializer.serialize(row, inputObjInspectors[tag]); - if (row_count != null){ - row_count.set(row_count.get()+ 1); + if (row_count != null) { + row_count.set(row_count.get() + 1); } - + outWriter.write(recordValue); } catch (IOException e) { - throw new HiveException (e); + throw new HiveException(e); } catch (SerDeException e) { - throw new HiveException (e); + throw new HiveException(e); } } @@ -168,11 +182,13 @@ } } else { // Will come here if an Exception was thrown in map() or reduce(). - // Hadoop always call close() even if an Exception was thrown in map() or reduce(). + // Hadoop always call close() even if an Exception was thrown in map() or + // reduce(). 
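The sink above writes to a temporary path (outPath) and, when closed without an abort, commits by renaming it to finalPath; an aborted close deletes the temporary output instead. A small standalone sketch of that commit/abort pattern, using java.nio.file rather than the Hadoop FileSystem API; the class and path names are illustrative only:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;

// Hypothetical sketch: write to a temp file, rename on commit, delete on abort.
public class TempThenRenameSink {

  private final Path tmpPath;
  private final Path finalPath;

  TempThenRenameSink(Path finalPath) {
    this.finalPath = finalPath;
    this.tmpPath = finalPath.resolveSibling("_tmp." + finalPath.getFileName());
  }

  void write(byte[] record) throws IOException {
    Files.write(tmpPath, record, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
  }

  // Commit on success, clean up on abort.
  void close(boolean abort) throws IOException {
    if (!abort) {
      // The rename makes the output visible "all at once" at the final path.
      Files.move(tmpPath, finalPath, StandardCopyOption.ATOMIC_MOVE);
    } else {
      Files.deleteIfExists(tmpPath);
    }
  }

  public static void main(String[] args) throws IOException {
    TempThenRenameSink sink = new TempThenRenameSink(Paths.get("out.txt"));
    sink.write("row-1\n".getBytes());
    sink.close(false); // out.txt appears only after the rename
  }
}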
try { outWriter.close(abort); - if(!autoDelete) + if (!autoDelete) { fs.delete(outPath, true); + } } catch (Exception e) { e.printStackTrace(); } @@ -182,24 +198,27 @@ /** * @return the name of the operator */ + @Override public String getName() { return new String("FS"); } @Override - public void jobClose(Configuration hconf, boolean success) throws HiveException { + public void jobClose(Configuration hconf, boolean success) + throws HiveException { try { - if(conf != null) { + if (conf != null) { String specPath = conf.getDirName(); FileSinkOperator.mvFileToFinalPath(specPath, hconf, success, LOG); } } catch (IOException e) { - throw new HiveException (e); + throw new HiveException(e); } super.jobClose(hconf, success); } - - public static void mvFileToFinalPath(String specPath, Configuration hconf, boolean success, Log LOG) throws IOException, HiveException{ + + public static void mvFileToFinalPath(String specPath, Configuration hconf, + boolean success, Log LOG) throws IOException, HiveException { FileSystem fs = (new Path(specPath)).getFileSystem(hconf); Path tmpPath = Utilities.toTempPath(specPath); Path intermediatePath = new Path(tmpPath.getParent(), tmpPath.getName() @@ -215,15 +234,15 @@ // Step2: remove any tmp file or double-committed output files Utilities.removeTempOrDuplicateFiles(fs, intermediatePath); // Step3: move to the file destination - LOG.info("Moving tmp dir: " + intermediatePath + " to: " - + finalPath); + LOG.info("Moving tmp dir: " + intermediatePath + " to: " + finalPath); Utilities.renameOrMoveFiles(fs, intermediatePath, finalPath); } } else { fs.delete(tmpPath, true); } } - + + @Override public int getType() { return OperatorType.FILESINK; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/description.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/description.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/description.java (working copy) @@ -24,6 +24,8 @@ @Retention(RetentionPolicy.RUNTIME) public @interface description { String value() default "_FUNC_ is undocumented"; + String extended() default ""; + String name() default ""; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (working copy) @@ -36,7 +36,6 @@ import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.util.StringUtils; - /** * ExplainTask implementation * @@ -47,52 +46,53 @@ public ExplainTask() { super(); } - + + @Override public int execute() { - + try { - OutputStream outS = work.getResFile().getFileSystem(conf).create(work.getResFile()); + OutputStream outS = work.getResFile().getFileSystem(conf).create( + work.getResFile()); PrintStream out = new PrintStream(outS); - + // Print out the parse AST outputAST(work.getAstStringTree(), out, 0); out.println(); - + outputDependencies(out, work.getRootTasks(), 0); out.println(); - + // Go over all the tasks and dump out the plans outputStagePlans(out, work.getRootTasks(), 0); out.close(); - + return (0); - } - catch (Exception e) { - console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e)); + } catch (Exception e) { + console.printError("Failed with exception " + e.getMessage(), "\n" + + StringUtils.stringifyException(e)); return (1); } } private String 
indentString(int indent) { StringBuilder sb = new StringBuilder(); - for(int i=0; i mp, String header, - PrintStream out, boolean extended, int indent) - throws Exception { + private void outputMap(Map mp, String header, PrintStream out, + boolean extended, int indent) throws Exception { boolean first_el = true; - for(Entry ent: mp.entrySet()) { + for (Entry ent : mp.entrySet()) { if (first_el) { out.println(header); } first_el = false; - + // Print the key out.print(indentString(indent)); out.printf("%s ", ent.getKey().toString()); @@ -105,78 +105,74 @@ out.println(); } else if (ent.getValue() instanceof Serializable) { out.println(); - outputPlan((Serializable)ent.getValue(), out, extended, indent+2); - } + outputPlan((Serializable) ent.getValue(), out, extended, indent + 2); + } } } - private void outputList(List l, String header, - PrintStream out, boolean extended, int indent) - throws Exception { - + private void outputList(List l, String header, PrintStream out, + boolean extended, int indent) throws Exception { + boolean first_el = true; boolean nl = false; - for(Object o: l) { + for (Object o : l) { if (first_el) { out.print(header); } - + if (isPrintable(o)) { if (!first_el) { out.print(", "); } else { out.print(" "); } - + out.print(o); nl = true; - } - else if (o instanceof Serializable) { + } else if (o instanceof Serializable) { if (first_el) { out.println(); } - outputPlan((Serializable)o, out, extended, indent+2); + outputPlan((Serializable) o, out, extended, indent + 2); } - + first_el = false; } - + if (nl) { out.println(); } } private boolean isPrintable(Object val) { - if (val instanceof Boolean || - val instanceof String || - val instanceof Integer || - val instanceof Byte || - val instanceof Float || - val instanceof Double) { + if (val instanceof Boolean || val instanceof String + || val instanceof Integer || val instanceof Byte + || val instanceof Float || val instanceof Double) { return true; } if (val.getClass().isPrimitive()) { return true; } - + return false; } - private void outputPlan(Serializable work, PrintStream out, boolean extended, int indent) - throws Exception { + private void outputPlan(Serializable work, PrintStream out, boolean extended, + int indent) throws Exception { // Check if work has an explain annotation Annotation note = work.getClass().getAnnotation(explain.class); - + if (note instanceof explain) { - explain xpl_note = (explain)note; + explain xpl_note = (explain) note; if (extended || xpl_note.normalExplain()) { out.print(indentString(indent)); out.println(xpl_note.displayName()); } } - // If this is an operator then we need to call the plan generation on the conf and then + // If this is an operator then we need to call the plan generation on the + // conf and then // the children if (work instanceof Operator) { Operator operator = (Operator) work; @@ -184,42 +180,42 @@ outputPlan(operator.getConf(), out, extended, indent); } if (operator.getChildOperators() != null) { - for(Operator op: operator.getChildOperators()) { - outputPlan(op, out, extended, indent+2); + for (Operator op : operator.getChildOperators()) { + outputPlan(op, out, extended, indent + 2); } } return; } - + // We look at all methods that generate values for explain Method[] methods = work.getClass().getMethods(); Arrays.sort(methods, new MethodComparator()); - for(Method m: methods) { - int prop_indents = indent+2; + for (Method m : methods) { + int prop_indents = indent + 2; note = m.getAnnotation(explain.class); if (note instanceof explain) { - explain xpl_note = 
(explain)note; + explain xpl_note = (explain) note; if (extended || xpl_note.normalExplain()) { - + Object val = m.invoke(work); if (val == null) { continue; } - + String header = null; - if (!xpl_note.displayName().equals("")){ - header = indentString(prop_indents) + xpl_note.displayName() +":"; + if (!xpl_note.displayName().equals("")) { + header = indentString(prop_indents) + xpl_note.displayName() + ":"; } else { prop_indents = indent; header = indentString(prop_indents); } if (isPrintable(val)) { - + out.printf("%s ", header); out.println(val); continue; @@ -227,98 +223,100 @@ // Try this as a map try { // Go through the map and print out the stuff - Map mp = (Map)val; - outputMap(mp, header, out, extended, prop_indents+2); + Map mp = (Map) val; + outputMap(mp, header, out, extended, prop_indents + 2); continue; - } - catch (ClassCastException ce) { + } catch (ClassCastException ce) { // Ignore - all this means is that this is not a map } // Try this as a list try { - List l = (List)val; - outputList(l, header, out, extended, prop_indents+2); - + List l = (List) val; + outputList(l, header, out, extended, prop_indents + 2); + continue; - } - catch (ClassCastException ce) { + } catch (ClassCastException ce) { // Ignore } - // Finally check if it is serializable try { - Serializable s = (Serializable)val; + Serializable s = (Serializable) val; out.println(header); - outputPlan(s, out, extended, prop_indents+2); - + outputPlan(s, out, extended, prop_indents + 2); + continue; - } - catch (ClassCastException ce) { + } catch (ClassCastException ce) { // Ignore } } } } } - - private void outputPlan(Task task, PrintStream out, - boolean extended, HashSet> displayedSet, - int indent) - throws Exception { - + + private void outputPlan(Task task, PrintStream out, + boolean extended, HashSet> displayedSet, + int indent) throws Exception { + if (displayedSet.contains(task)) { return; } displayedSet.add(task); - + out.print(indentString(indent)); out.printf("Stage: %s\n", task.getId()); - // Start by getting the work part of the task and call the output plan for the work - outputPlan(task.getWork(), out, extended, indent+2); + // Start by getting the work part of the task and call the output plan for + // the work + outputPlan(task.getWork(), out, extended, indent + 2); out.println(); - if(task instanceof ConditionalTask && ((ConditionalTask)task).getListTasks() != null) { - for(Task con: ((ConditionalTask)task).getListTasks()) { + if (task instanceof ConditionalTask + && ((ConditionalTask) task).getListTasks() != null) { + for (Task con : ((ConditionalTask) task) + .getListTasks()) { outputPlan(con, out, extended, displayedSet, indent); } } if (task.getChildTasks() != null) { - for(Task child: task.getChildTasks()) { + for (Task child : task.getChildTasks()) { outputPlan(child, out, extended, displayedSet, indent); } } } - private Set> dependeciesTaskSet = new HashSet>(); - private void outputDependencies(Task task, PrintStream out, int indent, boolean rootTskCandidate) - throws Exception { - - if(dependeciesTaskSet.contains(task)) + private final Set> dependeciesTaskSet = new HashSet>(); + + private void outputDependencies(Task task, + PrintStream out, int indent, boolean rootTskCandidate) throws Exception { + + if (dependeciesTaskSet.contains(task)) { return; + } dependeciesTaskSet.add(task); - + out.print(indentString(indent)); out.printf("%s", task.getId()); if ((task.getParentTasks() == null || task.getParentTasks().isEmpty())) { - if(rootTskCandidate) + if (rootTskCandidate) { out.print(" is 
a root stage"); - } - else { + } + } else { out.print(" depends on stages: "); boolean first = true; - for(Task parent: task.getParentTasks()) { + for (Task parent : task.getParentTasks()) { if (!first) { out.print(", "); } first = false; out.print(parent.getId()); } - - if(task instanceof ConditionalTask && ((ConditionalTask)task).getListTasks() != null) { + + if (task instanceof ConditionalTask + && ((ConditionalTask) task).getListTasks() != null) { out.print(" , consists of "); first = true; - for(Task con: ((ConditionalTask)task).getListTasks()) { + for (Task con : ((ConditionalTask) task) + .getListTasks()) { if (!first) { out.print(", "); } @@ -326,63 +324,63 @@ out.print(con.getId()); } } - + } out.println(); - - if(task instanceof ConditionalTask && ((ConditionalTask)task).getListTasks() != null) { - for(Task con: ((ConditionalTask)task).getListTasks()) { + + if (task instanceof ConditionalTask + && ((ConditionalTask) task).getListTasks() != null) { + for (Task con : ((ConditionalTask) task) + .getListTasks()) { outputDependencies(con, out, indent, false); } } - + if (task.getChildTasks() != null) { - for(Task child: task.getChildTasks()) { + for (Task child : task.getChildTasks()) { outputDependencies(child, out, indent, true); } } - + } public void outputAST(String treeString, PrintStream out, int indent) { out.print(indentString(indent)); out.println("ABSTRACT SYNTAX TREE:"); - out.print(indentString(indent+2)); - out.println(treeString); + out.print(indentString(indent + 2)); + out.println(treeString); } - public void outputDependencies(PrintStream out, - List> rootTasks, - int indent) - throws Exception { + public void outputDependencies(PrintStream out, + List> rootTasks, int indent) + throws Exception { out.print(indentString(indent)); out.println("STAGE DEPENDENCIES:"); - for(Task rootTask: rootTasks) { - outputDependencies(rootTask, out, indent+2, true); + for (Task rootTask : rootTasks) { + outputDependencies(rootTask, out, indent + 2, true); } } - public void outputStagePlans(PrintStream out, - List> rootTasks, - int indent) - throws Exception { + public void outputStagePlans(PrintStream out, + List> rootTasks, int indent) + throws Exception { out.print(indentString(indent)); out.println("STAGE PLANS:"); HashSet> displayedSet = new HashSet>(); - for(Task rootTask: rootTasks) { - outputPlan(rootTask, out, work.getExtended(), - displayedSet, indent+2); + for (Task rootTask : rootTasks) { + outputPlan(rootTask, out, work.getExtended(), displayedSet, indent + 2); } } public static class MethodComparator implements Comparator { public int compare(Object o1, Object o2) { - Method m1 = (Method)o1; - Method m2 = (Method)o2; + Method m1 = (Method) o1; + Method m2 = (Method) o2; return m1.getName().compareTo(m2.getName()); } } + @Override public int getType() { return StageType.EXPLAIN; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/NumericOpMethodResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/NumericOpMethodResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/NumericOpMethodResolver.java (working copy) @@ -23,23 +23,20 @@ import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** - * The class implements the method 
resolution for operators like - * (+, -, *, %). The resolution logic is as follows: + * The class implements the method resolution for operators like (+, -, *, %). + * The resolution logic is as follows: * - * 1. If one of the parameters is a string, then it resolves to - * evaluate(double, double) - * 2. If one of the parameters is null, then it resolves to evaluate(T, T) - * where T is the other non-null parameter type. - * 3. If both of the parameters are null, then it resolves to - * evaluate(byte, byte) - * 4. Otherwise, it resolves to evaluate(T, T), where T is the type resulting - * from calling FunctionRegistry.getCommonClass() on the two arguments. + * 1. If one of the parameters is a string, then it resolves to evaluate(double, + * double) 2. If one of the parameters is null, then it resolves to evaluate(T, + * T) where T is the other non-null parameter type. 3. If both of the parameters + * are null, then it resolves to evaluate(byte, byte) 4. Otherwise, it resolves + * to evaluate(T, T), where T is the type resulting from calling + * FunctionRegistry.getCommonClass() on the two arguments. */ public class NumericOpMethodResolver implements UDFMethodResolver { @@ -47,65 +44,68 @@ * The udfclass for which resolution is needed. */ Class udfClass; - + /** * Constuctor. */ public NumericOpMethodResolver(Class udfClass) { this.udfClass = udfClass; } - - /* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.exec.UDFMethodResolver#getEvalMethod(java.util.List) + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.exec.UDFMethodResolver#getEvalMethod(java.util + * .List) */ @Override public Method getEvalMethod(List argTypeInfos) - throws AmbiguousMethodException, UDFArgumentException { - assert(argTypeInfos.size() == 2); + throws AmbiguousMethodException, UDFArgumentException { + assert (argTypeInfos.size() == 2); List pTypeInfos = null; List modArgTypeInfos = new ArrayList(); // If either argument is a string, we convert to a double because a number // in string form should always be convertible into a double - if (argTypeInfos.get(0).equals(TypeInfoFactory.stringTypeInfo) || - argTypeInfos.get(1).equals(TypeInfoFactory.stringTypeInfo) ) { + if (argTypeInfos.get(0).equals(TypeInfoFactory.stringTypeInfo) + || argTypeInfos.get(1).equals(TypeInfoFactory.stringTypeInfo)) { modArgTypeInfos.add(TypeInfoFactory.doubleTypeInfo); modArgTypeInfos.add(TypeInfoFactory.doubleTypeInfo); } else { // If it's a void, we change the type to a byte because once the types // are run through getCommonClass(), a byte and any other type T will // resolve to type T - for(int i=0; i<2; i++) { - if(argTypeInfos.get(i).equals(TypeInfoFactory.voidTypeInfo)) { - modArgTypeInfos.add(TypeInfoFactory.byteTypeInfo); + for (int i = 0; i < 2; i++) { + if (argTypeInfos.get(i).equals(TypeInfoFactory.voidTypeInfo)) { + modArgTypeInfos.add(TypeInfoFactory.byteTypeInfo); } else { modArgTypeInfos.add(argTypeInfos.get(i)); } } } - - TypeInfo commonType = FunctionRegistry.getCommonClass( - modArgTypeInfos.get(0), - modArgTypeInfos.get(1)); - - if(commonType == null) { - throw new UDFArgumentException("Unable to find a common class between" + - "types " + modArgTypeInfos.get(0).getTypeName() + - " and " + modArgTypeInfos.get(1).getTypeName()); + + TypeInfo commonType = FunctionRegistry.getCommonClass(modArgTypeInfos + .get(0), modArgTypeInfos.get(1)); + + if (commonType == null) { + throw new UDFArgumentException("Unable to find a common class between" + + "types " + modArgTypeInfos.get(0).getTypeName() + " and " + + 
modArgTypeInfos.get(1).getTypeName()); } - + pTypeInfos = new ArrayList(); pTypeInfos.add(commonType); pTypeInfos.add(commonType); Method udfMethod = null; - for(Method m: Arrays.asList(udfClass.getMethods())) { + for (Method m : Arrays.asList(udfClass.getMethods())) { if (m.getName().equals("evaluate")) { - List argumentTypeInfos = TypeInfoUtils.getParameterTypeInfos(m, - pTypeInfos.size()); + List argumentTypeInfos = TypeInfoUtils.getParameterTypeInfos( + m, pTypeInfos.size()); if (argumentTypeInfos == null) { // null means the method does not accept number of arguments passed. continue; @@ -113,7 +113,7 @@ boolean match = (argumentTypeInfos.size() == pTypeInfos.size()); - for(int i=0; i implements Serializable { +public class ConditionalTask extends Task implements + Serializable { private static final long serialVersionUID = 1L; private List> listTasks; - + private boolean resolved = false; private List> resTasks; - + private ConditionalResolver resolver; - private Object resolverCtx; - + private Object resolverCtx; + public ConditionalTask() { super(); } - + + @Override public boolean isMapRedTask() { - for (Task task : listTasks) - if (task.isMapRedTask()) + for (Task task : listTasks) { + if (task.isMapRedTask()) { return true; - + } + } + return false; } - + + @Override public boolean hasReduce() { - for (Task task : listTasks) - if (task.hasReduce()) + for (Task task : listTasks) { + if (task.hasReduce()) { return true; - + } + } + return false; } - - public void initialize (HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) { + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, + DriverContext driverContext) { super.initialize(conf, queryPlan, driverContext); } - + @Override public int execute() { resTasks = resolver.getTasks(conf, resolverCtx); resolved = true; - for(Task tsk: getListTasks()) { - if(!resTasks.contains(tsk)) { - this.driverContext.getRunnable().remove(tsk); + for (Task tsk : getListTasks()) { + if (!resTasks.contains(tsk)) { + driverContext.getRunnable().remove(tsk); console.printInfo(ExecDriver.getJobEndMsg("" + Utilities.randGen.nextInt()) + ", job is filtered out (removed at runtime)."); - if(tsk.getChildTasks() != null) { - for(Task child : tsk.getChildTasks()) { + if (tsk.getChildTasks() != null) { + for (Task child : tsk.getChildTasks()) { child.parentTasks.remove(tsk); - if(DriverContext.isLaunchable(child)) - this.driverContext.addToRunnable(child); + if (DriverContext.isLaunchable(child)) { + driverContext.addToRunnable(child); + } } } - } else if(!this.driverContext.getRunnable().contains(tsk)){ - this.driverContext.addToRunnable(tsk); + } else if (!driverContext.getRunnable().contains(tsk)) { + driverContext.addToRunnable(tsk); } } return 0; @@ -99,7 +109,8 @@ } /** - * @param resolver the resolver to set + * @param resolver + * the resolver to set */ public void setResolver(ConditionalResolver resolver) { this.resolver = resolver; @@ -111,29 +122,34 @@ public Object getResolverCtx() { return resolverCtx; } - - // used to determine whether child tasks can be run. + + // used to determine whether child tasks can be run. 
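ConditionalTask.execute() above decides at run time which of its candidate tasks should actually run: tasks filtered out by the resolver are removed from the runnable queue and their launchable children are promoted, while chosen tasks not yet queued are added. A compact standalone sketch of that queue-editing pattern; SimpleTask, applySelection and the queued flag are hypothetical stand-ins:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

// Hypothetical sketch of runtime task filtering against a runnable queue.
public class ConditionalSelectionDemo {

  static class SimpleTask {
    final String name;
    final List<SimpleTask> children = new ArrayList<SimpleTask>();
    boolean queued;
    SimpleTask(String name) { this.name = name; }
  }

  // Keep only the chosen tasks; drop the rest and promote their children.
  static void applySelection(Queue<SimpleTask> runnable, List<SimpleTask> candidates,
      List<SimpleTask> chosen) {
    for (SimpleTask t : candidates) {
      if (!chosen.contains(t)) {
        runnable.remove(t);
        for (SimpleTask child : t.children) {
          if (!child.queued) {           // stands in for a launchability check
            runnable.add(child);
            child.queued = true;
          }
        }
      } else if (!runnable.contains(t)) {
        runnable.add(t);
        t.queued = true;
      }
    }
  }

  public static void main(String[] args) {
    SimpleTask a = new SimpleTask("a");
    SimpleTask b = new SimpleTask("b");
    b.children.add(new SimpleTask("b-child"));
    Queue<SimpleTask> runnable = new LinkedList<SimpleTask>(Arrays.asList(a, b));
    a.queued = true;
    b.queued = true;
    applySelection(runnable, Arrays.asList(a, b), Arrays.asList(a));
    for (SimpleTask t : runnable) {
      System.out.println(t.name);        // prints: a, then b-child
    }
  }
}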
+ @Override public boolean done() { boolean ret = true; - List> parentTasks = this.getParentTasks(); + List> parentTasks = getParentTasks(); if (parentTasks != null) { - for(Task par: parentTasks) + for (Task par : parentTasks) { ret = ret && par.done(); + } } List> retTasks; - if(resolved) - retTasks = this.resTasks; - else + if (resolved) { + retTasks = resTasks; + } else { retTasks = getListTasks(); - if (ret && retTasks != null) { - for (Task tsk : retTasks) + } + if (ret && retTasks != null) { + for (Task tsk : retTasks) { ret = ret && tsk.done(); + } } return ret; } /** - * @param resolverCtx the resolverCtx to set + * @param resolverCtx + * the resolverCtx to set */ public void setResolverCtx(Object resolverCtx) { this.resolverCtx = resolverCtx; @@ -147,12 +163,14 @@ } /** - * @param listTasks the listTasks to set + * @param listTasks + * the listTasks to set */ public void setListTasks(List> listTasks) { this.listTasks = listTasks; } - + + @Override public int getType() { return StageType.CONDITIONAL; } @@ -169,10 +187,11 @@ * * @return true if the task got added false if it already existed */ + @Override public boolean addDependentTask(Task dependent) { boolean ret = false; - if(this.getListTasks() != null) { - for(Task tsk: this.getListTasks()) { + if (getListTasks() != null) { + for (Task tsk : getListTasks()) { ret = ret & tsk.addDependentTask(dependent); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (working copy) @@ -26,23 +26,26 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; /** - * Extract operator implementation - * Extracts a subobject and passes that on. + * Extract operator implementation Extracts a subobject and passes that on. 
**/ -public class ExtractOperator extends Operator implements Serializable { +public class ExtractOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; transient protected ExprNodeEvaluator eval; + @Override protected void initializeOp(Configuration hconf) throws HiveException { eval = ExprNodeEvaluatorFactory.get(conf.getCol()); outputObjInspector = eval.initialize(inputObjInspectors[0]); initializeChildren(hconf); } + @Override public void processOp(Object row, int tag) throws HiveException { forward(eval.evaluate(row), outputObjInspector); } + @Override public int getType() { return OperatorType.EXTRACT; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -56,17 +56,19 @@ private MemoryMXBean memoryMXBean; private long numRows = 0; private long nextCntr = 1; - + + @Override public void configure(JobConf job) { - // Allocate the bean at the beginning - + // Allocate the bean at the beginning - memoryMXBean = ManagementFactory.getMemoryMXBean(); l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax()); - + try { - l4j.info("conf classpath = " - + Arrays.asList(((URLClassLoader)job.getClassLoader()).getURLs())); - l4j.info("thread classpath = " - + Arrays.asList(((URLClassLoader)Thread.currentThread().getContextClassLoader()).getURLs())); + l4j.info("conf classpath = " + + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs())); + l4j.info("thread classpath = " + + Arrays.asList(((URLClassLoader) Thread.currentThread() + .getContextClassLoader()).getURLs())); } catch (Exception e) { l4j.info("cannot get classpath: " + e.getMessage()); } @@ -88,35 +90,39 @@ } fetchOperators = new HashMap(); // create map local operators - for (Map.Entry entry : localWork.getAliasToFetchWork().entrySet()) { - fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), job)); + for (Map.Entry entry : localWork.getAliasToFetchWork() + .entrySet()) { + fetchOperators.put(entry.getKey(), new FetchOperator(entry.getValue(), + job)); l4j.info("fetchoperator for " + entry.getKey() + " created"); } // initialize map local operators for (Map.Entry entry : fetchOperators.entrySet()) { - Operator forwardOp = localWork.getAliasToWork().get(entry.getKey()); + Operator forwardOp = localWork.getAliasToWork() + .get(entry.getKey()); // All the operators need to be initialized before process - forwardOp.initialize(jc, new ObjectInspector[]{entry.getValue().getOutputObjectInspector()}); + forwardOp.initialize(jc, new ObjectInspector[] { entry.getValue() + .getOutputObjectInspector() }); l4j.info("fetchoperator for " + entry.getKey() + " initialized"); } - // defer processing of map local operators to first row if in case there is no input (??) + // defer processing of map local operators to first row if in case there + // is no input (??) } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { // will this be true here? 
- // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; } else { - throw new RuntimeException ("Map operator initialization failed", e); + throw new RuntimeException("Map operator initialization failed", e); } } } - public void map(Object key, Object value, - OutputCollector output, - Reporter reporter) throws IOException { - if(oc == null) { + public void map(Object key, Object value, OutputCollector output, + Reporter reporter) throws IOException { + if (oc == null) { oc = output; rp = reporter; mo.setOutputCollector(oc); @@ -126,11 +132,13 @@ try { mapredLocalWork localWork = mo.getConf().getMapLocalWork(); int fetchOpNum = 0; - for (Map.Entry entry : fetchOperators.entrySet()) { + for (Map.Entry entry : fetchOperators + .entrySet()) { int fetchOpRows = 0; String alias = entry.getKey(); FetchOperator fetchOp = entry.getValue(); - Operator forwardOp = localWork.getAliasToWork().get(alias); + Operator forwardOp = localWork + .getAliasToWork().get(alias); while (true) { InspectableObject row = fetchOp.getNextRow(); @@ -140,40 +148,46 @@ } fetchOpRows++; forwardOp.process(row.o, 0); - // check if any operator had a fatal error or early exit during execution - if ( forwardOp.getDone() ) { + // check if any operator had a fatal error or early exit during + // execution + if (forwardOp.getDone()) { done = true; break; } } - + if (l4j.isInfoEnabled()) { - l4j.info("fetch " + fetchOpNum++ + " processed " + fetchOpRows + " used mem: " + memoryMXBean.getHeapMemoryUsage().getUsed()); + l4j + .info("fetch " + fetchOpNum++ + " processed " + fetchOpRows + + " used mem: " + + memoryMXBean.getHeapMemoryUsage().getUsed()); } } } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; } else { - throw new RuntimeException ("Map local work failed", e); + throw new RuntimeException("Map local work failed", e); } } } } try { - if (mo.getDone()) + if (mo.getDone()) { done = true; - else { - // Since there is no concept of a group, we don't invoke startGroup/endGroup for a mapper - mo.process((Writable)value); + } else { + // Since there is no concept of a group, we don't invoke + // startGroup/endGroup for a mapper + mo.process((Writable) value); if (l4j.isInfoEnabled()) { numRows++; if (numRows == nextCntr) { long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed(); - l4j.info("ExecMapper: processing " + numRows + " rows: used memory = " + used_memory); + l4j.info("ExecMapper: processing " + numRows + + " rows: used memory = " + used_memory); nextCntr = getNextCntr(numRows); } } @@ -182,26 +196,29 @@ abort = true; e.printStackTrace(); if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; } else { - throw new RuntimeException (e.getMessage(), e); + throw new RuntimeException(e.getMessage(), e); } } } private long getNextCntr(long cntr) { - // A very simple counter to keep track of number of rows processed by the reducer. It dumps + // A very simple counter to keep track of number of rows processed by the + // reducer. 
It dumps // every 1 million times, and quickly before that - if (cntr >= 1000000) + if (cntr >= 1000000) { return cntr + 1000000; - + } + return 10 * cntr; } + @Override public void close() { // No row was processed - if(oc == null) { + if (oc == null) { l4j.trace("Close called. no row processed by map."); } @@ -212,24 +229,26 @@ if (fetchOperators != null) { mapredLocalWork localWork = mo.getConf().getMapLocalWork(); for (Map.Entry entry : fetchOperators.entrySet()) { - Operator forwardOp = localWork.getAliasToWork().get(entry.getKey()); + Operator forwardOp = localWork + .getAliasToWork().get(entry.getKey()); forwardOp.close(abort); } } - + if (l4j.isInfoEnabled()) { long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed(); - l4j.info("ExecMapper: processed " + numRows + " rows: used memory = " + used_memory); + l4j.info("ExecMapper: processed " + numRows + " rows: used memory = " + + used_memory); } - - reportStats rps = new reportStats (rp); + + reportStats rps = new reportStats(rp); mo.preorderMap(rps); return; } catch (Exception e) { - if(!abort) { + if (!abort) { // signal new failure to map-reduce l4j.error("Hit error while closing operators - failing tree"); - throw new RuntimeException ("Error while closing operators", e); + throw new RuntimeException("Error while closing operators", e); } } } @@ -240,15 +259,17 @@ public static class reportStats implements Operator.OperatorFunc { Reporter rp; - public reportStats (Reporter rp) { + + public reportStats(Reporter rp) { this.rp = rp; } + public void func(Operator op) { Map opStats = op.getStats(); - for(Map.Entry e: opStats.entrySet()) { - if(this.rp != null) { - rp.incrCounter(e.getKey(), e.getValue()); - } + for (Map.Entry e : opStats.entrySet()) { + if (rp != null) { + rp.incrCounter(e.getKey(), e.getValue()); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (working copy) @@ -18,11 +18,19 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; -import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.plan.ConditionalWork; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.plan.FunctionWork; +import org.apache.hadoop.hive.ql.plan.copyWork; +import org.apache.hadoop.hive.ql.plan.explainWork; +import org.apache.hadoop.hive.ql.plan.fetchWork; +import org.apache.hadoop.hive.ql.plan.mapredWork; +import org.apache.hadoop.hive.ql.plan.moveWork; /** * TaskFactory implementation @@ -46,38 +54,43 @@ taskvec.add(new taskTuple(fetchWork.class, FetchTask.class)); taskvec.add(new taskTuple(copyWork.class, CopyTask.class)); taskvec.add(new taskTuple(DDLWork.class, DDLTask.class)); - taskvec.add(new taskTuple(FunctionWork.class, FunctionTask.class)); - taskvec.add(new taskTuple(explainWork.class, ExplainTask.class)); - taskvec.add(new taskTuple(ConditionalWork.class, ConditionalTask.class)); + taskvec.add(new taskTuple(FunctionWork.class, + FunctionTask.class)); + taskvec + .add(new taskTuple(explainWork.class, ExplainTask.class)); + taskvec.add(new taskTuple(ConditionalWork.class, + ConditionalTask.class)); // we are taking this out to allow us to instantiate either MapRedTask or // ExecDriver 
dynamically at run time based on configuration - // taskvec.add(new taskTuple(mapredWork.class, ExecDriver.class)); + // taskvec.add(new taskTuple(mapredWork.class, + // ExecDriver.class)); } - private static ThreadLocal tid = new ThreadLocal () { + private static ThreadLocal tid = new ThreadLocal() { + @Override protected synchronized Integer initialValue() { - return new Integer(0); - } + return new Integer(0); + } }; public static int getAndIncrementId() { int curValue = tid.get().intValue(); - tid.set(new Integer(curValue+1)); + tid.set(new Integer(curValue + 1)); return curValue; } - public static void resetId() { tid.set(new Integer(0)); } - + @SuppressWarnings("unchecked") - public static Task get(Class workClass, HiveConf conf) { - - for(taskTuple t: taskvec) { - if(t.workClass == workClass) { + public static Task get(Class workClass, + HiveConf conf) { + + for (taskTuple t : taskvec) { + if (t.workClass == workClass) { try { - Task ret = (Task)t.taskClass.newInstance(); + Task ret = (Task) t.taskClass.newInstance(); ret.setId("Stage-" + Integer.toString(getAndIncrementId())); return ret; } catch (Exception e) { @@ -85,57 +98,58 @@ } } } - - if(workClass == mapredWork.class) { + if (workClass == mapredWork.class) { + boolean viachild = conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD); - + try { // in local mode - or if otherwise so configured - always submit // jobs via separate jvm Task ret = null; - if(conf.getVar(HiveConf.ConfVars.HADOOPJT).equals("local") || viachild) { - ret = (Task)MapRedTask.class.newInstance(); + if (conf.getVar(HiveConf.ConfVars.HADOOPJT).equals("local") || viachild) { + ret = (Task) MapRedTask.class.newInstance(); } else { - ret = (Task)ExecDriver.class.newInstance(); + ret = (Task) ExecDriver.class.newInstance(); } ret.setId("Stage-" + Integer.toString(getAndIncrementId())); return ret; } catch (Exception e) { - throw new RuntimeException (e.getMessage(), e); + throw new RuntimeException(e.getMessage(), e); } } - throw new RuntimeException ("No task for work class " + workClass.getName()); + throw new RuntimeException("No task for work class " + workClass.getName()); } public static Task get(T work, HiveConf conf, - Task ... tasklist) { - Task ret = get((Class )work.getClass(), conf); + Task... tasklist) { + Task ret = get((Class) work.getClass(), conf); ret.setWork(work); - if(tasklist.length == 0) + if (tasklist.length == 0) { return (ret); + } - ArrayList> clist = new ArrayList> (); - for(Task tsk: tasklist) { + ArrayList> clist = new ArrayList>(); + for (Task tsk : tasklist) { clist.add(tsk); } ret.setChildTasks(clist); return (ret); } - public static Task getAndMakeChild( - T work, HiveConf conf, - Task ... tasklist) { - Task ret = get((Class )work.getClass(), conf); + public static Task getAndMakeChild(T work, + HiveConf conf, Task... 
tasklist) { + Task ret = get((Class) work.getClass(), conf); ret.setWork(work); - if(tasklist.length == 0) + if (tasklist.length == 0) { return (ret); + } // Add the new task as child of each of the passed in tasks - for(Task tsk: tasklist) { + for (Task tsk : tasklist) { List> children = tsk.getChildTasks(); if (children == null) { children = new ArrayList>(); @@ -143,7 +157,7 @@ children.add(ret); tsk.setChildTasks(children); } - + return (ret); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java (working copy) @@ -31,44 +31,47 @@ /** * Select operator implementation **/ -public class SelectOperator extends Operator implements Serializable { +public class SelectOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; transient protected ExprNodeEvaluator[] eval; transient Object[] output; - + + @Override protected void initializeOp(Configuration hconf) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { - initializeChildren(hconf); + initializeChildren(hconf); return; } - + ArrayList colList = conf.getColList(); eval = new ExprNodeEvaluator[colList.size()]; - for(int i=0; i implements Serializable { - + private static final long serialVersionUID = 1L; final static String hadoopMemKey = "HADOOP_HEAPSIZE"; final static String hadoopOptsKey = "HADOOP_OPTS"; - final static String HIVE_SYS_PROP[] = {"build.dir", "build.dir.hive"}; - + final static String HIVE_SYS_PROP[] = { "build.dir", "build.dir.hive" }; + public MapRedTask() { super(); } - + + @Override public int execute() { try { @@ -60,7 +61,8 @@ String libJarsOption; { - String addedJars = ExecDriver.getResourceFiles(conf, SessionState.ResourceType.JAR); + String addedJars = ExecDriver.getResourceFiles(conf, + SessionState.ResourceType.JAR); conf.setVar(ConfVars.HIVEADDEDJARS, addedJars); String auxJars = conf.getAuxJars(); @@ -76,14 +78,14 @@ libJarsOption = " -libjars " + addedJars + " "; } else { libJarsOption = " -libjars " + addedJars + "," + auxJars + " "; - } + } } } // Generate the hiveConfArgs after potentially adding the jars String hiveConfArgs = ExecDriver.generateCmdLine(conf); File scratchDir = new File(conf.getVar(HiveConf.ConfVars.SCRATCHDIR)); - + mapredWork plan = getWork(); File planFile = File.createTempFile("plan", ".xml", scratchDir); @@ -91,21 +93,22 @@ FileOutputStream out = new FileOutputStream(planFile); Utilities.serializeMapRedWork(plan, out); - String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) - ? "-silent" : ""; + String isSilent = "true".equalsIgnoreCase(System + .getProperty("test.silent")) ? 
"-silent" : ""; String jarCmd; - if(ShimLoader.getHadoopShims().usesJobShell()) { + if (ShimLoader.getHadoopShims().usesJobShell()) { jarCmd = libJarsOption + hiveJar + " " + ExecDriver.class.getName(); } else { jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption; } - String cmdLine = hadoopExec + " jar " + jarCmd + - " -plan " + planFile.toString() + " " + isSilent + " " + hiveConfArgs; - - String files = ExecDriver.getResourceFiles(conf, SessionState.ResourceType.FILE); - if(!files.isEmpty()) { + String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + + planFile.toString() + " " + isSilent + " " + hiveConfArgs; + + String files = ExecDriver.getResourceFiles(conf, + SessionState.ResourceType.FILE); + if (!files.isEmpty()) { cmdLine = cmdLine + " -files " + files; } @@ -117,63 +120,65 @@ { StringBuilder sb = new StringBuilder(); Properties p = System.getProperties(); - for (int k = 0; k < HIVE_SYS_PROP.length; k++) { - if (p.containsKey(HIVE_SYS_PROP[k])) { - sb.append(" -D" + HIVE_SYS_PROP[k] + "=" + p.getProperty(HIVE_SYS_PROP[k])); + for (String element : HIVE_SYS_PROP) { + if (p.containsKey(element)) { + sb.append(" -D" + element + "=" + p.getProperty(element)); } } hadoopOpts = sb.toString(); } - + // Inherit the environment variables String[] env; { Map variables = new HashMap(System.getenv()); // The user can specify the hadoop memory int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM); - + if (hadoopMem == 0) { variables.remove(hadoopMemKey); } else { // user specified the memory - only applicable for local mode variables.put(hadoopMemKey, String.valueOf(hadoopMem)); } - + if (variables.containsKey(hadoopOptsKey)) { - variables.put(hadoopOptsKey, variables.get(hadoopOptsKey) + hadoopOpts); + variables.put(hadoopOptsKey, variables.get(hadoopOptsKey) + + hadoopOpts); } else { variables.put(hadoopOptsKey, hadoopOpts); } - + env = new String[variables.size()]; int pos = 0; - for (Map.Entry entry : variables.entrySet()) { - String name = entry.getKey(); - String value = entry.getValue(); - env[pos++] = name + "=" + value; - } + for (Map.Entry entry : variables.entrySet()) { + String name = entry.getKey(); + String value = entry.getValue(); + env[pos++] = name + "=" + value; + } } - + // Run ExecDriver in another JVM executor = Runtime.getRuntime().exec(cmdLine, env); - StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out); - StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, System.err); - + StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), + null, System.out); + StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), + null, System.err); + outPrinter.start(); errPrinter.start(); - + int exitVal = executor.waitFor(); - if(exitVal != 0) { + if (exitVal != 0) { LOG.error("Execution failed with exit status: " + exitVal); } else { LOG.info("Execution completed successfully"); } return exitVal; - } - catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); LOG.error("Exception: " + e.getMessage()); return (1); @@ -190,7 +195,8 @@ mapredWork w = getWork(); return w.getReducer() != null; } - + + @Override public int getType() { return StageType.MAPREDLOCAL; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TaskRunner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskRunner.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskRunner.java (working copy) 
@@ -18,20 +18,10 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.*; -import java.util.*; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; +import java.io.Serializable; + import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.util.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - - /** * TaskRunner implementation **/ @@ -51,6 +41,7 @@ return tsk; } + @Override public void run() { SessionState.start(ss); runSequential(); @@ -65,5 +56,4 @@ result.setExitVal(exitVal); } - } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ComparisonOpMethodResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ComparisonOpMethodResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ComparisonOpMethodResolver.java (working copy) @@ -23,54 +23,52 @@ import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** - * The class implements the method resolution for operators like - * (> < <= >= = <>). The resolution logic is as follows: - * 1. If one of the parameters is null, then it resolves to - * evaluate(Double, Double) - * 2. If both of the parameters are of type T, then it resolves to - * evaluate(T, T) - * 3. If 1 and 2 fails then it resolves to evaluate(Double, Double). + * The class implements the method resolution for operators like (> < <= >= = + * <>). The resolution logic is as follows: 1. If one of the parameters is null, + * then it resolves to evaluate(Double, Double) 2. If both of the parameters are + * of type T, then it resolves to evaluate(T, T) 3. If 1 and 2 fail then it + * resolves to evaluate(Double, Double). */ public class ComparisonOpMethodResolver implements UDFMethodResolver { /** * The udfclass for which resolution is needed. */ - private Class udfClass; - + private final Class udfClass; + /** * Constuctor. 
*/ public ComparisonOpMethodResolver(Class udfClass) { this.udfClass = udfClass; } - - /* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.exec.UDFMethodResolver#getEvalMethod(java.util.List) + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.exec.UDFMethodResolver#getEvalMethod(java.util + * .List) */ @Override public Method getEvalMethod(List argTypeInfos) throws AmbiguousMethodException { - assert(argTypeInfos.size() == 2); + assert (argTypeInfos.size() == 2); List pTypeInfos = null; - if (argTypeInfos.get(0).equals(TypeInfoFactory.voidTypeInfo) || - argTypeInfos.get(1).equals(TypeInfoFactory.voidTypeInfo)) { + if (argTypeInfos.get(0).equals(TypeInfoFactory.voidTypeInfo) + || argTypeInfos.get(1).equals(TypeInfoFactory.voidTypeInfo)) { pTypeInfos = new ArrayList(); pTypeInfos.add(TypeInfoFactory.doubleTypeInfo); pTypeInfos.add(TypeInfoFactory.doubleTypeInfo); - } - else if (argTypeInfos.get(0) == argTypeInfos.get(1)) { + } else if (argTypeInfos.get(0) == argTypeInfos.get(1)) { pTypeInfos = argTypeInfos; - } - else { + } else { pTypeInfos = new ArrayList(); pTypeInfos.add(TypeInfoFactory.doubleTypeInfo); pTypeInfos.add(TypeInfoFactory.doubleTypeInfo); @@ -78,18 +76,19 @@ Method udfMethod = null; - for(Method m: Arrays.asList(udfClass.getMethods())) { + for (Method m : Arrays.asList(udfClass.getMethods())) { if (m.getName().equals("evaluate")) { - List acceptedTypeInfos = TypeInfoUtils.getParameterTypeInfos(m, pTypeInfos.size()); + List acceptedTypeInfos = TypeInfoUtils.getParameterTypeInfos( + m, pTypeInfos.size()); if (acceptedTypeInfos == null) { // null means the method does not accept number of arguments passed. continue; } - + boolean match = (acceptedTypeInfos.size() == pTypeInfos.size()); - for(int i=0; i implements Serializable { +public class GroupByOperator extends Operator implements + Serializable { - static final private Log LOG = LogFactory.getLog(GroupByOperator.class.getName()); + static final private Log LOG = LogFactory.getLog(GroupByOperator.class + .getName()); private static final long serialVersionUID = 1L; - private static final int NUMROWSESTIMATESIZE = 1000; + private static final int NUMROWSESTIMATESIZE = 1000; transient protected ExprNodeEvaluator[] keyFields; transient protected ObjectInspector[] keyObjectInspectors; transient protected Object[] keyObjects; - + transient protected ExprNodeEvaluator[][] aggregationParameterFields; transient protected ObjectInspector[][] aggregationParameterObjectInspectors; transient protected ObjectInspector[][] aggregationParameterStandardObjectInspectors; transient protected Object[][] aggregationParameterObjects; - // In the future, we may allow both count(DISTINCT a) and sum(DISTINCT a) in the same SQL clause, - // so aggregationIsDistinct is a boolean array instead of a single number. + // In the future, we may allow both count(DISTINCT a) and sum(DISTINCT a) in + // the same SQL clause, + // so aggregationIsDistinct is a boolean array instead of a single number. 
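The getEvalMethod() hunk above first settles on candidate parameter types (falling back to Double, Double when an argument is null-typed or the two argument types differ) and then scans the UDF class's public methods for an evaluate overload that accepts exactly those types. The reflective scan, reduced to plain JDK classes; the UDF-style class and the type list below are invented examples, not Hive's TypeInfo machinery:

import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;

public class EvaluateResolver {

    // Example UDF-style class with two evaluate overloads.
    public static class ToyGreaterThan {
        public Boolean evaluate(Double a, Double b) {
            return a > b;
        }

        public Boolean evaluate(String a, String b) {
            return a.compareTo(b) > 0;
        }
    }

    // Returns the evaluate() overload whose parameter types match exactly, or null.
    static Method resolve(Class<?> udfClass, List<Class<?>> paramTypes) {
        for (Method m : udfClass.getMethods()) {
            if (!m.getName().equals("evaluate")) {
                continue;
            }
            List<Class<?>> accepted = Arrays.asList(m.getParameterTypes());
            if (accepted.equals(paramTypes)) {
                return m;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        // Rule 2 above: both arguments share a type, so evaluate(T, T) is chosen.
        Method m = resolve(ToyGreaterThan.class, Arrays.<Class<?>>asList(String.class, String.class));
        System.out.println(m);
    }
}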
transient protected boolean[] aggregationIsDistinct; transient GenericUDAFEvaluator[] aggregationEvaluators; - + transient protected ArrayList objectInspectors; transient ArrayList fieldNames; - // Used by sort-based GroupBy: Mode = COMPLETE, PARTIAL1, PARTIAL2, MERGEPARTIAL + // Used by sort-based GroupBy: Mode = COMPLETE, PARTIAL1, PARTIAL2, + // MERGEPARTIAL transient protected ArrayList currentKeys; - transient protected ArrayList newKeys; + transient protected ArrayList newKeys; transient protected AggregationBuffer[] aggregations; transient protected Object[][] aggregationsParametersLastInvoke; // Used by hash-based GroupBy: Mode = HASH, PARTIALS transient protected HashMap hashAggregations; - + // Used by hash distinct aggregations when hashGrpKeyNotRedKey is true transient protected HashSet> keysCurrentGroup; - + transient boolean bucketGroup; - + transient boolean firstRow; - transient long totalMemory; + transient long totalMemory; transient boolean hashAggr; - // The reduction is happening on the reducer, and the grouping key and reduction keys are different. + // The reduction is happening on the reducer, and the grouping key and + // reduction keys are different. // For example: select a, count(distinct b) from T group by a // The data is sprayed by 'b' and the reducer is grouping it by 'a' - transient boolean groupKeyIsNotReduceKey; + transient boolean groupKeyIsNotReduceKey; transient boolean firstRowInGroup; - transient long numRowsInput; - transient long numRowsHashTbl; - transient int groupbyMapAggrInterval; - transient long numRowsCompareHashAggr; - transient float minReductionHashAggr; + transient long numRowsInput; + transient long numRowsHashTbl; + transient int groupbyMapAggrInterval; + transient long numRowsCompareHashAggr; + transient float minReductionHashAggr; // current Key ObjectInspectors are standard ObjectInspectors transient protected ObjectInspector[] currentKeyObjectInspectors; // new Key ObjectInspectors are objectInspectors from the parent transient StructObjectInspector newKeyObjectInspector; transient StructObjectInspector currentKeyObjectInspector; - + /** - * This is used to store the position and field names for variable length fields. + * This is used to store the position and field names for variable length + * fields. **/ class varLenFields { - int aggrPos; - List fields; + int aggrPos; + List fields; + varLenFields(int aggrPos, List fields) { this.aggrPos = aggrPos; - this.fields = fields; + this.fields = fields; } int getAggrPos() { @@ -132,24 +138,27 @@ } }; - // for these positions, some variable primitive type (String) is used, so size cannot be estimated. sample it at runtime. + // for these positions, some variable primitive type (String) is used, so size + // cannot be estimated. sample it at runtime. 
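The comments above describe the sizing trick used for both keys and aggregation buffers: fixed-width fields are costed up front, while String-valued positions are only recorded and their actual lengths are sampled as rows flow through, refining the per-entry estimate. A small stand-alone illustration of that bookkeeping, with invented class and field names:

import java.util.List;

// Tracks an estimated entry size where some positions hold variable-length Strings.
class RowSizeEstimator {
    private final int fixedSize;                  // known up front from the fixed-width fields
    private final List<Integer> stringPositions;  // positions whose size is sampled at runtime
    private long sampledBytes = 0;
    private long sampledRows = 0;

    RowSizeEstimator(int fixedSize, List<Integer> stringPositions) {
        this.fixedSize = fixedSize;
        this.stringPositions = stringPositions;
    }

    // Called occasionally (e.g. every NUMROWSESTIMATESIZE entries) with a real row.
    void sample(List<Object> row) {
        for (int pos : stringPositions) {
            Object v = row.get(pos);
            if (v instanceof String) {
                sampledBytes += ((String) v).length();
            }
        }
        sampledRows++;
    }

    // Fixed part plus the observed average variable part.
    long estimatedEntrySize() {
        long variable = (sampledRows == 0) ? 0 : sampledBytes / sampledRows;
        return fixedSize + variable;
    }
}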
transient List keyPositionsSize; - // for these positions, some variable primitive type (String) is used for the aggregation classes + // for these positions, some variable primitive type (String) is used for the + // aggregation classes transient List aggrPositions; - transient int fixedRowSize; - transient long maxHashTblMemory; - transient int totalVariableSize; - transient int numEntriesVarSize; - transient int numEntriesHashTable; - + transient int fixedRowSize; + transient long maxHashTblMemory; + transient int totalVariableSize; + transient int numEntriesVarSize; + transient int numEntriesHashTable; + + @Override protected void initializeOp(Configuration hconf) throws HiveException { totalMemory = Runtime.getRuntime().totalMemory(); numRowsInput = 0; numRowsHashTbl = 0; - assert(inputObjInspectors.length == 1); + assert (inputObjInspectors.length == 1); ObjectInspector rowInspector = inputObjInspectors[0]; // init keyFields @@ -160,40 +169,51 @@ for (int i = 0; i < keyFields.length; i++) { keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i)); keyObjectInspectors[i] = keyFields[i].initialize(rowInspector); - currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], - ObjectInspectorCopyOption.WRITABLE); + currentKeyObjectInspectors[i] = ObjectInspectorUtils + .getStandardObjectInspector(keyObjectInspectors[i], + ObjectInspectorCopyOption.WRITABLE); keyObjects[i] = null; } newKeys = new ArrayList(keyFields.length); - + // init aggregationParameterFields - aggregationParameterFields = new ExprNodeEvaluator[conf.getAggregators().size()][]; - aggregationParameterObjectInspectors = new ObjectInspector[conf.getAggregators().size()][]; - aggregationParameterStandardObjectInspectors = new ObjectInspector[conf.getAggregators().size()][]; + aggregationParameterFields = new ExprNodeEvaluator[conf.getAggregators() + .size()][]; + aggregationParameterObjectInspectors = new ObjectInspector[conf + .getAggregators().size()][]; + aggregationParameterStandardObjectInspectors = new ObjectInspector[conf + .getAggregators().size()][]; aggregationParameterObjects = new Object[conf.getAggregators().size()][]; for (int i = 0; i < aggregationParameterFields.length; i++) { - ArrayList parameters = conf.getAggregators().get(i).getParameters(); + ArrayList parameters = conf.getAggregators().get(i) + .getParameters(); aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()]; - aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()]; - aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()]; + aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters + .size()]; + aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters + .size()]; aggregationParameterObjects[i] = new Object[parameters.size()]; for (int j = 0; j < parameters.size(); j++) { - aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j)); - aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector); - aggregationParameterStandardObjectInspectors[i][j] = - ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], + aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory + .get(parameters.get(j)); + aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j] + .initialize(rowInspector); + aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils 
+ .getStandardObjectInspector( + aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE); aggregationParameterObjects[i][j] = null; } } // init aggregationIsDistinct aggregationIsDistinct = new boolean[conf.getAggregators().size()]; - for(int i=0; i(totalFields); - for(int i=0; i(); aggrPositions = new ArrayList(); - groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL); + groupbyMapAggrInterval = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL); // compare every groupbyMapAggrInterval rows numRowsCompareHashAggr = groupbyMapAggrInterval; - minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); + minReductionHashAggr = HiveConf.getFloatVar(hconf, + HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); groupKeyIsNotReduceKey = conf.getGroupKeyNotReductionKey(); - if (groupKeyIsNotReduceKey) + if (groupKeyIsNotReduceKey) { keysCurrentGroup = new HashSet>(); + } } fieldNames = conf.getOutputColumnNames(); for (int i = 0; i < keyFields.length; i++) { - objectInspectors.set(i, currentKeyObjectInspectors[i]); + objectInspectors.set(i, currentKeyObjectInspectors[i]); } - + // Generate key names ArrayList keyNames = new ArrayList(keyFields.length); for (int i = 0; i < keyFields.length; i++) { keyNames.add(fieldNames.get(i)); } - newKeyObjectInspector = - ObjectInspectorFactory.getStandardStructObjectInspector(keyNames, Arrays.asList(keyObjectInspectors)); - currentKeyObjectInspector = - ObjectInspectorFactory.getStandardStructObjectInspector(keyNames, Arrays.asList(currentKeyObjectInspectors)); - - outputObjInspector = - ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, objectInspectors); + newKeyObjectInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(keyNames, Arrays + .asList(keyObjectInspectors)); + currentKeyObjectInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(keyNames, Arrays + .asList(currentKeyObjectInspectors)); + outputObjInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(fieldNames, objectInspectors); + firstRow = true; - // estimate the number of hash table entries based on the size of each entry. Since the size of a entry + // estimate the number of hash table entries based on the size of each + // entry. Since the size of a entry // is not known, estimate that based on the number of entries - if (hashAggr) + if (hashAggr) { computeMaxEntriesHashAggr(hconf); + } initializeChildren(hconf); } /** - * Estimate the number of entries in map-side hash table. - * The user can specify the total amount of memory to be used by the map-side hash. By default, all available - * memory is used. The size of each row is estimated, rather crudely, and the number of entries are figure out - * based on that. - * @return number of entries that can fit in hash table - useful for map-side aggregation only + * Estimate the number of entries in map-side hash table. The user can specify + * the total amount of memory to be used by the map-side hash. By default, all + * available memory is used. The size of each row is estimated, rather + * crudely, and the number of entries are figure out based on that. 
+ * + * @return number of entries that can fit in hash table - useful for map-side + * aggregation only **/ - private void computeMaxEntriesHashAggr(Configuration hconf) throws HiveException { - maxHashTblMemory = (long)(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY) * Runtime.getRuntime().maxMemory()); + private void computeMaxEntriesHashAggr(Configuration hconf) + throws HiveException { + maxHashTblMemory = (long) (HiveConf.getFloatVar(hconf, + HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY) * Runtime.getRuntime() + .maxMemory()); estimateRowSize(); } - private static final int javaObjectOverHead = 64; + private static final int javaObjectOverHead = 64; private static final int javaHashEntryOverHead = 64; private static final int javaSizePrimitiveType = 16; - private static final int javaSizeUnknownType = 256; + private static final int javaSizeUnknownType = 256; /** - * The size of the element at position 'pos' is returned, if possible. - * If the datatype is of variable length, STRING, a list of such key positions is maintained, and the size for such positions is - * then actually calculated at runtime. - * @param pos the position of the key - * @param c the type of the key + * The size of the element at position 'pos' is returned, if possible. If the + * datatype is of variable length, STRING, a list of such key positions is + * maintained, and the size for such positions is then actually calculated at + * runtime. + * + * @param pos + * the position of the key + * @param c + * the type of the key * @return the size of this datatype **/ private int getSize(int pos, PrimitiveCategory category) { - switch(category) { - case VOID: - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: { - return javaSizePrimitiveType; - } - case STRING: { - keyPositionsSize.add(new Integer(pos)); - return javaObjectOverHead; - } - default: { - return javaSizeUnknownType; - } + switch (category) { + case VOID: + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: { + return javaSizePrimitiveType; } + case STRING: { + keyPositionsSize.add(new Integer(pos)); + return javaObjectOverHead; + } + default: { + return javaSizeUnknownType; + } + } } /** - * The size of the element at position 'pos' is returned, if possible. - * If the field is of variable length, STRING, a list of such field names for the field position is maintained, and the size - * for such positions is then actually calculated at runtime. - * @param pos the position of the key - * @param c the type of the key - * @param f the field to be added + * The size of the element at position 'pos' is returned, if possible. If the + * field is of variable length, STRING, a list of such field names for the + * field position is maintained, and the size for such positions is then + * actually calculated at runtime. 
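computeMaxEntriesHashAggr() above converts a configured fraction of the JVM's maximum heap (HIVEMAPAGGRHASHMEMORY) into a memory budget and divides it by the estimated entry size to decide how many hash table entries fit before a flush is needed. A stand-alone sketch of that arithmetic using the same rough constants (16 bytes per primitive, 64 bytes of object or hash-entry overhead, 256 for unknown types); the row composition in main is invented:

public class HashAggrCapacity {
    // Rough per-element costs, mirroring the constants in the hunk above.
    static final int JAVA_OBJECT_OVERHEAD = 64;
    static final int JAVA_HASH_ENTRY_OVERHEAD = 64;
    static final int JAVA_SIZE_PRIMITIVE = 16;
    static final int JAVA_SIZE_UNKNOWN = 256;

    // Budget = configured fraction of the maximum heap the JVM may ever use.
    static long memoryBudget(float fraction) {
        return (long) (fraction * Runtime.getRuntime().maxMemory());
    }

    // Entries that fit, given a fixed per-entry size plus an average variable part.
    static int maxEntries(long budget, int fixedRowSize, int avgVariableSize) {
        return (int) (budget / (fixedRowSize + avgVariableSize));
    }

    public static void main(String[] args) {
        // Invented example: three primitive keys plus one aggregation buffer holding a primitive.
        int fixedRowSize = JAVA_HASH_ENTRY_OVERHEAD + 3 * JAVA_SIZE_PRIMITIVE
            + JAVA_OBJECT_OVERHEAD + JAVA_SIZE_PRIMITIVE;
        long budget = memoryBudget(0.5f);   // stands in for the configured heap fraction
        System.out.println("budget=" + budget + " bytes, maxEntries="
            + maxEntries(budget, fixedRowSize, 24));
    }
}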
+ * + * @param pos + * the position of the key + * @param c + * the type of the key + * @param f + * the field to be added * @return the size of this datatype **/ private int getSize(int pos, Class c, Field f) { - if (c.isPrimitive() || - c.isInstance(new Boolean(true)) || - c.isInstance(new Byte((byte)0)) || - c.isInstance(new Short((short)0)) || - c.isInstance(new Integer(0)) || - c.isInstance(new Long(0)) || - c.isInstance(new Float(0)) || - c.isInstance(new Double(0))) + if (c.isPrimitive() || c.isInstance(new Boolean(true)) + || c.isInstance(new Byte((byte) 0)) + || c.isInstance(new Short((short) 0)) || c.isInstance(new Integer(0)) + || c.isInstance(new Long(0)) || c.isInstance(new Float(0)) + || c.isInstance(new Double(0))) { return javaSizePrimitiveType; + } if (c.isInstance(new String())) { int idx = 0; varLenFields v = null; for (idx = 0; idx < aggrPositions.size(); idx++) { v = aggrPositions.get(idx); - if (v.getAggrPos() == pos) + if (v.getAggrPos() == pos) { break; + } } if (idx == aggrPositions.size()) { @@ -343,18 +383,21 @@ v.getFields().add(f); return javaObjectOverHead; } - + return javaSizeUnknownType; } /** - * @param pos position of the key - * @param typeinfo type of the input + * @param pos + * position of the key + * @param typeinfo + * type of the input * @return the size of this datatype **/ private int getSize(int pos, TypeInfo typeInfo) { - if (typeInfo instanceof PrimitiveTypeInfo) - return getSize(pos, ((PrimitiveTypeInfo)typeInfo).getPrimitiveCategory()); + if (typeInfo instanceof PrimitiveTypeInfo) { + return getSize(pos, ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + } return javaSizeUnknownType; } @@ -362,23 +405,28 @@ * @return the size of each row **/ private void estimateRowSize() throws HiveException { - // estimate the size of each entry - - // a datatype with unknown size (String/Struct etc. - is assumed to be 256 bytes for now). + // estimate the size of each entry - + // a datatype with unknown size (String/Struct etc. - is assumed to be 256 + // bytes for now). // 64 bytes is the overhead for a reference fixedRowSize = javaHashEntryOverHead; ArrayList keys = conf.getKeys(); - // Go over all the keys and get the size of the fields of fixed length. Keep track of the variable length keys - for (int pos = 0; pos < keys.size(); pos++) + // Go over all the keys and get the size of the fields of fixed length. Keep + // track of the variable length keys + for (int pos = 0; pos < keys.size(); pos++) { fixedRowSize += getSize(pos, keys.get(pos).getTypeInfo()); + } - // Go over all the aggregation classes and and get the size of the fields of fixed length. Keep track of the variable length + // Go over all the aggregation classes and and get the size of the fields of + // fixed length. Keep track of the variable length // fields in these aggregation classes. 
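estimateRowSize() above walks the declared non-static fields of each aggregation buffer class, adds a fixed cost for primitives, and remembers String fields so their lengths can be sampled later. The same reflection walk with plain JDK calls; the buffer class below is a made-up example, not a Hive AggregationBuffer:

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.List;

public class BufferSizeWalker {

    // Invented aggregation-buffer-like class used for the walk below.
    static class SumAndLastSeen {
        long sum;
        boolean empty;
        String lastSeen;   // variable length: remembered and sampled at runtime instead
    }

    static final int PRIMITIVE_SIZE = 16;
    static final int OBJECT_OVERHEAD = 64;
    static final int UNKNOWN_SIZE = 256;

    public static void main(String[] args) {
        int fixed = OBJECT_OVERHEAD;
        List<Field> variableFields = new ArrayList<Field>();
        for (Field f : SumAndLastSeen.class.getDeclaredFields()) {
            if (Modifier.isStatic(f.getModifiers())) {
                continue;   // only non-static fields contribute to instance size
            }
            Class<?> type = f.getType();
            if (type.isPrimitive()) {
                fixed += PRIMITIVE_SIZE;
            } else if (type == String.class) {
                fixed += OBJECT_OVERHEAD;    // reference and header; content length sampled later
                variableFields.add(f);
            } else {
                fixed += UNKNOWN_SIZE;
            }
        }
        System.out.println("fixed=" + fixed + " bytes, variable-length fields=" + variableFields);
    }
}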
- for(int i=0; i < aggregationEvaluators.length; i++) { + for (int i = 0; i < aggregationEvaluators.length; i++) { fixedRowSize += javaObjectOverHead; - Class agg = aggregationEvaluators[i].getNewAggregationBuffer().getClass(); + Class agg = aggregationEvaluators[i] + .getNewAggregationBuffer().getClass(); Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg); for (Field f : fArr) { fixedRowSize += getSize(i, f.getType(), f); @@ -386,43 +434,50 @@ } } - protected AggregationBuffer[] newAggregations() throws HiveException { + protected AggregationBuffer[] newAggregations() throws HiveException { AggregationBuffer[] aggs = new AggregationBuffer[aggregationEvaluators.length]; - for(int i=0; i numRowsInput * minReductionHashAggr) { - LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput - + " reduction = " + 1.0*(numRowsHashTbl/numRowsInput) + " minReduction = " + minReductionHashAggr); + LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl + + " #total = " + numRowsInput + " reduction = " + 1.0 + * (numRowsHashTbl / numRowsInput) + " minReduction = " + + minReductionHashAggr); flush(true); hashAggr = false; + } else { + LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + + " #total = " + numRowsInput + " reduction = " + 1.0 + * (numRowsHashTbl / numRowsInput) + " minReduction = " + + minReductionHashAggr); } - else { - LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput - + " reduction = " + 1.0*(numRowsHashTbl/numRowsInput) + " minReduction = " + minReductionHashAggr); - } } } @@ -496,10 +558,11 @@ newKeys.add(keyObjects[i]); } - if (hashAggr) + if (hashAggr) { processHashAggr(row, rowInspector, newKeys); - else + } else { processAggr(row, rowInspector, newKeys); + } firstRowInGroup = false; } catch (HiveException e) { @@ -509,167 +572,197 @@ } } - private static ArrayList deepCopyElements(Object[] keys, ObjectInspector[] keyObjectInspectors, + private static ArrayList deepCopyElements(Object[] keys, + ObjectInspector[] keyObjectInspectors, ObjectInspectorCopyOption copyOption) { ArrayList result = new ArrayList(keys.length); deepCopyElements(keys, keyObjectInspectors, result, copyOption); return result; } - - private static void deepCopyElements(Object[] keys, ObjectInspector[] keyObjectInspectors, ArrayList result, + + private static void deepCopyElements(Object[] keys, + ObjectInspector[] keyObjectInspectors, ArrayList result, ObjectInspectorCopyOption copyOption) { result.clear(); - for (int i=0; i keys; // decide whether this is already in hashmap (keys in hashmap are deepcopied // version, and we need to use 'currentKeyObjectInspector'). 
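The hash-aggregation check above periodically compares the number of distinct entries in the hash table with the number of input rows seen so far; if the observed reduction falls short of the configured minimum, the table is flushed and map-side hashing is turned off for the rest of the task. A compact stand-alone version of that decision, with illustrative field and parameter names:

// Decides whether map-side hash aggregation is still reducing the data enough.
class HashAggrMonitor {
    private final float minReduction;   // e.g. 0.5f: keep hashing only if entries <= half the rows
    private final long checkInterval;   // re-check every this many input rows
    private boolean hashAggrEnabled = true;

    HashAggrMonitor(float minReduction, long checkInterval) {
        this.minReduction = minReduction;
        this.checkInterval = checkInterval;
    }

    // Called per input row with the running totals; returns whether to keep hashing.
    boolean keepHashing(long numRowsInput, long numRowsHashTbl) {
        if (!hashAggrEnabled) {
            return false;
        }
        if (numRowsInput > 0 && numRowsInput % checkInterval == 0
            && numRowsHashTbl > numRowsInput * minReduction) {
            // Too many distinct keys: flush and fall back to sort-based aggregation.
            hashAggrEnabled = false;
        }
        return hashAggrEnabled;
    }
}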
- boolean copy = false; - - KeyWrapper() {} - + boolean copy = false; + + KeyWrapper() { + } + public KeyWrapper(int hashcode, ArrayList copiedKeys) { this(hashcode, copiedKeys, false); } - - public KeyWrapper(int hashcode, ArrayList copiedKeys, boolean inHashMap) { + + public KeyWrapper(int hashcode, ArrayList copiedKeys, + boolean inHashMap) { super(); this.hashcode = hashcode; - this.keys = copiedKeys; - this.copy = inHashMap; + keys = copiedKeys; + copy = inHashMap; } - - public int hashCode(){ + + @Override + public int hashCode() { return hashcode; } - + + @Override public boolean equals(Object obj) { ArrayList copied_in_hashmap = ((KeyWrapper) obj).keys; - if(!copy) - return ObjectInspectorUtils.compare(copied_in_hashmap, currentKeyObjectInspector, keys, newKeyObjectInspector) == 0; - else - return ObjectInspectorUtils.compare(copied_in_hashmap, currentKeyObjectInspector, keys, currentKeyObjectInspector) == 0; + if (!copy) { + return ObjectInspectorUtils.compare(copied_in_hashmap, + currentKeyObjectInspector, keys, newKeyObjectInspector) == 0; + } else { + return ObjectInspectorUtils.compare(copied_in_hashmap, + currentKeyObjectInspector, keys, currentKeyObjectInspector) == 0; + } } } - + KeyWrapper keyProber = new KeyWrapper(); - private void processHashAggr(Object row, ObjectInspector rowInspector, ArrayList newKeys) throws HiveException { + + private void processHashAggr(Object row, ObjectInspector rowInspector, + ArrayList newKeys) throws HiveException { // Prepare aggs for updating AggregationBuffer[] aggs = null; boolean newEntryForHashAggr = false; - + keyProber.hashcode = newKeys.hashCode(); - //use this to probe the hashmap + // use this to probe the hashmap keyProber.keys = newKeys; - + // hash-based aggregations aggs = hashAggregations.get(keyProber); ArrayList newDefaultKeys = null; - if(aggs == null) { - newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE); - KeyWrapper newKeyProber = new KeyWrapper(keyProber.hashcode, newDefaultKeys, true); + if (aggs == null) { + newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, + ObjectInspectorCopyOption.WRITABLE); + KeyWrapper newKeyProber = new KeyWrapper(keyProber.hashcode, + newDefaultKeys, true); aggs = newAggregations(); hashAggregations.put(newKeyProber, aggs); newEntryForHashAggr = true; - numRowsHashTbl++; // new entry in the hash table + numRowsHashTbl++; // new entry in the hash table } - - // If the grouping key and the reduction key are different, a set of grouping keys for the current reduction key are maintained in keysCurrentGroup - // Peek into the set to find out if a new grouping key is seen for the given reduction key + + // If the grouping key and the reduction key are different, a set of + // grouping keys for the current reduction key are maintained in + // keysCurrentGroup + // Peek into the set to find out if a new grouping key is seen for the given + // reduction key if (groupKeyIsNotReduceKey) { - if(newDefaultKeys == null) - newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE); + if (newDefaultKeys == null) { + newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, + ObjectInspectorCopyOption.WRITABLE); + } newEntryForHashAggr = keysCurrentGroup.add(newDefaultKeys); } // Update the aggs updateAggregations(aggs, row, rowInspector, true, newEntryForHashAggr, null); - // We can only flush after the updateAggregations is done, or the potentially new entry "aggs" + // We can only 
flush after the updateAggregations is done, or the + // potentially new entry "aggs" // can be flushed out of the hash table. - - // Based on user-specified parameters, check if the hash table needs to be flushed. - // If the grouping key is not the same as reduction key, flushing can only happen at boundaries - if ((!groupKeyIsNotReduceKey || firstRowInGroup) && shouldBeFlushed(newKeys)) { + + // Based on user-specified parameters, check if the hash table needs to be + // flushed. + // If the grouping key is not the same as reduction key, flushing can only + // happen at boundaries + if ((!groupKeyIsNotReduceKey || firstRowInGroup) + && shouldBeFlushed(newKeys)) { flush(false); } } // Non-hash aggregation - private void processAggr(Object row, ObjectInspector rowInspector, ArrayList newKeys) throws HiveException { + private void processAggr(Object row, ObjectInspector rowInspector, + ArrayList newKeys) throws HiveException { // Prepare aggs for updating AggregationBuffer[] aggs = null; Object[][] lastInvoke = null; - boolean keysAreEqual = ObjectInspectorUtils.compare( - newKeys, newKeyObjectInspector, - currentKeys, currentKeyObjectInspector) == 0; - + boolean keysAreEqual = ObjectInspectorUtils.compare(newKeys, + newKeyObjectInspector, currentKeys, currentKeyObjectInspector) == 0; + // Forward the current keys if needed for sort-based aggregation - if (currentKeys != null && !keysAreEqual) + if (currentKeys != null && !keysAreEqual) { forward(currentKeys, aggregations); - + } + // Need to update the keys? if (currentKeys == null || !keysAreEqual) { if (currentKeys == null) { currentKeys = new ArrayList(keyFields.length); } - deepCopyElements(keyObjects, keyObjectInspectors, currentKeys, ObjectInspectorCopyOption.WRITABLE); - + deepCopyElements(keyObjects, keyObjectInspectors, currentKeys, + ObjectInspectorCopyOption.WRITABLE); + // Reset the aggregations resetAggregations(aggregations); - + // clear parameters in last-invoke - for(int i=0; i newKeys) { int numEntries = hashAggregations.size(); - // The fixed size for the aggregation class is already known. Get the variable portion of the size every NUMROWSESTIMATESIZE rows. + // The fixed size for the aggregation class is already known. Get the + // variable portion of the size every NUMROWSESTIMATESIZE rows. 
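processHashAggr() above hashes the new key list, probes the in-memory map for the matching aggregation buffers, creates and registers buffers on a miss, updates them, and only then (at group boundaries) asks whether the table has outgrown its estimated capacity and needs a partial flush. The control flow, sketched over a plain HashMap with a one-element long array standing in for Hive's aggregation buffers:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Toy hash aggregation: counts rows per key list and flushes when over capacity.
class ToyHashAggr {
    private final Map<List<Object>, long[]> buffers = new HashMap<List<Object>, long[]>();
    private final int maxEntries;   // estimated capacity, e.g. from a row-size estimate

    ToyHashAggr(int maxEntries) {
        this.maxEntries = maxEntries;
    }

    void process(List<Object> key) {
        long[] agg = buffers.get(key);
        if (agg == null) {
            agg = new long[] { 0 };                        // "new aggregation buffer"
            buffers.put(new ArrayList<Object>(key), agg);  // store a copy of the key
        }
        agg[0]++;                                          // update the aggregation
        if (buffers.size() >= maxEntries) {
            flush(false);                                  // flush only after the update
        }
    }

    // complete=true emits everything; otherwise emit (and drop) roughly 10% of the entries.
    void flush(boolean complete) {
        int toDrop = complete ? buffers.size() : Math.max(1, buffers.size() / 10);
        Iterator<Map.Entry<List<Object>, long[]>> iter = buffers.entrySet().iterator();
        while (iter.hasNext() && toDrop-- > 0) {
            Map.Entry<List<Object>, long[]> e = iter.next();
            System.out.println(e.getKey() + " -> " + e.getValue()[0]);   // forward downstream
            iter.remove();
        }
    }
}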
if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) { for (Integer pos : keyPositionsSize) { Object key = newKeys.get(pos.intValue()); // Ignore nulls if (key != null) { if (key instanceof String) { - totalVariableSize += ((String)key).length(); + totalVariableSize += ((String) key).length(); } else if (key instanceof Text) { - totalVariableSize += ((Text)key).getLength(); + totalVariableSize += ((Text) key).getLength(); } } } AggregationBuffer[] aggs = null; - if (aggrPositions.size() > 0) + if (aggrPositions.size() > 0) { aggs = hashAggregations.get(newKeys); + } for (varLenFields v : aggrPositions) { - int aggrPos = v.getAggrPos(); + int aggrPos = v.getAggrPos(); List fieldsVarLen = v.getFields(); - AggregationBuffer agg = aggs[aggrPos]; + AggregationBuffer agg = aggs[aggrPos]; - try - { - for (Field f : fieldsVarLen) - totalVariableSize += ((String)f.get(agg)).length(); + try { + for (Field f : fieldsVarLen) { + totalVariableSize += ((String) f.get(agg)).length(); + } } catch (IllegalAccessException e) { assert false; } @@ -678,24 +771,26 @@ numEntriesVarSize++; // Update the number of entries that can fit in the hash table - numEntriesHashTable = (int)(maxHashTblMemory / (fixedRowSize + ((int)totalVariableSize/numEntriesVarSize))); - LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable); + numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize))); + LOG.trace("Hash Aggr: #hash table = " + numEntries + + " #max in hash table = " + numEntriesHashTable); } // flush if necessary - if (numEntries >= numEntriesHashTable) + if (numEntries >= numEntriesHashTable) { return true; + } return false; } private void flush(boolean complete) throws HiveException { - + // Currently, the algorithm flushes 10% of the entries - this can be // changed in the future if (complete) { - Iterator> - iter = hashAggregations.entrySet().iterator(); + Iterator> iter = hashAggregations + .entrySet().iterator(); while (iter.hasNext()) { Map.Entry m = iter.next(); forward(m.getKey().keys, m.getValue()); @@ -708,8 +803,8 @@ int oldSize = hashAggregations.size(); LOG.warn("Hash Tbl flush: #hash table = " + oldSize); - Iterator> - iter = hashAggregations.entrySet().iterator(); + Iterator> iter = hashAggregations + .entrySet().iterator(); int numDel = 0; while (iter.hasNext()) { Map.Entry m = iter.next(); @@ -732,24 +827,27 @@ * The keys in the record * @throws HiveException */ - protected void forward(ArrayList keys, AggregationBuffer[] aggs) throws HiveException { + protected void forward(ArrayList keys, AggregationBuffer[] aggs) + throws HiveException { int totalFields = keys.size() + aggs.length; if (forwardCache == null) { forwardCache = new Object[totalFields]; } - for(int i=0; i(0), aggregations); - } - else { + } else { if (hashAggregations != null) { - LOG.warn("Begin Hash Table flush at close: size = " + hashAggregations.size()); + LOG.warn("Begin Hash Table flush at close: size = " + + hashAggregations.size()); Iterator iter = hashAggregations.entrySet().iterator(); while (iter.hasNext()) { - Map.Entry m = (Map.Entry)iter.next(); + Map.Entry m = (Map.Entry) iter + .next(); forward(m.getKey().keys, m.getValue()); iter.remove(); } hashAggregations.clear(); - } - else if (aggregations != null) { + } else if (aggregations != null) { // sort-based aggregations if (currentKeys != null) { forward(currentKeys, aggregations); } currentKeys = null; } else { - // The GroupByOperator is not initialized, 
which means there is no data + // The GroupByOperator is not initialized, which means there is no + // data // (since we initialize the operators when we see the first record). // Just do nothing here. } @@ -802,17 +902,20 @@ } // Group by contains the columns needed - no need to aggregate from children - public List genColLists(HashMap, OpParseContext> opParseCtx) { + public List genColLists( + HashMap, OpParseContext> opParseCtx) { List colLists = new ArrayList(); ArrayList keys = conf.getKeys(); - for (exprNodeDesc key : keys) + for (exprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); - + } + ArrayList aggrs = conf.getAggregators(); - for (aggregationDesc aggr : aggrs) { + for (aggregationDesc aggr : aggrs) { ArrayList params = aggr.getParameters(); - for (exprNodeDesc param : params) + for (exprNodeDesc param : params) { colLists = Utilities.mergeUniqElems(colLists, param.getCols()); + } } return colLists; @@ -825,7 +928,8 @@ public String getName() { return new String("GBY"); } - + + @Override public int getType() { return OperatorType.GROUPBY; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (working copy) @@ -28,18 +28,17 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.io.Writable; /** * Map Join Object used for both key and value */ public class MapJoinObjectValue implements Externalizable { - transient protected int metadataTag; + transient protected int metadataTag; transient protected RowContainer obj; transient protected Configuration conf; @@ -54,21 +53,26 @@ this.metadataTag = metadataTag; this.obj = obj; } - + + @Override public boolean equals(Object o) { if (o instanceof MapJoinObjectValue) { - MapJoinObjectValue mObj = (MapJoinObjectValue)o; + MapJoinObjectValue mObj = (MapJoinObjectValue) o; if (mObj.getMetadataTag() == metadataTag) { - if ((this.obj == null) && (mObj.getObj() == null)) + if ((obj == null) && (mObj.getObj() == null)) { return true; - if ((obj != null) && (mObj.getObj() != null) && (mObj.getObj().equals(obj))) + } + if ((obj != null) && (mObj.getObj() != null) + && (mObj.getObj().equals(obj))) { return true; + } } } return false; } + @Override public int hashCode() { return (obj == null) ? 
0 : obj.hashCode(); } @@ -77,10 +81,11 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { try { - metadataTag = in.readInt(); + metadataTag = in.readInt(); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag)); + MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + Integer.valueOf(metadataTag)); int sz = in.readInt(); RowContainer res = new RowContainer(ctx.getConf()); @@ -90,11 +95,10 @@ Writable val = ctx.getSerDe().getSerializedClass().newInstance(); val.readFields(in); - ArrayList memObj = (ArrayList) - ObjectInspectorUtils.copyToStandardObject( - ctx.getSerDe().deserialize(val), - ctx.getSerDe().getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE); + ArrayList memObj = (ArrayList) ObjectInspectorUtils + .copyToStandardObject(ctx.getSerDe().deserialize(val), ctx + .getSerDe().getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE); res.add(memObj); } @@ -111,23 +115,20 @@ out.writeInt(metadataTag); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag)); + MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + Integer.valueOf(metadataTag)); // Different processing for key and value RowContainer> v = obj; out.writeInt(v.size()); - for (ArrayList row = v.first(); - row != null; - row = v.next() ) { + for (ArrayList row = v.first(); row != null; row = v.next()) { Writable outVal = ctx.getSerDe().serialize(row, ctx.getStandardOI()); outVal.write(out); } - } - catch (SerDeException e) { + } catch (SerDeException e) { throw new IOException(e); - } - catch (HiveException e) { + } catch (HiveException e) { throw new IOException(e); } } @@ -140,7 +141,8 @@ } /** - * @param metadataTag the metadataTag to set + * @param metadataTag + * the metadataTag to set */ public void setMetadataTag(int metadataTag) { this.metadataTag = metadataTag; @@ -154,7 +156,8 @@ } /** - * @param obj the obj to set + * @param obj + * the obj to set */ public void setObj(RowContainer obj) { this.obj = obj; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java (working copy) @@ -20,169 +20,180 @@ import java.io.File; import java.util.HashMap; +import java.util.HashSet; import java.util.Properties; import java.util.Set; -import java.util.HashSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerFactory; import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerOptions; +import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator; import org.apache.hadoop.hive.ql.util.jdbm.htree.HTree; -import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator; -import org.apache.hadoop.hive.ql.exec.persistence.MRU; -import org.apache.hadoop.hive.ql.exec.persistence.DCLLItem; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; /** - * Simple wrapper for persistent Hashmap implementing only the 
put/get/remove/clear interface. - * The main memory hash table acts as a cache and all put/get will operate on it first. If the - * size of the main memory hash table exceeds a certain threshold, new elements will go into - * the persistent hash table. + * Simple wrapper for persistent Hashmap implementing only the + * put/get/remove/clear interface. The main memory hash table acts as a cache + * and all put/get will operate on it first. If the size of the main memory hash + * table exceeds a certain threshold, new elements will go into the persistent + * hash table. */ -public class HashMapWrapper { - +public class HashMapWrapper { + protected Log LOG = LogFactory.getLog(this.getClass().getName()); - + // default threshold for using main memory based HashMap private static final int THRESHOLD = 25000; - - private int threshold; // threshold to put data into persistent hash table instead - private HashMap mHash; // main memory HashMap - private HTree pHash; // persistent HashMap - private RecordManager recman; // record manager required by HTree - private File tmpFile; // temp file holding the persistent data from record manager. - private MRU MRUList; // MRU cache entry - + + private int threshold; // threshold to put data into persistent hash table + // instead + private HashMap mHash; // main memory HashMap + private HTree pHash; // persistent HashMap + private RecordManager recman; // record manager required by HTree + private File tmpFile; // temp file holding the persistent data from record + // manager. + private MRU MRUList; // MRU cache entry + /** - * Doubly linked list of value items. - * Note: this is only used along with memory hash table. Persistent hash stores the value directory. + * Doubly linked list of value items. Note: this is only used along with + * memory hash table. Persistent hash stores the value directory. */ class MRUItem extends DCLLItem { K key; V value; - + MRUItem(K k, V v) { key = k; value = v; } } - + /** * Constructor. - * @param threshold User specified threshold to store new values into persistent storage. + * + * @param threshold + * User specified threshold to store new values into persistent + * storage. */ public HashMapWrapper(int threshold) { this.threshold = threshold; this.pHash = null; this.recman = null; this.tmpFile = null; - mHash = new HashMap(); + mHash = new HashMap(); MRUList = new MRU(); } - - public HashMapWrapper () { + + public HashMapWrapper() { this(THRESHOLD); } - + /** - * Get the value based on the key. We try to get it from the main memory hash table first. - * If it is not there we will look up the persistent hash table. This function also guarantees - * if any item is found given a key, it is available in main memory HashMap. So mutating the - * returned value will be reflected (saved) in HashMapWrapper. + * Get the value based on the key. We try to get it from the main memory hash + * table first. If it is not there we will look up the persistent hash table. + * This function also guarantees if any item is found given a key, it is + * available in main memory HashMap. So mutating the returned value will be + * reflected (saved) in HashMapWrapper. + * * @param key - * @return Value corresponding to the key. If the key is not found, return null. + * @return Value corresponding to the key. If the key is not found, return + * null. */ public V get(K key) throws HiveException { V value = null; - + // if not the MRU, searching the main memory hash table. 
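The get() path described above prefers the in-memory, MRU-tracked map and falls back to the persistent JDBM HTree; a disk hit is either promoted into memory or swapped with the least recently used in-memory entry. A much-reduced analogue built only on LinkedHashMap, with an ordinary HashMap standing in for the on-disk store (eviction here happens on insert rather than on a disk hit):

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

// In-memory LRU cache in front of an overflow map (a plain HashMap standing in for the HTree).
class SpillingMap<K, V> {
    private final Map<K, V> overflow = new HashMap<K, V>();
    private final LinkedHashMap<K, V> memory;

    SpillingMap(final int threshold) {
        // Access-order LinkedHashMap; the least recently used entry spills to overflow.
        this.memory = new LinkedHashMap<K, V>(16, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                if (size() > threshold) {
                    overflow.put(eldest.getKey(), eldest.getValue());
                    return true;
                }
                return false;
            }
        };
    }

    // Memory first; on a miss, pull from overflow and promote into memory.
    V get(K key) {
        V v = memory.get(key);
        if (v == null) {
            v = overflow.remove(key);
            if (v != null) {
                memory.put(key, v);   // may spill the current LRU entry via removeEldestEntry
            }
        }
        return v;
    }

    // Keep a single authoritative copy of each key.
    void put(K key, V value) {
        overflow.remove(key);
        memory.put(key, value);
    }
}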
MRUItem item = mHash.get(key); - if ( item != null ) { + if (item != null) { value = item.value; MRUList.moveToHead(item); - } else if ( pHash != null ) { + } else if (pHash != null) { try { value = (V) pHash.get(key); - if ( value != null ) { - if ( mHash.size() < threshold ) { + if (value != null) { + if (mHash.size() < threshold) { mHash.put(key, new MRUItem(key, value)); - pHash.remove(key); - } else if ( threshold > 0 ) { // flush the LRU to disk + pHash.remove(key); + } else if (threshold > 0) { // flush the LRU to disk MRUItem tail = MRUList.tail(); // least recently used item - pHash.put(tail.key, tail.value); - pHash.remove(key); - recman.commit(); - - // update mHash -- reuse MRUItem - item = mHash.remove(tail.key); - item.key = key; - item.value = value; - mHash.put(key, item); - + pHash.put(tail.key, tail.value); + pHash.remove(key); + recman.commit(); + + // update mHash -- reuse MRUItem + item = mHash.remove(tail.key); + item.key = key; + item.value = value; + mHash.put(key, item); + // update MRU -- reusing MRUItem - tail.key = key; - tail.value = value; - MRUList.moveToHead(tail); + tail.key = key; + tail.value = value; + MRUList.moveToHead(tail); } } - } catch ( Exception e ) { + } catch (Exception e) { LOG.warn(e.toString()); throw new HiveException(e); } - } + } return value; } - + /** - * Put the key value pair in the hash table. It will first try to - * put it into the main memory hash table. If the size exceeds the - * threshold, it will put it into the persistent hash table. + * Put the key value pair in the hash table. It will first try to put it into + * the main memory hash table. If the size exceeds the threshold, it will put + * it into the persistent hash table. + * * @param key * @param value * @throws HiveException */ - public void put(K key, V value) throws HiveException { + public void put(K key, V value) throws HiveException { int mm_size = mHash.size(); MRUItem itm = mHash.get(key); - + if (mm_size < threshold) { - if ( itm != null ) { + if (itm != null) { // re-use the MRU item -- just overwrite value, key is the same itm.value = value; - MRUList.moveToHead(itm); - if (!mHash.get(key).value.equals(value)) - LOG.error("HashMapWrapper.put() reuse MRUItem inconsistency [1]."); - assert(mHash.get(key).value.equals(value)); + MRUList.moveToHead(itm); + if (!mHash.get(key).value.equals(value)) { + LOG.error("HashMapWrapper.put() reuse MRUItem inconsistency [1]."); + } + assert (mHash.get(key).value.equals(value)); } else { // check if key already exists in pHash try { - if ( pHash != null && pHash.get(key) != null ) { + if (pHash != null && pHash.get(key) != null) { // remove the old item from pHash and insert the new one pHash.remove(key); pHash.put(key, value); recman.commit(); - return; + return; } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); } - itm = new MRUItem(key,value); + itm = new MRUItem(key, value); MRUList.put(itm); - mHash.put(key, itm); + mHash.put(key, itm); } } else { - if ( itm != null ) { // replace existing item + if (itm != null) { // replace existing item // re-use the MRU item -- just overwrite value, key is the same itm.value = value; - MRUList.moveToHead(itm); - if (!mHash.get(key).value.equals(value)) - LOG.error("HashMapWrapper.put() reuse MRUItem inconsistency [2]."); - assert(mHash.get(key).value.equals(value)); + MRUList.moveToHead(itm); + if (!mHash.get(key).value.equals(value)) { + LOG.error("HashMapWrapper.put() reuse MRUItem inconsistency [2]."); + } + assert (mHash.get(key).value.equals(value)); } 
else { - // for items inserted into persistent hash table, we don't put it into MRU + // for items inserted into persistent hash table, we don't put it into + // MRU if (pHash == null) { pHash = getPersistentHash(); } @@ -196,62 +207,67 @@ } } } - + /** * Get the persistent hash table. + * * @return persistent hash table * @throws HiveException */ private HTree getPersistentHash() throws HiveException { try { - // Create a temporary file for the page manager to hold persistent data. - if ( tmpFile != null ) { - tmpFile.delete(); + // Create a temporary file for the page manager to hold persistent data. + if (tmpFile != null) { + tmpFile.delete(); } tmpFile = File.createTempFile("HashMapWrapper", ".tmp", new File("/tmp")); LOG.info("HashMapWrapper created temp file " + tmpFile.getAbsolutePath()); - // Delete the temp file if the JVM terminate normally through Hadoop job kill command. + // Delete the temp file if the JVM terminate normally through Hadoop job + // kill command. // Caveat: it won't be deleted if JVM is killed by 'kill -9'. - tmpFile.deleteOnExit(); - + tmpFile.deleteOnExit(); + Properties props = new Properties(); - props.setProperty(RecordManagerOptions.CACHE_TYPE, RecordManagerOptions.NO_CACHE); - props.setProperty(RecordManagerOptions.DISABLE_TRANSACTIONS, "true" ); - - recman = RecordManagerFactory.createRecordManager(tmpFile, props ); + props.setProperty(RecordManagerOptions.CACHE_TYPE, + RecordManagerOptions.NO_CACHE); + props.setProperty(RecordManagerOptions.DISABLE_TRANSACTIONS, "true"); + + recman = RecordManagerFactory.createRecordManager(tmpFile, props); pHash = HTree.createInstance(recman); } catch (Exception e) { LOG.warn(e.toString()); throw new HiveException(e); - } + } return pHash; } - + /** - * Clean up the hash table. All elements in the main memory hash table will be removed, and - * the persistent hash table will be destroyed (temporary file will be deleted). + * Clean up the hash table. All elements in the main memory hash table will be + * removed, and the persistent hash table will be destroyed (temporary file + * will be deleted). */ public void clear() throws HiveException { - if ( mHash != null ) { + if (mHash != null) { mHash.clear(); MRUList.clear(); } close(); } - + /** - * Remove one key-value pairs from the hash table based on the given key. If the pairs are - * removed from the main memory hash table, pairs in the persistent hash table will not be - * moved to the main memory hash table. Future inserted elements will go into the main memory - * hash table though. + * Remove one key-value pairs from the hash table based on the given key. If + * the pairs are removed from the main memory hash table, pairs in the + * persistent hash table will not be moved to the main memory hash table. + * Future inserted elements will go into the main memory hash table though. + * * @param key * @throws HiveException */ public void remove(Object key) throws HiveException { MRUItem entry = mHash.remove(key); - if ( entry != null ) { + if (entry != null) { MRUList.remove(entry); - } else if ( pHash != null ) { + } else if (pHash != null) { try { pHash.remove(key); } catch (Exception e) { @@ -260,58 +276,64 @@ } } } - + /** * Get a list of all keys in the hash map. 
+ * * @return */ public Set keySet() { HashSet ret = null; - if ( mHash != null ) { + if (mHash != null) { ret = new HashSet(); ret.addAll(mHash.keySet()); } - if ( pHash != null ) { + if (pHash != null) { try { FastIterator fitr = pHash.keys(); - if ( fitr != null ) { - K k; - while ( (k = (K) fitr.next()) != null ) - ret.add(k); - } - } catch (Exception e) { + if (fitr != null) { + K k; + while ((k = (K) fitr.next()) != null) { + ret.add(k); + } + } + } catch (Exception e) { e.printStackTrace(); - } + } } return ret; } - + /** - * Get the main memory cache capacity. - * @return the maximum number of items can be put into main memory HashMap cache. + * Get the main memory cache capacity. + * + * @return the maximum number of items can be put into main memory HashMap + * cache. */ public int cacheSize() { return threshold; } - + /** * Close the persistent hash table and clean it up. + * * @throws HiveException */ public void close() throws HiveException { - - if ( pHash != null ) { + + if (pHash != null) { try { - if ( recman != null ) + if (recman != null) { recman.close(); - } catch (Exception e) { + } + } catch (Exception e) { throw new HiveException(e); } // delete the temporary file tmpFile.delete(); tmpFile = null; - pHash = null; - recman = null; + pHash = null; + recman = null; } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MRU.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MRU.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MRU.java (working copy) @@ -18,21 +18,20 @@ package org.apache.hadoop.hive.ql.exec.persistence; -import org.apache.hadoop.hive.ql.exec.persistence.DCLLItem; /** - * An MRU (Most Recently Used) cache implementation. - * This implementation maintains a doubly circular linked list and it can be used - * with an auxiliary data structure such as a HashMap to locate the item quickly. + * An MRU (Most Recently Used) cache implementation. This implementation + * maintains a doubly circular linked list and it can be used with an auxiliary + * data structure such as a HashMap to locate the item quickly. */ public class MRU { - - T head; // head of the linked list -- MRU; tail (head.prev) will be the LRU - + + T head; // head of the linked list -- MRU; tail (head.prev) will be the LRU + public MRU() { head = null; } - + /** * Insert a value into the MRU. It will appear as the head. */ @@ -40,16 +39,19 @@ addToHead(item); return item; } - + /** * Remove a item from the MRU list. - * @param v linked list item. - */ + * + * @param v + * linked list item. + */ public void remove(T v) { - if (v == null) + if (v == null) { return; - if ( v == head ) { - if ( head != head.getNext()) { + } + if (v == head) { + if (head != head.getNext()) { head = (T) head.getNext(); } else { head = null; @@ -57,56 +59,61 @@ } v.remove(); } - + /** * Get the most recently used. + * * @return the most recently used item. */ - public T head() { + public T head() { return head; } - + /** * Get the least recently used. + * * @return the least recently used item. */ public T tail() { return (T) head.getPrev(); } - + /** * Insert a new item as the head - * @param v the new linked list item to be added to the head. + * + * @param v + * the new linked list item to be added to the head. 
*/ private void addToHead(T v) { - if ( head == null ) { + if (head == null) { head = v; - } else { + } else { head.insertBefore(v); head = v; } } - - + /** - * Move an existing item to the head. - * @param v the linked list item to be moved to the head. + * Move an existing item to the head. + * + * @param v + * the linked list item to be moved to the head. */ public void moveToHead(T v) { - assert(head != null); - if ( head != v ) { + assert (head != null); + if (head != v) { v.remove(); head.insertBefore(v); head = v; } } - + /** - * Clear all elements in the MRU list. - * This is not very efficient (linear) since it will call remove() to every item in the list. + * Clear all elements in the MRU list. This is not very efficient (linear) + * since it will call remove() to every item in the list. */ public void clear() { - while ( head.getNext() != head ) { + while (head.getNext() != head) { head.getNext().remove(); } head.remove(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (working copy) @@ -73,133 +73,143 @@ * */ public class RowContainer> { - + protected Log LOG = LogFactory.getLog(this.getClass().getName()); - + // max # of rows can be put into one block - private static final int BLOCKSIZE = 25000; - - private Row[] currentWriteBlock; // the last block that add() should append to - private Row[] currentReadBlock; // the current block where the cursor is in + private static final int BLOCKSIZE = 25000; + + private Row[] currentWriteBlock; // the last block that add() should append to + private Row[] currentReadBlock; // the current block where the cursor is in // since currentReadBlock may assigned to currentWriteBlock, we need to store // orginal read block - private Row[] firstReadBlockPointer; - private int blockSize; // number of objects in the block before it is spilled to disk - private int numFlushedBlocks; // total # of blocks - private int size; // total # of elements in the RowContainer - private File tmpFile; // temporary file holding the spilled blocks + private Row[] firstReadBlockPointer; + private int blockSize; // number of objects in the block before it is spilled + // to disk + private int numFlushedBlocks; // total # of blocks + private int size; // total # of elements in the RowContainer + private File tmpFile; // temporary file holding the spilled blocks Path tempOutPath = null; private File parentFile; - private int itrCursor; // iterator cursor in the currBlock - private int readBlockSize; //size of current read block - private int addCursor; // append cursor in the lastBlock - private SerDe serde; // serialization/deserialization for the row - private ObjectInspector standardOI; // object inspector for the row - + private int itrCursor; // iterator cursor in the currBlock + private int readBlockSize; // size of current read block + private int addCursor; // append cursor in the lastBlock + private SerDe serde; // serialization/deserialization for the row + private ObjectInspector standardOI; // object inspector for the row + private List keyObject; private tableDesc tblDesc; - - boolean firstCalled = false; //once called first, it will never be able to write again. + + boolean firstCalled = false; // once called first, it will never be able to + // write again. 
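// ---------------------------------------------------------------------------
// Editor's note -- illustrative usage sketch, not part of this patch. It shows
// the intended lifecycle of a RowContainer: append rows with add(), scan them
// with first() followed by next() until null, then release everything with
// clear(). The serde, oi, jc, desc and incoming parameters are placeholders for
// objects the enclosing operator would normally supply, the block size of 1024
// is arbitrary, and ArrayList<Object> is used as the row type as the join
// operators do.
//
//   static void copyAndScan(SerDe serde, ObjectInspector oi, Configuration jc,
//       tableDesc desc, List<ArrayList<Object>> incoming) throws HiveException {
//     RowContainer<ArrayList<Object>> rows =
//         new RowContainer<ArrayList<Object>>(1024, serde, oi, jc);
//     rows.setTableDesc(desc);        // needed so spilled blocks can be written and read back
//     for (ArrayList<Object> r : incoming) {
//       rows.add(r);                  // spills a full block to a local temp file
//     }
//     for (ArrayList<Object> r = rows.first(); r != null; r = rows.next()) {
//       // consume r: rows come back from memory or from the spilled file
//     }
//     rows.clear();                   // closes any open reader/writer and discards the spilled data
//   }
// ---------------------------------------------------------------------------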
int acutalSplitNum = 0; int currentSplitPointer = 0; - org.apache.hadoop.mapred.RecordReader rr = null; //record reader + org.apache.hadoop.mapred.RecordReader rr = null; // record reader RecordWriter rw = null; InputFormat inputFormat = null; InputSplit[] inputSplits = null; private Row dummyRow = null; - - Writable val = null; //cached to use serialize data - + + Writable val = null; // cached to use serialize data + JobConf jobCloneUsingLocalFs = null; private LocalFileSystem localFs; public RowContainer(Configuration jc) throws HiveException { this(BLOCKSIZE, jc); } - + public RowContainer(int blockSize, Configuration jc) throws HiveException { // no 0-sized block this.blockSize = blockSize == 0 ? BLOCKSIZE : blockSize; - this.size = 0; + this.size = 0; this.itrCursor = 0; this.addCursor = 0; this.numFlushedBlocks = 0; - this.tmpFile = null; + this.tmpFile = null; this.currentWriteBlock = (Row[]) new ArrayList[blockSize]; this.currentReadBlock = this.currentWriteBlock; this.firstReadBlockPointer = currentReadBlock; - this.serde = null; - this.standardOI= null; + this.serde = null; + this.standardOI = null; try { this.localFs = FileSystem.getLocal(jc); } catch (IOException e) { - throw new HiveException(e); + throw new HiveException(e); } this.jobCloneUsingLocalFs = new JobConf(jc); - HiveConf.setVar(jobCloneUsingLocalFs, HiveConf.ConfVars.HADOOPFS, Utilities.HADOOP_LOCAL_FS); + HiveConf.setVar(jobCloneUsingLocalFs, HiveConf.ConfVars.HADOOPFS, + Utilities.HADOOP_LOCAL_FS); } - - public RowContainer(int blockSize, SerDe sd, ObjectInspector oi, Configuration jc) throws HiveException { + + public RowContainer(int blockSize, SerDe sd, ObjectInspector oi, + Configuration jc) throws HiveException { this(blockSize, jc); setSerDe(sd, oi); } - + public void setSerDe(SerDe sd, ObjectInspector oi) { this.serde = sd; this.standardOI = oi; } - + public void add(Row t) throws HiveException { - if(this.tblDesc != null) { - if ( addCursor >= blockSize ) { // spill the current block to tmp file + if (this.tblDesc != null) { + if (addCursor >= blockSize) { // spill the current block to tmp file spillBlock(currentWriteBlock, addCursor); addCursor = 0; - if ( numFlushedBlocks == 1 ) + if (numFlushedBlocks == 1) { currentWriteBlock = (Row[]) new ArrayList[blockSize]; - } + } + } currentWriteBlock[addCursor++] = t; - } else if(t != null) { + } else if (t != null) { // the tableDesc will be null in the case that all columns in that table // is not used. we use a dummy row to denote all rows in that table, and - // the dummy row is added by caller. + // the dummy row is added by caller. this.dummyRow = t; } ++size; } - + public Row first() throws HiveException { - if ( size == 0 ) + if (size == 0) { return null; - + } + try { firstCalled = true; // when we reach here, we must have some data already (because size >0). - // We need to see if there are any data flushed into file system. If not, we can + // We need to see if there are any data flushed into file system. If not, + // we can // directly read from the current write block. Otherwise, we need to read // from the beginning of the underlying file. 
this.itrCursor = 0; closeWriter(); closeReader(); - - if(tblDesc == null) { - this.itrCursor ++; + + if (tblDesc == null) { + this.itrCursor++; return dummyRow; } - + this.currentReadBlock = this.firstReadBlockPointer; if (this.numFlushedBlocks == 0) { this.readBlockSize = this.addCursor; this.currentReadBlock = this.currentWriteBlock; } else { if (inputSplits == null) { - if (this.inputFormat == null) - inputFormat = (InputFormat) ReflectionUtils.newInstance(tblDesc.getInputFileFormatClass(), + if (this.inputFormat == null) { + inputFormat = (InputFormat) ReflectionUtils + .newInstance(tblDesc.getInputFileFormatClass(), jobCloneUsingLocalFs); + } HiveConf.setVar(jobCloneUsingLocalFs, HiveConf.ConfVars.HADOOPMAPREDINPUTDIR, - org.apache.hadoop.util.StringUtils.escapeString(parentFile.getAbsolutePath())); + org.apache.hadoop.util.StringUtils.escapeString(parentFile + .getAbsolutePath())); inputSplits = inputFormat.getSplits(jobCloneUsingLocalFs, 1); acutalSplitNum = inputSplits.length; } @@ -217,34 +227,35 @@ } catch (Exception e) { throw new HiveException(e); } - + } public Row next() throws HiveException { - - if(!firstCalled) + + if (!firstCalled) { throw new RuntimeException("Call first() then call next()."); - - if ( size == 0 ) + } + + if (size == 0) { return null; - - if(tblDesc == null) { - if(this.itrCursor < size) { + } + + if (tblDesc == null) { + if (this.itrCursor < size) { this.itrCursor++; return dummyRow; } - return null; + return null; } - + Row ret; - if(itrCursor < this.readBlockSize) { + if (itrCursor < this.readBlockSize) { ret = this.currentReadBlock[itrCursor++]; removeKeys(ret); return ret; - } - else { + } else { nextBlock(); - if ( this.readBlockSize == 0) { + if (this.readBlockSize == 0) { if (currentWriteBlock != null && currentReadBlock != currentWriteBlock) { this.itrCursor = 0; this.readBlockSize = this.addCursor; @@ -262,51 +273,58 @@ if (this.keyObject != null && this.currentReadBlock != this.currentWriteBlock) { int len = this.keyObject.size(); - int rowSize = ((ArrayList)ret).size(); - for(int i=0;i row = new ArrayList(2); + private void spillBlock(Row[] block, int length) throws HiveException { try { - if ( tmpFile == null ) { + if (tmpFile == null) { String suffix = ".tmp"; - if(this.keyObject != null) + if (this.keyObject != null) { suffix = "." + this.keyObject.toString() + suffix; - - while(true) { + } + + while (true) { String parentId = "hive-rowcontainer" + Utilities.randGen.nextInt(); - parentFile = new File("/tmp/"+ parentId); + parentFile = new File("/tmp/" + parentId); boolean success = parentFile.mkdir(); - if(success) + if (success) { break; + } LOG.debug("retry creating tmp row-container directory..."); } - + tmpFile = File.createTempFile("RowContainer", suffix, parentFile); LOG.info("RowContainer created temp file " + tmpFile.getAbsolutePath()); - // Delete the temp file if the JVM terminate normally through Hadoop job kill command. + // Delete the temp file if the JVM terminate normally through Hadoop job + // kill command. // Caveat: it won't be deleted if JVM is killed by 'kill -9'. 
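// ---------------------------------------------------------------------------
// Editor's note, not part of this patch: each spill writes the current
// in-memory block through the table's HiveOutputFormat into a local temp file
// under /tmp/hive-rowcontainer<random>/, and first()/nextBlock() later read the
// spilled rows back through the table's InputFormat, deserializing each record
// with the container's SerDe.
// ---------------------------------------------------------------------------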
parentFile.deleteOnExit(); - tmpFile.deleteOnExit(); - + tmpFile.deleteOnExit(); + // rFile = new RandomAccessFile(tmpFile, "rw"); - HiveOutputFormat hiveOutputFormat = tblDesc.getOutputFileFormatClass().newInstance(); + HiveOutputFormat hiveOutputFormat = tblDesc + .getOutputFileFormatClass().newInstance(); tempOutPath = new Path(tmpFile.toString()); - rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs, hiveOutputFormat, - serde.getSerializedClass(), false, tblDesc.getProperties(), tempOutPath); + rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs, + hiveOutputFormat, serde.getSerializedClass(), false, tblDesc + .getProperties(), tempOutPath); } else if (rw == null) { - throw new HiveException("RowContainer has already been closed for writing."); + throw new HiveException( + "RowContainer has already been closed for writing."); } - + row.clear(); row.add(null); row.add(null); - + if (this.keyObject != null) { row.set(1, this.keyObject); for (int i = 0; i < length; ++i) { @@ -315,18 +333,19 @@ Writable outVal = serde.serialize(row, standardOI); rw.write(outVal); } - }else { - for ( int i = 0; i < length; ++i ) { + } else { + for (int i = 0; i < length; ++i) { Row currentValRow = block[i]; Writable outVal = serde.serialize(currentValRow, standardOI); rw.write(outVal); } } - - if(block == this.currentWriteBlock) + + if (block == this.currentWriteBlock) { this.addCursor = 0; - - this.numFlushedBlocks ++; + } + + this.numFlushedBlocks++; } catch (Exception e) { clear(); LOG.error(e.toString(), e); @@ -336,6 +355,7 @@ /** * Get the number of elements in the RowContainer. + * * @return number of elements in the RowContainer */ public int size() { @@ -345,57 +365,64 @@ private boolean nextBlock() throws HiveException { itrCursor = 0; this.readBlockSize = 0; - if (this.numFlushedBlocks == 0) return false; - + if (this.numFlushedBlocks == 0) { + return false; + } + try { - if(val == null) + if (val == null) { val = serde.getSerializedClass().newInstance(); + } boolean nextSplit = true; int i = 0; - - if(rr != null) { + + if (rr != null) { Object key = rr.createKey(); while (i < this.currentReadBlock.length && rr.next(key, val)) { nextSplit = false; - this.currentReadBlock[i++] = (Row) ObjectInspectorUtils.copyToStandardObject( - serde.deserialize(val), - serde.getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE); + this.currentReadBlock[i++] = (Row) ObjectInspectorUtils + .copyToStandardObject(serde.deserialize(val), serde + .getObjectInspector(), ObjectInspectorCopyOption.WRITABLE); } } - + if (nextSplit && this.currentSplitPointer < this.acutalSplitNum) { - //open record reader to read next split - rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer], jobCloneUsingLocalFs, - Reporter.NULL); + // open record reader to read next split + rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer], + jobCloneUsingLocalFs, Reporter.NULL); currentSplitPointer++; return nextBlock(); } - + this.readBlockSize = i; return this.readBlockSize > 0; } catch (Exception e) { - LOG.error(e.getMessage(),e); + LOG.error(e.getMessage(), e); try { this.clear(); } catch (HiveException e1) { - LOG.error(e.getMessage(),e); + LOG.error(e.getMessage(), e); } throw new HiveException(e); } } - public void copyToDFSDirecory(FileSystem destFs, Path destPath) throws IOException, HiveException { - if (addCursor > 0) + public void copyToDFSDirecory(FileSystem destFs, Path destPath) + throws IOException, HiveException { + if (addCursor > 0) { 
this.spillBlock(this.currentWriteBlock, addCursor); - if(tempOutPath == null || tempOutPath.toString().trim().equals("")) + } + if (tempOutPath == null || tempOutPath.toString().trim().equals("")) { return; + } this.closeWriter(); - LOG.info("RowContainer copied temp file " + tmpFile.getAbsolutePath()+ " to dfs directory " + destPath.toString()); - destFs.copyFromLocalFile(true,tempOutPath, new Path(destPath, new Path(tempOutPath.getName()))); + LOG.info("RowContainer copied temp file " + tmpFile.getAbsolutePath() + + " to dfs directory " + destPath.toString()); + destFs.copyFromLocalFile(true, tempOutPath, new Path(destPath, new Path( + tempOutPath.getName()))); clear(); } - + /** * Remove all elements in the RowContainer. */ @@ -410,13 +437,15 @@ this.inputSplits = null; tempOutPath = null; addCursor = 0; - + size = 0; try { - if (rw != null) + if (rw != null) { rw.close(false); - if (rr != null) + } + if (rr != null) { rr.close(); + } } catch (Exception e) { LOG.error(e.toString()); throw new HiveException(e); @@ -430,38 +459,41 @@ } private void deleteLocalFile(File file, boolean recursive) { - try{ + try { if (file != null) { - if(!file.exists()) + if (!file.exists()) { return; - if(file.isDirectory() && recursive) { + } + if (file.isDirectory() && recursive) { File[] files = file.listFiles(); - for (int i = 0; i < files.length; i++) - deleteLocalFile(files[i], true); + for (File file2 : files) { + deleteLocalFile(file2, true); + } } boolean deleteSuccess = file.delete(); - if(!deleteSuccess) + if (!deleteSuccess) { LOG.error("Error deleting tmp file:" + file.getAbsolutePath()); + } } } catch (Exception e) { LOG.error("Error deleting tmp file:" + file.getAbsolutePath(), e); } } - + private void closeWriter() throws IOException { if (this.rw != null) { this.rw.close(false); this.rw = null; } } - + private void closeReader() throws IOException { if (this.rr != null) { this.rr.close(); this.rr = null; } } - + public void setKeyObject(List dummyKey) { this.keyObject = dummyKey; } @@ -469,5 +501,5 @@ public void setTableDesc(tableDesc tblDesc) { this.tblDesc = tblDesc; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/DCLLItem.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/DCLLItem.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/DCLLItem.java (working copy) @@ -19,77 +19,87 @@ package org.apache.hadoop.hive.ql.exec.persistence; /** - * Doubly circular linked list item. + * Doubly circular linked list item. */ public class DCLLItem { - + DCLLItem prev; DCLLItem next; - + DCLLItem() { prev = next = this; } - + /** * Get the next item. + * * @return the next item. */ - public DCLLItem getNext() { - return next; + public DCLLItem getNext() { + return next; } - + /** * Get the previous item. + * * @return the previous item. */ - public DCLLItem getPrev() { - return prev; + public DCLLItem getPrev() { + return prev; } - + /** * Set the next item as itm. - * @param itm the item to be set as next. + * + * @param itm + * the item to be set as next. */ - public void setNext(DCLLItem itm) { - next = itm; + public void setNext(DCLLItem itm) { + next = itm; } - + /** * Set the previous item as itm - * @param itm the item to be set as previous. + * + * @param itm + * the item to be set as previous. 
*/ - public void setPrev(DCLLItem itm) { - prev = itm; + public void setPrev(DCLLItem itm) { + prev = itm; } - + /** * Remove the current item from the doubly circular linked list. */ public void remove() { - next.prev = this.prev; - prev.next = this.next; - this.prev = this.next = null; + next.prev = prev; + prev.next = next; + prev = next = null; } - + /** * Add v as the previous of the current list item. - * @param v inserted item. + * + * @param v + * inserted item. */ public void insertBefore(DCLLItem v) { - this.prev.next = v; - v.prev = this.prev; + prev.next = v; + v.prev = prev; v.next = this; - this.prev = v; + prev = v; } - + /** * Add v as the previous of the current list item. - * @param v inserted item. + * + * @param v + * inserted item. */ public void insertAfter(DCLLItem v) { - this.next.prev = v; - v.next = this.next; + next.prev = v; + v.next = next; v.prev = this; - this.next = v; + next = v; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (working copy) @@ -20,7 +20,6 @@ import java.io.Externalizable; import java.io.IOException; -import java.lang.Exception; import java.io.ObjectInput; import java.io.ObjectOutput; import java.util.ArrayList; @@ -30,7 +29,6 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; /** @@ -38,8 +36,8 @@ */ public class MapJoinObjectKey implements Externalizable { - transient protected int metadataTag; - transient protected ArrayList obj; + transient protected int metadataTag; + transient protected ArrayList obj; public MapJoinObjectKey() { } @@ -53,20 +51,25 @@ this.obj = obj; } + @Override public boolean equals(Object o) { if (o instanceof MapJoinObjectKey) { - MapJoinObjectKey mObj = (MapJoinObjectKey)o; + MapJoinObjectKey mObj = (MapJoinObjectKey) o; if (mObj.getMetadataTag() == metadataTag) { - if ((obj == null) && (mObj.getObj() == null)) + if ((obj == null) && (mObj.getObj() == null)) { return true; - if ((obj != null) && (mObj.getObj() != null) && (mObj.getObj().equals(obj))) + } + if ((obj != null) && (mObj.getObj() != null) + && (mObj.getObj().equals(obj))) { return true; + } } } return false; } + @Override public int hashCode() { return (obj == null) ? 
metadataTag : obj.hashCode(); } @@ -78,16 +81,14 @@ metadataTag = in.readInt(); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag)); + MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + Integer.valueOf(metadataTag)); Writable val = ctx.getSerDe().getSerializedClass().newInstance(); val.readFields(in); - obj = - (ArrayList) - ObjectInspectorUtils.copyToStandardObject( - ctx.getSerDe().deserialize(val), - ctx.getSerDe().getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE); + obj = (ArrayList) ObjectInspectorUtils.copyToStandardObject(ctx + .getSerDe().deserialize(val), ctx.getSerDe().getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE); } catch (Exception e) { throw new IOException(e); } @@ -100,13 +101,13 @@ out.writeInt(metadataTag); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag)); + MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + Integer.valueOf(metadataTag)); // Different processing for key and value Writable outVal = ctx.getSerDe().serialize(obj, ctx.getStandardOI()); outVal.write(out); - } - catch (SerDeException e) { + } catch (SerDeException e) { throw new IOException(e); } } @@ -119,7 +120,8 @@ } /** - * @param metadataTag the metadataTag to set + * @param metadataTag + * the metadataTag to set */ public void setMetadataTag(int metadataTag) { this.metadataTag = metadataTag; @@ -133,7 +135,8 @@ } /** - * @param obj the obj to set + * @param obj + * the obj to set */ public void setObj(ArrayList obj) { this.obj = obj; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (working copy) @@ -30,29 +30,28 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.fetchWork; import org.apache.hadoop.hive.ql.plan.partitionDesc; import org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; -import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; /** * FetchTask implementation @@ -68,7 +67,7 @@ this.work = work; this.job = job; - + currRecReader = null; currPath = null; currTbl = null; @@ -79,12 +78,12 @@ rowWithPart = new Object[2]; } - private fetchWork work; + private final fetchWork work; private int splitNum; private RecordReader currRecReader; private InputSplit[] inputSplits; private InputFormat inputFormat; - private JobConf job; + private final JobConf job; private WritableComparable key; private Writable value; private Deserializer serde; @@ -95,7 +94,7 @@ private tableDesc currTbl; private boolean tblDataDone; private StructObjectInspector rowObjectInspector; - private Object[] rowWithPart; + private final Object[] rowWithPart; /** * A cache of InputFormat instances. @@ -111,7 +110,7 @@ inputFormats.put(inputFormatClass, newInstance); } catch (Exception e) { throw new IOException("Cannot create an instance of InputFormat class " - + inputFormatClass.getName() + " as specified in mapredWork!"); + + inputFormatClass.getName() + " as specified in mapredWork!"); } } return inputFormats.get(inputFormatClass); @@ -133,7 +132,8 @@ for (String key : partKeys) { partNames.add(key); partValues.add(partSpec.get(key)); - partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + partObjectInspectors + .add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); } StructObjectInspector partObjectInspector = ObjectInspectorFactory .getStandardStructObjectInspector(partNames, partObjectInspectors); @@ -164,8 +164,9 @@ } } - if (!tblDataDone) + if (!tblDataDone) { currPath = null; + } return; } else { currTbl = null; @@ -173,7 +174,8 @@ } return; } else { - iterPath = fetchWork.convertStringToPathArray(work.getPartDir()).iterator(); + iterPath = fetchWork.convertStringToPathArray(work.getPartDir()) + .iterator(); iterPartDesc = work.getPartDesc().iterator(); } } @@ -199,28 +201,33 @@ throws Exception { if (currPath == null) { getNextPath(); - if (currPath == null) + if (currPath == null) { return null; + } - // not using FileInputFormat.setInputPaths() here because it forces a connection - // to the default file system - which may or may not be online during pure metadata + // not using FileInputFormat.setInputPaths() here because it forces a + // connection + // to the default file system - which may or may not be online during pure + // metadata // operations - job.set("mapred.input.dir", - org.apache.hadoop.util.StringUtils.escapeString(currPath.toString())); - + job.set("mapred.input.dir", org.apache.hadoop.util.StringUtils + .escapeString(currPath.toString())); + tableDesc tmp = currTbl; - if (tmp == null) + if (tmp == null) { tmp = currPart.getTableDesc(); + } inputFormat = getInputFormatFromCache(tmp.getInputFileFormatClass(), job); inputSplits = inputFormat.getSplits(job, 1); splitNum = 0; serde = tmp.getDeserializerClass().newInstance(); serde.initialize(job, tmp.getProperties()); LOG.debug("Creating fetchTask with deserializer typeinfo: " - + serde.getObjectInspector().getTypeName()); + + serde.getObjectInspector().getTypeName()); LOG.debug("deserializer properties: " + tmp.getProperties()); - if (!tblDataDone) + if (!tblDataDone) { setPrtnDesc(); + } } if (splitNum >= inputSplits.length) { @@ -241,14 +248,15 @@ /** * Get the next row. 
The fetch context is modified appropriately. - * + * **/ public InspectableObject getNextRow() throws IOException { try { if (currRecReader == null) { currRecReader = getRecordReader(); - if (currRecReader == null) + if (currRecReader == null) { return null; + } } boolean ret = currRecReader.next(key, value); @@ -264,10 +272,11 @@ currRecReader.close(); currRecReader = null; currRecReader = getRecordReader(); - if (currRecReader == null) + if (currRecReader == null) { return null; - else + } else { return getNextRow(); + } } } catch (Exception e) { throw new IOException(e); @@ -292,14 +301,12 @@ public ObjectInspector getOutputObjectInspector() throws HiveException { try { - ObjectInspector outInspector; if (work.getTblDir() != null) { tableDesc tbl = work.getTblDesc(); Deserializer serde = tbl.getDeserializerClass().newInstance(); serde.initialize(job, tbl.getProperties()); return serde.getObjectInspector(); - } - else { + } else { List listParts = work.getPartDesc(); currPart = listParts.get(0); serde = currPart.getTableDesc().getDeserializerClass().newInstance(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAFEvaluatorResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAFEvaluatorResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/NumericUDAFEvaluatorResolver.java (working copy) @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import java.sql.Date; import java.util.ArrayList; import java.util.List; @@ -26,8 +25,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** - * Resolver for Numeric UDAFs like sum and avg. If the input argument is string or date, - * the resolver returns the evaluator whose iterate function operates on doubles. + * Resolver for Numeric UDAFs like sum and avg. If the input argument is string + * or date, the resolver returns the evaluator whose iterate function operates + * on doubles. */ public class NumericUDAFEvaluatorResolver extends DefaultUDAFEvaluatorResolver { @@ -37,16 +37,21 @@ public NumericUDAFEvaluatorResolver(Class udafClass) { super(udafClass); } - - /* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.exec.UDAFMethodResolver#getEvaluatorClass(java.util.List) + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.exec.UDAFMethodResolver#getEvaluatorClass(java + * .util.List) */ @Override public Class getEvaluatorClass( List argTypeInfos) throws AmbiguousMethodException { - // Go through the argClasses and for any string, void or date time, start looking for doubles + // Go through the argClasses and for any string, void or date time, start + // looking for doubles ArrayList args = new ArrayList(); - for(TypeInfo arg: argTypeInfos) { + for (TypeInfo arg : argTypeInfos) { if (arg.equals(TypeInfoFactory.voidTypeInfo) || arg.equals(TypeInfoFactory.stringTypeInfo)) { args.add(TypeInfoFactory.doubleTypeInfo); @@ -54,7 +59,7 @@ args.add(arg); } } - + return super.getEvaluatorClass(args); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy) @@ -25,32 +25,37 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; /** - * Table Scan Operator - * If the data is coming from the map-reduce framework, just forward it. 
- * This will be needed as part of local work when data is not being read as part of map-reduce framework + * Table Scan Operator If the data is coming from the map-reduce framework, just + * forward it. This will be needed as part of local work when data is not being + * read as part of map-reduce framework **/ -public class TableScanOperator extends Operator implements Serializable { +public class TableScanOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; /** - * Currently, the table scan operator does not do anything special other than just forwarding the row. Since the - * table data is always read as part of the map-reduce framework by the mapper. But, this assumption is not true, - * i.e table data is not only read by the mapper, this operator will be enhanced to read the table. + * Currently, the table scan operator does not do anything special other than + * just forwarding the row. Since the table data is always read as part of the + * map-reduce framework by the mapper. But, this assumption is not true, i.e + * table data is not only read by the mapper, this operator will be enhanced + * to read the table. **/ @Override - public void processOp(Object row, int tag) - throws HiveException { - forward(row, inputObjInspectors[tag]); + public void processOp(Object row, int tag) throws HiveException { + forward(row, inputObjInspectors[tag]); } /** - * The operator name for this operator type. This is used to construct the rule for an operator + * The operator name for this operator type. This is used to construct the + * rule for an operator + * * @return the operator name **/ + @Override public String getName() { return new String("TS"); } - + // this 'neededColumnIDs' field is included in this operator class instead of // its desc class.The reason is that 1)tableScanDesc can not be instantiated, // and 2) it will fail some join and union queries if this is added forcibly @@ -58,13 +63,14 @@ java.util.ArrayList neededColumnIDs; public void setNeededColumnIDs(java.util.ArrayList orign_columns) { - this.neededColumnIDs = orign_columns; + neededColumnIDs = orign_columns; } public java.util.ArrayList getNeededColumnIDs() { return neededColumnIDs; } + @Override public int getType() { return OperatorType.TABLESCAN; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -29,6 +29,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectKey; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue; +import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.mapJoinDesc; import org.apache.hadoop.hive.ql.plan.tableDesc; @@ -42,17 +46,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectKey; -import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue; -import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; -import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; /** * Map side Join operator implementation. */ -public class MapJoinOperator extends CommonJoinOperator implements Serializable { +public class MapJoinOperator extends CommonJoinOperator implements + Serializable { private static final long serialVersionUID = 1L; - static final private Log LOG = LogFactory.getLog(MapJoinOperator.class.getName()); + static final private Log LOG = LogFactory.getLog(MapJoinOperator.class + .getName()); /** * The expressions for join inputs's join keys. @@ -67,21 +69,23 @@ */ transient protected Map> joinKeysStandardObjectInspectors; - transient private int posBigTable; // one of the tables that is not in memory - transient int mapJoinRowsKey; // rows for a given key + transient private int posBigTable; // one of the tables that is not in memory + transient int mapJoinRowsKey; // rows for a given key transient protected Map> mapJoinTables; - + transient protected RowContainer> emptyList = null; - - transient static final private String[] fatalErrMsg = { - null, // counter value 0 means no error - "Mapside join size exceeds hive.mapjoin.maxsize. Please increase that or remove the mapjoin hint." // counter value 1 + + transient static final private String[] fatalErrMsg = { + null, // counter value 0 means no error + "Mapside join size exceeds hive.mapjoin.maxsize. Please increase that or remove the mapjoin hint." // counter + // value + // 1 }; - + public static class MapJoinObjectCtx { ObjectInspector standardOI; - SerDe serde; + SerDe serde; tableDesc tblDesc; Configuration conf; @@ -89,7 +93,8 @@ * @param standardOI * @param serde */ - public MapJoinObjectCtx(ObjectInspector standardOI, SerDe serde, tableDesc tblDesc, Configuration conf) { + public MapJoinObjectCtx(ObjectInspector standardOI, SerDe serde, + tableDesc tblDesc, Configuration conf) { this.standardOI = standardOI; this.serde = serde; this.tblDesc = tblDesc; @@ -129,12 +134,12 @@ transient boolean firstRow; - transient int metadataKeyTag; + transient int metadataKeyTag; transient int[] metadataValueTag; transient List hTables; - transient int numMapRowsRead; - transient int heartbeatInterval; - transient int maxMapJoinSize; + transient int numMapRowsRead; + transient int heartbeatInterval; + transient int maxMapJoinSize; @Override protected void initializeOp(Configuration hconf) throws HiveException { @@ -142,44 +147,53 @@ numMapRowsRead = 0; firstRow = true; - heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); - maxMapJoinSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAXMAPJOINSIZE); - - joinKeys = new HashMap>(); - + heartbeatInterval = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVESENDHEARTBEAT); + maxMapJoinSize = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEMAXMAPJOINSIZE); + + joinKeys = new HashMap>(); + populateJoinKeyValue(joinKeys, conf.getKeys()); - joinKeysObjectInspectors = getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors); + joinKeysObjectInspectors = getObjectInspectorsFromEvaluators(joinKeys, + inputObjInspectors); joinKeysStandardObjectInspectors = getStandardObjectInspectors(joinKeysObjectInspectors); - + // all other tables are small, and are cached in the hash table posBigTable = conf.getPosBigTable(); - + metadataValueTag = new int[numAliases]; - for (int pos = 0; pos < numAliases; pos++) + for (int pos = 0; pos < 
numAliases; pos++) { metadataValueTag[pos] = -1; - + } + mapJoinTables = new HashMap>(); hTables = new ArrayList(); - + // initialize the hash tables for other tables for (int pos = 0; pos < numAliases; pos++) { - if (pos == posBigTable) + if (pos == posBigTable) { continue; - - int cacheSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINCACHEROWS); - HashMapWrapper hashTable = - new HashMapWrapper(cacheSize); - - mapJoinTables.put(Byte.valueOf((byte)pos), hashTable); + } + + int cacheSize = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEMAPJOINCACHEROWS); + HashMapWrapper hashTable = new HashMapWrapper( + cacheSize); + + mapJoinTables.put(Byte.valueOf((byte) pos), hashTable); } - + emptyList = new RowContainer>(1, hconf); - RowContainer bigPosRC = getRowContainer(hconf, (byte)posBigTable, order[posBigTable], joinCacheSize); - storage.put((byte)posBigTable, bigPosRC); - - mapJoinRowsKey = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINROWSIZE); - - List structFields = ((StructObjectInspector)outputObjInspector).getAllStructFieldRefs(); + RowContainer bigPosRC = getRowContainer(hconf, (byte) posBigTable, + order[posBigTable], joinCacheSize); + storage.put((byte) posBigTable, bigPosRC); + + mapJoinRowsKey = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEMAPJOINROWSIZE); + + List structFields = ((StructObjectInspector) outputObjInspector) + .getAllStructFieldRefs(); if (conf.getOutputColumnNames().size() < structFields.size()) { List structFieldObjectInspectors = new ArrayList(); for (Byte alias : order) { @@ -188,34 +202,37 @@ for (int i = 0; i < sz; i++) { int pos = retained.get(i); structFieldObjectInspectors.add(structFields.get(pos) - .getFieldObjectInspector()); + .getFieldObjectInspector()); } } outputObjInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(conf.getOutputColumnNames(), - structFieldObjectInspectors); + .getStandardStructObjectInspector(conf.getOutputColumnNames(), + structFieldObjectInspectors); } initializeChildren(hconf); } - + @Override protected void fatalErrorMessage(StringBuffer errMsg, long counterCode) { - errMsg.append("Operator " + getOperatorId() + " (id=" + id + "): " + - fatalErrMsg[(int)counterCode]); + errMsg.append("Operator " + getOperatorId() + " (id=" + id + "): " + + fatalErrMsg[(int) counterCode]); } - + @Override public void processOp(Object row, int tag) throws HiveException { try { // get alias - alias = (byte)tag; - - if ((lastAlias == null) || (!lastAlias.equals(alias))) + alias = (byte) tag; + + if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; - + } + // compute keys and values as StandardObjects - ArrayList key = computeValues(row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias)); - ArrayList value = computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias)); + ArrayList key = computeValues(row, joinKeys.get(alias), + joinKeysObjectInspectors.get(alias)); + ArrayList value = computeValues(row, joinValues.get(alias), + joinValuesObjectInspectors.get(alias)); // does this source need to be stored in the hash map if (tag != posBigTable) { @@ -223,79 +240,90 @@ metadataKeyTag = nextVal++; tableDesc keyTableDesc = conf.getKeyTblDesc(); - SerDe keySerializer = (SerDe)ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null); + SerDe keySerializer = (SerDe) ReflectionUtils.newInstance( + keyTableDesc.getDeserializerClass(), null); keySerializer.initialize(null, keyTableDesc.getProperties()); 
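// ---------------------------------------------------------------------------
// Editor's note, not part of this patch: the MapJoinObjectCtx registered below
// under metadataKeyTag is the per-tag metadata that MapJoinObjectKey and
// MapJoinObjectValue look up in their writeExternal()/readExternal() through
// MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag)). Roughly,
// as in MapJoinObjectKey.writeExternal() earlier in this patch:
//
//   out.writeInt(metadataTag);
//   MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
//       Integer.valueOf(metadataTag));
//   Writable outVal = ctx.getSerDe().serialize(obj, ctx.getStandardOI());
//   outVal.write(out);
//
// so the integer tag written to the stream is enough to recover the SerDe and
// standard ObjectInspector needed to serialize and deserialize the rows.
// ---------------------------------------------------------------------------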
mapMetadata.put(Integer.valueOf(metadataKeyTag), new MapJoinObjectCtx( - ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE), - keySerializer, keyTableDesc, hconf)); + ObjectInspectorUtils + .getStandardObjectInspector(keySerializer + .getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), keySerializer, + keyTableDesc, hconf)); firstRow = false; } // Send some status periodically numMapRowsRead++; - if (((numMapRowsRead % heartbeatInterval) == 0) && (reporter != null)) + if (((numMapRowsRead % heartbeatInterval) == 0) && (reporter != null)) { reporter.progress(); - - if ( (numMapRowsRead > maxMapJoinSize) && (reporter != null) && (counterNameToEnum != null)) { + } + + if ((numMapRowsRead > maxMapJoinSize) && (reporter != null) + && (counterNameToEnum != null)) { // update counter - LOG.warn("Too many rows in map join tables. Fatal error counter will be incremented!!"); + LOG + .warn("Too many rows in map join tables. Fatal error counter will be incremented!!"); incrCounter(fatalErrorCntr, 1); fatalError = true; return; } - - HashMapWrapper hashTable = mapJoinTables.get(alias); + HashMapWrapper hashTable = mapJoinTables + .get(alias); MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); MapJoinObjectValue o = hashTable.get(keyMap); RowContainer res = null; boolean needNewKey = true; if (o == null) { - res = getRowContainer(this.hconf, (byte)tag, order[tag], joinCacheSize); - res.add(value); + res = getRowContainer(hconf, (byte) tag, order[tag], joinCacheSize); + res.add(value); } else { res = o.getObj(); res.add(value); - // If key already exists, HashMapWrapper.get() guarantees it is already in main memory HashMap - // cache. So just replacing the object value should update the HashMapWrapper. This will save - // the cost of constructing the new key/object and deleting old one and inserting the new one. - if ( hashTable.cacheSize() > 0) { + // If key already exists, HashMapWrapper.get() guarantees it is + // already in main memory HashMap + // cache. So just replacing the object value should update the + // HashMapWrapper. This will save + // the cost of constructing the new key/object and deleting old one + // and inserting the new one. 
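// ---------------------------------------------------------------------------
// Editor's note, not part of this patch: this reuse relies on the
// HashMapWrapper.get() contract documented above -- a value returned by get()
// is guaranteed to be in the in-memory cache, so updating it in place via
// o.setObj(res) is sufficient; a fresh put() is only needed when the key was
// absent (o == null) or when the wrapper has no in-memory cache
// (cacheSize() == 0).
// ---------------------------------------------------------------------------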
+ if (hashTable.cacheSize() > 0) { o.setObj(res); needNewKey = false; - } + } } - - + if (metadataValueTag[tag] == -1) { metadataValueTag[tag] = nextVal++; - + tableDesc valueTableDesc = conf.getValueTblDescs().get(tag); - SerDe valueSerDe = (SerDe)ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null); + SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc + .getDeserializerClass(), null); valueSerDe.initialize(null, valueTableDesc.getProperties()); - + mapMetadata.put(Integer.valueOf(metadataValueTag[tag]), - new MapJoinObjectCtx( - ObjectInspectorUtils.getStandardObjectInspector(valueSerDe.getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE), - valueSerDe, valueTableDesc, hconf)); + new MapJoinObjectCtx(ObjectInspectorUtils + .getStandardObjectInspector(valueSerDe.getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), valueSerDe, + valueTableDesc, hconf)); } - + // Construct externalizable objects for key and value - if ( needNewKey ) { + if (needNewKey) { MapJoinObjectKey keyObj = new MapJoinObjectKey(metadataKeyTag, key); - MapJoinObjectValue valueObj = new MapJoinObjectValue(metadataValueTag[tag], res); - valueObj.setConf(this.hconf); + MapJoinObjectValue valueObj = new MapJoinObjectValue( + metadataValueTag[tag], res); + valueObj.setConf(hconf); // This may potentially increase the size of the hashmap on the mapper - if (res.size() > mapJoinRowsKey) { - if ( res.size() % 100 == 0 ) { - LOG.warn("Number of values for a given key " + keyObj + " are " + res.size()); + if (res.size() > mapJoinRowsKey) { + if (res.size() % 100 == 0) { + LOG.warn("Number of values for a given key " + keyObj + " are " + + res.size()); LOG.warn("used memory " + Runtime.getRuntime().totalMemory()); - } + } } hashTable.put(keyObj, valueObj); } @@ -308,15 +336,15 @@ for (Byte pos : order) { if (pos.intValue() != tag) { MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); - MapJoinObjectValue o = (MapJoinObjectValue)mapJoinTables.get(pos).get(keyMap); + MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); if (o == null) { - if(noOuterJoin) + if (noOuterJoin) { storage.put(pos, emptyList); - else + } else { storage.put(pos, dummyObjVectors[pos.intValue()]); - } - else { + } + } else { storage.put(pos, o.getObj()); } } @@ -328,30 +356,37 @@ // done with the row storage.get(alias).clear(); - for (Byte pos : order) - if (pos.intValue() != tag) + for (Byte pos : order) { + if (pos.intValue() != tag) { storage.put(pos, null); + } + } } catch (SerDeException e) { e.printStackTrace(); throw new HiveException(e); } } - + + @Override public void closeOp(boolean abort) throws HiveException { - for (HashMapWrapper hashTable: mapJoinTables.values()) { + for (HashMapWrapper hashTable : mapJoinTables.values()) { hashTable.close(); } super.closeOp(abort); } + /** * Implements the getName function for the Node Interface. 
+ * * @return the name of the operator */ + @Override public String getName() { return "MAPJOIN"; } + @Override public int getType() { return OperatorType.MAPJOIN; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java (working copy) @@ -23,8 +23,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; - -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -32,9 +30,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** - * This Evaluator can evaluate s.f for s as both struct and list of struct. - * If s is struct, then s.f is the field. - * If s is list of struct, then s.f is the list of struct field. + * This Evaluator can evaluate s.f for s as both struct and list of struct. If s + * is struct, then s.f is the field. If s is list of struct, then s.f is the + * list of struct field. */ public class ExprNodeFieldEvaluator extends ExprNodeEvaluator { @@ -45,7 +43,7 @@ transient StructField field; transient ObjectInspector structFieldObjectInspector; transient ObjectInspector resultObjectInspector; - + public ExprNodeFieldEvaluator(exprNodeFieldDesc desc) { this.desc = desc; leftEvaluator = ExprNodeEvaluatorFactory.get(desc.getDesc()); @@ -54,7 +52,7 @@ @Override public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException { - + leftInspector = leftEvaluator.initialize(rowInspector); if (desc.getIsList()) { structObjectInspector = (StructObjectInspector) ((ListObjectInspector) leftInspector) @@ -73,22 +71,24 @@ } return resultObjectInspector; } - + List cachedList = new ArrayList(); + @Override public Object evaluate(Object row) throws HiveException { - + // Get the result in leftInspectableObject Object left = leftEvaluator.evaluate(row); if (desc.getIsList()) { - List list = ((ListObjectInspector)leftInspector).getList(left); + List list = ((ListObjectInspector) leftInspector).getList(left); if (list == null) { return null; } else { cachedList.clear(); - for(int i=0; i inputFields = - ((StandardStructObjectInspector)inputObjInspectors[0]).getAllStructFieldRefs(); + List inputFields = ((StandardStructObjectInspector) inputObjInspectors[0]) + .getAllStructFieldRefs(); udtfInputOIs = new ObjectInspector[inputFields.size()]; - for (int i=0; i fields = soi.getAllStructFieldRefs(); - for (int i=0; i implements + Serializable { -public class ScriptOperator extends Operator implements Serializable { - private static final long serialVersionUID = 1L; + public static enum Counter { + DESERIALIZE_ERRORS, SERIALIZE_ERRORS + } - public static enum Counter {DESERIALIZE_ERRORS, SERIALIZE_ERRORS} - transient private LongWritable deserialize_error_count = new LongWritable (); - transient private LongWritable serialize_error_count = new LongWritable (); + transient private final LongWritable deserialize_error_count = new LongWritable(); + transient private final LongWritable serialize_error_count = new LongWritable(); transient Thread outThread = null; transient Thread errThread = null; @@ -69,14 
+70,15 @@ transient volatile Throwable scriptError = null; transient RecordWriter scriptOutWriter = null; - static final String IO_EXCEPTION_BROKEN_PIPE_STRING= "Broken pipe"; + static final String IO_EXCEPTION_BROKEN_PIPE_STRING = "Broken pipe"; /** * sends periodic reports back to the tracker. */ transient AutoProgressor autoProgressor; - // first row - the process should only be started if necessary, as it may conflict with some + // first row - the process should only be started if necessary, as it may + // conflict with some // of the user assumptions. transient boolean firstRow; @@ -89,7 +91,8 @@ for (int i = 0; i < len; i++) { char c = var.charAt(i); char s; - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')) { s = c; } else { s = '_'; @@ -99,35 +102,33 @@ return safe.toString(); } - static void addJobConfToEnvironment(Configuration conf, Map env) { + static void addJobConfToEnvironment(Configuration conf, + Map env) { Iterator> it = conf.iterator(); while (it.hasNext()) { - Map.Entry en = (Map.Entry) it.next(); - String name = (String) en.getKey(); - //String value = (String)en.getValue(); // does not apply variable expansion + Map.Entry en = it.next(); + String name = en.getKey(); + // String value = (String)en.getValue(); // does not apply variable + // expansion String value = conf.get(name); // does variable expansion name = safeEnvVarName(name); env.put(name, value); } } - /** - * Maps a relative pathname to an absolute pathname using the - * PATH enviroment. + * Maps a relative pathname to an absolute pathname using the PATH enviroment. */ - public class PathFinder - { - String pathenv; // a string of pathnames - String pathSep; // the path seperator - String fileSep; // the file seperator in a directory + public class PathFinder { + String pathenv; // a string of pathnames + String pathSep; // the path seperator + String fileSep; // the file seperator in a directory /** - * Construct a PathFinder object using the path from - * the specified system environment variable. + * Construct a PathFinder object using the path from the specified system + * environment variable. 
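* (Illustrative sketch only, not part of this patch; the script name below is
* made up.) The three methods of this helper are typically used together:
*
*   PathFinder finder = new PathFinder("PATH");          // read $PATH
*   finder.prependPathComponent(".");                    // also search the current dir
*   File prog = finder.getAbsolutePath("my_script.sh");  // null if no readable match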
*/ - public PathFinder(String envpath) - { + public PathFinder(String envpath) { pathenv = System.getenv(envpath); pathSep = System.getProperty("path.separator"); fileSep = System.getProperty("file.separator"); @@ -136,25 +137,22 @@ /** * Appends the specified component to the path list */ - public void prependPathComponent(String str) - { + public void prependPathComponent(String str) { pathenv = str + pathSep + pathenv; } /** - * Returns the full path name of this file if it is listed in the - * path + * Returns the full path name of this file if it is listed in the path */ - public File getAbsolutePath(String filename) - { - if (pathenv == null || pathSep == null || fileSep == null) { + public File getAbsolutePath(String filename) { + if (pathenv == null || pathSep == null || fileSep == null) { return null; } - int val = -1; - String classvalue = pathenv + pathSep; + int val = -1; + String classvalue = pathenv + pathSep; - while (((val = classvalue.indexOf(pathSep)) >= 0) && - classvalue.length() > 0) { + while (((val = classvalue.indexOf(pathSep)) >= 0) + && classvalue.length() > 0) { // // Extract each entry from the pathenv // @@ -170,18 +168,20 @@ f = new File(entry + fileSep + filename); } // - // see if the filename matches and we can read it + // see if the filename matches and we can read it // if (f.isFile() && f.canRead()) { return f; } - } catch (Exception exp){ } - classvalue = classvalue.substring(val+1).trim(); + } catch (Exception exp) { + } + classvalue = classvalue.substring(val + 1).trim(); } return null; } } + @Override protected void initializeOp(Configuration hconf) throws HiveException { firstRow = true; @@ -191,18 +191,22 @@ try { this.hconf = hconf; - scriptOutputDeserializer = conf.getScriptOutputInfo().getDeserializerClass().newInstance(); - scriptOutputDeserializer.initialize(hconf, conf.getScriptOutputInfo().getProperties()); + scriptOutputDeserializer = conf.getScriptOutputInfo() + .getDeserializerClass().newInstance(); + scriptOutputDeserializer.initialize(hconf, conf.getScriptOutputInfo() + .getProperties()); - scriptInputSerializer = (Serializer)conf.getScriptInputInfo().getDeserializerClass().newInstance(); - scriptInputSerializer.initialize(hconf, conf.getScriptInputInfo().getProperties()); + scriptInputSerializer = (Serializer) conf.getScriptInputInfo() + .getDeserializerClass().newInstance(); + scriptInputSerializer.initialize(hconf, conf.getScriptInputInfo() + .getProperties()); outputObjInspector = scriptOutputDeserializer.getObjectInspector(); // initialize all children before starting the script initializeChildren(hconf); } catch (Exception e) { - throw new HiveException ("Cannot initialize ScriptOperator", e); + throw new HiveException("Cannot initialize ScriptOperator", e); } } @@ -215,17 +219,20 @@ } void displayBrokenPipeInfo() { - LOG.info("The script did not consume all input data. This is considered as an error."); - LOG.info("set " + HiveConf.ConfVars.ALLOWPARTIALCONSUMP.toString() + "=true; to ignore it."); + LOG + .info("The script did not consume all input data. 
This is considered as an error."); + LOG.info("set " + HiveConf.ConfVars.ALLOWPARTIALCONSUMP.toString() + + "=true; to ignore it."); return; } + @Override public void processOp(Object row, int tag) throws HiveException { // initialize the user's process only when you recieve the first row if (firstRow) { firstRow = false; try { - String [] cmdArgs = splitArgs(conf.getScriptCmd()); + String[] cmdArgs = splitArgs(conf.getScriptCmd()); String prog = cmdArgs[0]; File currentDir = new File(".").getAbsoluteFile(); @@ -240,64 +247,78 @@ f = null; } - String [] wrappedCmdArgs = addWrapper(cmdArgs); + String[] wrappedCmdArgs = addWrapper(cmdArgs); LOG.info("Executing " + Arrays.asList(wrappedCmdArgs)); - LOG.info("tablename=" + hconf.get(HiveConf.ConfVars.HIVETABLENAME.varname)); - LOG.info("partname=" + hconf.get(HiveConf.ConfVars.HIVEPARTITIONNAME.varname)); + LOG.info("tablename=" + + hconf.get(HiveConf.ConfVars.HIVETABLENAME.varname)); + LOG.info("partname=" + + hconf.get(HiveConf.ConfVars.HIVEPARTITIONNAME.varname)); LOG.info("alias=" + alias); ProcessBuilder pb = new ProcessBuilder(wrappedCmdArgs); Map env = pb.environment(); addJobConfToEnvironment(hconf, env); - env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String.valueOf(alias)); + env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String + .valueOf(alias)); - // Create an environment variable that uniquely identifies this script operator - String idEnvVarName = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESCRIPTIDENVVAR); - String idEnvVarVal = this.getOperatorId(); + // Create an environment variable that uniquely identifies this script + // operator + String idEnvVarName = HiveConf.getVar(hconf, + HiveConf.ConfVars.HIVESCRIPTIDENVVAR); + String idEnvVarVal = getOperatorId(); env.put(safeEnvVarName(idEnvVarName), idEnvVarVal); - scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs); + scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs); - DataOutputStream scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream())); - DataInputStream scriptIn = new DataInputStream(new BufferedInputStream(scriptPid.getInputStream())); - DataInputStream scriptErr = new DataInputStream(new BufferedInputStream(scriptPid.getErrorStream())); + DataOutputStream scriptOut = new DataOutputStream( + new BufferedOutputStream(scriptPid.getOutputStream())); + DataInputStream scriptIn = new DataInputStream(new BufferedInputStream( + scriptPid.getInputStream())); + DataInputStream scriptErr = new DataInputStream( + new BufferedInputStream(scriptPid.getErrorStream())); scriptOutWriter = conf.getInRecordWriterClass().newInstance(); scriptOutWriter.initialize(scriptOut, hconf); - RecordReader scriptOutputReader = conf.getOutRecordReaderClass().newInstance(); - scriptOutputReader.initialize(scriptIn, hconf, conf.getScriptOutputInfo().getProperties()); + RecordReader scriptOutputReader = conf.getOutRecordReaderClass() + .newInstance(); + scriptOutputReader.initialize(scriptIn, hconf, conf + .getScriptOutputInfo().getProperties()); - outThread = new StreamThread(scriptOutputReader, new OutputStreamProcessor( - scriptOutputDeserializer.getObjectInspector()), "OutputProcessor"); + outThread = new StreamThread(scriptOutputReader, + new OutputStreamProcessor(scriptOutputDeserializer + .getObjectInspector()), "OutputProcessor"); - RecordReader scriptErrReader = conf.getOutRecordReaderClass().newInstance(); - scriptErrReader.initialize(scriptErr, hconf, conf.getScriptOutputInfo().getProperties()); 
+ RecordReader scriptErrReader = conf.getOutRecordReaderClass() + .newInstance(); + scriptErrReader.initialize(scriptErr, hconf, conf.getScriptOutputInfo() + .getProperties()); - errThread = new StreamThread(scriptErrReader, - new ErrorStreamProcessor - (HiveConf.getIntVar(hconf, HiveConf.ConfVars.SCRIPTERRORLIMIT)), - "ErrorProcessor"); + errThread = new StreamThread(scriptErrReader, new ErrorStreamProcessor( + HiveConf.getIntVar(hconf, HiveConf.ConfVars.SCRIPTERRORLIMIT)), + "ErrorProcessor"); - if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTAUTOPROGRESS)) { - autoProgressor = new AutoProgressor(this.getClass().getName(), reporter, - Utilities.getDefaultNotificationInterval(hconf)); + if (HiveConf + .getBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTAUTOPROGRESS)) { + autoProgressor = new AutoProgressor(this.getClass().getName(), + reporter, Utilities.getDefaultNotificationInterval(hconf)); autoProgressor.go(); } outThread.start(); errThread.start(); } catch (Exception e) { - throw new HiveException ("Cannot initialize ScriptOperator", e); + throw new HiveException("Cannot initialize ScriptOperator", e); } } - if(scriptError != null) { + if (scriptError != null) { throw new HiveException(scriptError); } try { - Writable res = scriptInputSerializer.serialize(row, inputObjInspectors[tag]); + Writable res = scriptInputSerializer.serialize(row, + inputObjInspectors[tag]); scriptOutWriter.write(res); } catch (SerDeException e) { LOG.error("Error in serializing the row: " + e.getMessage()); @@ -305,12 +326,13 @@ serialize_error_count.set(serialize_error_count.get() + 1); throw new HiveException(e); } catch (IOException e) { - if(isBrokenPipeException(e) && allowPartialConsumption()) { + if (isBrokenPipeException(e) && allowPartialConsumption()) { setDone(true); - LOG.warn("Got broken pipe during write: ignoring exception and setting operator to done"); + LOG + .warn("Got broken pipe during write: ignoring exception and setting operator to done"); } else { LOG.error("Error in writing to script: " + e.getMessage()); - if(isBrokenPipeException(e)) { + if (isBrokenPipeException(e)) { displayBrokenPipeInfo(); } scriptError = e; @@ -319,40 +341,45 @@ } } + @Override public void close(boolean abort) throws HiveException { boolean new_abort = abort; - if(!abort) { - if(scriptError != null) { + if (!abort) { + if (scriptError != null) { throw new HiveException(scriptError); } // everything ok. 
try normal shutdown try { try { - if (scriptOutWriter != null) + if (scriptOutWriter != null) { scriptOutWriter.close(); + } } catch (IOException e) { - if(isBrokenPipeException(e) && allowPartialConsumption()) { + if (isBrokenPipeException(e) && allowPartialConsumption()) { LOG.warn("Got broken pipe: ignoring exception"); } else { - if(isBrokenPipeException(e)) { + if (isBrokenPipeException(e)) { displayBrokenPipeInfo(); } throw e; } } int exitVal = 0; - if (scriptPid != null) + if (scriptPid != null) { exitVal = scriptPid.waitFor(); + } if (exitVal != 0) { LOG.error("Script failed with code " + exitVal); new_abort = true; - }; + } + ; } catch (IOException e) { LOG.error("Got ioexception: " + e.getMessage()); e.printStackTrace(); new_abort = true; - } catch (InterruptedException e) { } + } catch (InterruptedException e) { + } } else { @@ -362,16 +389,17 @@ // Interrupt the current thread after 1 second final Thread mythread = Thread.currentThread(); Timer timer = new Timer(true); - timer.schedule(new TimerTask(){ + timer.schedule(new TimerTask() { @Override public void run() { mythread.interrupt(); - }}, - 1000); + } + }, 1000); // Wait for the child process to finish int exitVal = 0; - if (scriptPid != null) + if (scriptPid != null) { scriptPid.waitFor(); + } // Cancel the timer timer.cancel(); // Output the exit code @@ -384,74 +412,82 @@ // try these best effort try { - if (outThread != null) + if (outThread != null) { outThread.join(0); + } } catch (Exception e) { - LOG.warn("Exception in closing outThread: " + StringUtils.stringifyException(e)); + LOG.warn("Exception in closing outThread: " + + StringUtils.stringifyException(e)); } try { - if (errThread != null) + if (errThread != null) { errThread.join(0); + } } catch (Exception e) { - LOG.warn("Exception in closing errThread: " + StringUtils.stringifyException(e)); + LOG.warn("Exception in closing errThread: " + + StringUtils.stringifyException(e)); } try { - if (scriptPid != null) + if (scriptPid != null) { scriptPid.destroy(); + } } catch (Exception e) { - LOG.warn("Exception in destroying scriptPid: " + StringUtils.stringifyException(e)); + LOG.warn("Exception in destroying scriptPid: " + + StringUtils.stringifyException(e)); } super.close(new_abort); - if(new_abort && !abort) { - throw new HiveException ("Hit error while closing .."); + if (new_abort && !abort) { + throw new HiveException("Hit error while closing .."); } } - interface StreamProcessor { public void processLine(Writable line) throws HiveException; + public void close() throws HiveException; } - class OutputStreamProcessor implements StreamProcessor { Object row; ObjectInspector rowInspector; + public OutputStreamProcessor(ObjectInspector rowInspector) { this.rowInspector = rowInspector; } + public void processLine(Writable line) throws HiveException { try { row = scriptOutputDeserializer.deserialize(line); } catch (SerDeException e) { - deserialize_error_count.set(deserialize_error_count.get()+1); + deserialize_error_count.set(deserialize_error_count.get() + 1); return; } forward(row, rowInspector); } + public void close() { } } /** * The processor for stderr stream. - * - * TODO: In the future when we move to hadoop 0.18 and above, we should borrow the logic - * from HadoopStreaming: PipeMapRed.java MRErrorThread to support counters and status - * updates. + * + * TODO: In the future when we move to hadoop 0.18 and above, we should borrow + * the logic from HadoopStreaming: PipeMapRed.java MRErrorThread to support + * counters and status updates. 
*/ class ErrorStreamProcessor implements StreamProcessor { private long bytesCopied = 0; - private long maxBytes; + private final long maxBytes; private long lastReportTime; - public ErrorStreamProcessor (int maxBytes) { - this.maxBytes = (long)maxBytes; + public ErrorStreamProcessor(int maxBytes) { + this.maxBytes = maxBytes; lastReportTime = 0; } @@ -460,12 +496,14 @@ String stringLine = line.toString(); int len = 0; - if (line instanceof Text) - len = ((Text)line).getLength(); - else if (line instanceof BytesWritable) - len = ((BytesWritable)line).getSize(); + if (line instanceof Text) { + len = ((Text) line).getLength(); + } else if (line instanceof BytesWritable) { + len = ((BytesWritable) line).getSize(); + } - // Report progress for each stderr line, but no more frequently than once per minute. + // Report progress for each stderr line, but no more frequently than once + // per minute. long now = System.currentTimeMillis(); // reporter is a member variable of the Operator class. if (now - lastReportTime > 60 * 1000 && reporter != null) { @@ -474,21 +512,22 @@ reporter.progress(); } - if((maxBytes < 0) || (bytesCopied < maxBytes)) { + if ((maxBytes < 0) || (bytesCopied < maxBytes)) { System.err.println(stringLine); } if (bytesCopied < maxBytes && bytesCopied + len >= maxBytes) { System.err.println("Operator " + id + " " + getName() - + ": exceeding stderr limit of " + maxBytes + " bytes, will truncate stderr messages."); + + ": exceeding stderr limit of " + maxBytes + + " bytes, will truncate stderr messages."); } bytesCopied += len; } + public void close() { } } - class StreamThread extends Thread { RecordReader in; @@ -502,19 +541,20 @@ setDaemon(true); } + @Override public void run() { try { Writable row = in.createRow(); - while(true) { + while (true) { long bytes = in.next(row); - if(bytes <= 0) { + if (bytes <= 0) { break; } proc.processLine(row); } - LOG.info("StreamThread "+name+" done"); + LOG.info("StreamThread " + name + " done"); } catch (Throwable th) { scriptError = th; @@ -534,27 +574,26 @@ } /** - * Wrap the script in a wrapper that allows admins to control + * Wrap the script in a wrapper that allows admins to control **/ - protected String [] addWrapper(String [] inArgs) { + protected String[] addWrapper(String[] inArgs) { String wrapper = HiveConf.getVar(hconf, HiveConf.ConfVars.SCRIPTWRAPPER); - if(wrapper == null) { + if (wrapper == null) { return inArgs; } - String [] wrapComponents = splitArgs(wrapper); + String[] wrapComponents = splitArgs(wrapper); int totallength = wrapComponents.length + inArgs.length; - String [] finalArgv = new String [totallength]; - for(int i=0; i implements Serializable { +public class ForwardOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; @Override - public void processOp(Object row, int tag) - throws HiveException { - forward(row, inputObjInspectors[tag]); + public void processOp(Object row, int tag) throws HiveException { + forward(row, inputObjInspectors[tag]); } - + + @Override public int getType() { return OperatorType.FORWARD; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (working copy) @@ -18,15 +18,29 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import 
java.util.ArrayList; +import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.plan.*; +import org.apache.hadoop.hive.ql.plan.collectDesc; +import org.apache.hadoop.hive.ql.plan.extractDesc; +import org.apache.hadoop.hive.ql.plan.fileSinkDesc; +import org.apache.hadoop.hive.ql.plan.filterDesc; +import org.apache.hadoop.hive.ql.plan.forwardDesc; +import org.apache.hadoop.hive.ql.plan.groupByDesc; +import org.apache.hadoop.hive.ql.plan.joinDesc; +import org.apache.hadoop.hive.ql.plan.lateralViewJoinDesc; +import org.apache.hadoop.hive.ql.plan.limitDesc; +import org.apache.hadoop.hive.ql.plan.mapJoinDesc; +import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.scriptDesc; +import org.apache.hadoop.hive.ql.plan.selectDesc; +import org.apache.hadoop.hive.ql.plan.tableScanDesc; +import org.apache.hadoop.hive.ql.plan.udtfDesc; +import org.apache.hadoop.hive.ql.plan.unionDesc; public class OperatorFactory { - + public final static class opTuple { public Class descClass; public Class> opClass; @@ -39,32 +53,40 @@ public static ArrayList opvec; static { - opvec = new ArrayList (); - opvec.add(new opTuple (filterDesc.class, FilterOperator.class)); - opvec.add(new opTuple (selectDesc.class, SelectOperator.class)); - opvec.add(new opTuple (forwardDesc.class, ForwardOperator.class)); - opvec.add(new opTuple (fileSinkDesc.class, FileSinkOperator.class)); - opvec.add(new opTuple (collectDesc.class, CollectOperator.class)); - opvec.add(new opTuple (scriptDesc.class, ScriptOperator.class)); - opvec.add(new opTuple (reduceSinkDesc.class, ReduceSinkOperator.class)); - opvec.add(new opTuple (extractDesc.class, ExtractOperator.class)); - opvec.add(new opTuple (groupByDesc.class, GroupByOperator.class)); - opvec.add(new opTuple (joinDesc.class, JoinOperator.class)); - opvec.add(new opTuple (mapJoinDesc.class, MapJoinOperator.class)); - opvec.add(new opTuple (limitDesc.class, LimitOperator.class)); - opvec.add(new opTuple (tableScanDesc.class, TableScanOperator.class)); - opvec.add(new opTuple (unionDesc.class, UnionOperator.class)); - opvec.add(new opTuple (udtfDesc.class, UDTFOperator.class)); - opvec.add(new opTuple(lateralViewJoinDesc.class, LateralViewJoinOperator.class)); + opvec = new ArrayList(); + opvec.add(new opTuple(filterDesc.class, FilterOperator.class)); + opvec.add(new opTuple(selectDesc.class, SelectOperator.class)); + opvec + .add(new opTuple(forwardDesc.class, ForwardOperator.class)); + opvec.add(new opTuple(fileSinkDesc.class, + FileSinkOperator.class)); + opvec + .add(new opTuple(collectDesc.class, CollectOperator.class)); + opvec.add(new opTuple(scriptDesc.class, ScriptOperator.class)); + opvec.add(new opTuple(reduceSinkDesc.class, + ReduceSinkOperator.class)); + opvec + .add(new opTuple(extractDesc.class, ExtractOperator.class)); + opvec + .add(new opTuple(groupByDesc.class, GroupByOperator.class)); + opvec.add(new opTuple(joinDesc.class, JoinOperator.class)); + opvec + .add(new opTuple(mapJoinDesc.class, MapJoinOperator.class)); + opvec.add(new opTuple(limitDesc.class, LimitOperator.class)); + opvec.add(new opTuple(tableScanDesc.class, + TableScanOperator.class)); + opvec.add(new opTuple(unionDesc.class, UnionOperator.class)); + opvec.add(new opTuple(udtfDesc.class, UDTFOperator.class)); + opvec.add(new opTuple(lateralViewJoinDesc.class, + LateralViewJoinOperator.class)); } - public static Operator get(Class opClass) { - - for(opTuple o: opvec) { - if(o.descClass == 
opClass) { + + for (opTuple o : opvec) { + if (o.descClass == opClass) { try { - Operator op = (Operator)o.opClass.newInstance(); + Operator op = (Operator) o.opClass.newInstance(); op.initializeCounters(); return op; } catch (Exception e) { @@ -73,33 +95,37 @@ } } } - throw new RuntimeException ("No operator for descriptor class " + opClass.getName()); + throw new RuntimeException("No operator for descriptor class " + + opClass.getName()); } - public static Operator get(Class opClass, RowSchema rwsch) { - + public static Operator get(Class opClass, + RowSchema rwsch) { + Operator ret = get(opClass); ret.setSchema(rwsch); return ret; } /** - * Returns an operator given the conf and a list of children operators. + * Returns an operator given the conf and a list of children operators. */ - public static Operator get(T conf, Operator ... oplist) { - Operator ret = get((Class )conf.getClass()); + public static Operator get(T conf, + Operator... oplist) { + Operator ret = get((Class) conf.getClass()); ret.setConf(conf); - if(oplist.length == 0) + if (oplist.length == 0) { return (ret); + } - ArrayList> clist = new ArrayList> (); - for(Operator op: oplist) { + ArrayList> clist = new ArrayList>(); + for (Operator op : oplist) { clist.add(op); } ret.setChildOperators(clist); - + // Add this parent to the children - for(Operator op: oplist) { + for (Operator op : oplist) { List> parents = op.getParentOperators(); if (parents == null) { parents = new ArrayList>(); @@ -111,25 +137,28 @@ } /** - * Returns an operator given the conf and a list of children operators. + * Returns an operator given the conf and a list of children operators. */ - public static Operator get(T conf, RowSchema rwsch, Operator ... oplist) { + public static Operator get(T conf, + RowSchema rwsch, Operator... oplist) { Operator ret = get(conf, oplist); ret.setSchema(rwsch); return (ret); } /** - * Returns an operator given the conf and a list of parent operators. + * Returns an operator given the conf and a list of parent operators. */ - public static Operator getAndMakeChild(T conf, Operator ... oplist) { - Operator ret = get((Class )conf.getClass()); + public static Operator getAndMakeChild(T conf, + Operator... oplist) { + Operator ret = get((Class) conf.getClass()); ret.setConf(conf); - if(oplist.length == 0) + if (oplist.length == 0) { return (ret); + } // Add the new operator as child of each of the passed in operators - for(Operator op: oplist) { + for (Operator op : oplist) { List children = op.getChildOperators(); if (children == null) { children = new ArrayList(); @@ -140,18 +169,20 @@ // add parents for the newly created operator List> parent = new ArrayList>(); - for(Operator op: oplist) + for (Operator op : oplist) { parent.add(op); - + } + ret.setParentOperators(parent); return (ret); } /** - * Returns an operator given the conf and a list of parent operators. + * Returns an operator given the conf and a list of parent operators. */ - public static Operator getAndMakeChild(T conf, RowSchema rwsch, Operator ... oplist) { + public static Operator getAndMakeChild(T conf, + RowSchema rwsch, Operator... 
oplist) { Operator ret = getAndMakeChild(conf, oplist); ret.setSchema(rwsch); return (ret); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (working copy) @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import java.util.Vector; /** * RowSchema Implementation @@ -30,7 +30,8 @@ private static final long serialVersionUID = 1L; private Vector signature; - public RowSchema() {} + public RowSchema() { + } public RowSchema(Vector signature) { this.signature = signature; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDAFEvaluatorResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDAFEvaluatorResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultUDAFEvaluatorResolver.java (working copy) @@ -25,63 +25,65 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * The default UDAF Method resolver. This resolver is used for resolving the UDAF methods are - * used for partial and final evaluation given the list of the argument types. The getEvalMethod goes through all the - * evaluate methods and returns the one that matches the argument signature or is the closest match. - * Closest match is defined as the one that requires the least number of arguments to be converted. - * In case more than one matches are found, the method throws an ambiguous method exception. + * The default UDAF Method resolver. This resolver is used for resolving the + * UDAF methods are used for partial and final evaluation given the list of the + * argument types. The getEvalMethod goes through all the evaluate methods and + * returns the one that matches the argument signature or is the closest match. + * Closest match is defined as the one that requires the least number of + * arguments to be converted. In case more than one matches are found, the + * method throws an ambiguous method exception. */ public class DefaultUDAFEvaluatorResolver implements UDAFEvaluatorResolver { /** * The class of the UDAF. */ - private Class udafClass; - + private final Class udafClass; + /** - * Constructor. - * This constructor sets the resolver to be used for comparison operators. - * See {@link UDAFEvaluatorResolver} + * Constructor. This constructor sets the resolver to be used for comparison + * operators. See {@link UDAFEvaluatorResolver} */ public DefaultUDAFEvaluatorResolver(Class udafClass) { this.udafClass = udafClass; } - + /** * Gets the evaluator class for the UDAF given the parameter types. * - * @param argClasses The list of the parameter types. + * @param argClasses + * The list of the parameter types. 
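* A sketch of what gets resolved, with hypothetical class and method names: a
* UDAF such as
*
*   public class UDAFMyMax extends UDAF {
*     public static class MyIntEvaluator implements UDAFEvaluator {
*       private int max;
*       public void init() { max = Integer.MIN_VALUE; }
*       public boolean iterate(Integer o) { if (o != null) { max = Math.max(max, o); } return true; }
*       public Integer terminatePartial() { return max; }
*       public boolean merge(Integer o) { return iterate(o); }
*       public Integer terminate() { return max; }
*     }
*     // further nested evaluator classes for other argument types ...
*   }
*
* may declare several such nested evaluators; this resolver picks the one
* whose iterate() signature is the closest match for the argument types.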
*/ - public Class getEvaluatorClass(List argClasses) - throws AmbiguousMethodException { - + public Class getEvaluatorClass( + List argClasses) throws AmbiguousMethodException { + ArrayList> classList = new ArrayList>(); - + // Add all the public member classes that implement an evaluator - for(Class enclClass: udafClass.getClasses()) { - for(Class iface: enclClass.getInterfaces()) { + for (Class enclClass : udafClass.getClasses()) { + for (Class iface : enclClass.getInterfaces()) { if (iface == UDAFEvaluator.class) { - classList.add((Class)enclClass); + classList.add((Class) enclClass); } } } - + // Next we locate all the iterate methods for each of these classes. ArrayList mList = new ArrayList(); - for(Class evaluator: classList) { - for(Method m: evaluator.getMethods()) { + for (Class evaluator : classList) { + for (Method m : evaluator.getMethods()) { if (m.getName().equalsIgnoreCase("iterate")) { mList.add(m); } } } - + Method m = FunctionRegistry.getMethodInternal(mList, false, argClasses); if (m == null) { throw new AmbiguousMethodException(udafClass, argClasses); } - - return (Class)m.getDeclaringClass(); + + return (Class) m.getDeclaringClass(); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -45,7 +45,8 @@ /** * Base operator implementation **/ -public abstract class Operator implements Serializable, Node { +public abstract class Operator implements Serializable, + Node { // Bean methods @@ -55,42 +56,44 @@ protected List> parentOperators; protected String operatorId; /** - * List of counter names associated with the operator - * It contains the following default counters - * NUM_INPUT_ROWS - * NUM_OUTPUT_ROWS - * TIME_TAKEN + * List of counter names associated with the operator It contains the + * following default counters NUM_INPUT_ROWS NUM_OUTPUT_ROWS TIME_TAKEN * Individual operators can add to this list via addToCounterNames methods */ protected ArrayList counterNames; /** * Each operator has its own map of its counter names to disjoint - * ProgressCounter - it is populated at compile time and is read in - * at run-time while extracting the operator specific counts + * ProgressCounter - it is populated at compile time and is read in at + * run-time while extracting the operator specific counts */ protected HashMap counterNameToEnum; - private static int seqId; - // It can be optimized later so that an operator operator (init/close) is performed - // only after that operation has been performed on all the parents. This will require - // initializing the whole tree in all the mappers (which might be required for mappers + // It can be optimized later so that an operator operator (init/close) is + // performed + // only after that operation has been performed on all the parents. This will + // require + // initializing the whole tree in all the mappers (which might be required for + // mappers // spanning multiple files anyway, in future) public static enum State { - UNINIT, // initialize() has not been called - INIT, // initialize() has been called and close() has not been called, - // or close() has been called but one of its parent is not closed. - CLOSE // all its parents operators are in state CLOSE and called close() - // to children. 
Note: close() being called and its state being CLOSE is - // difference since close() could be called but state is not CLOSE if - // one of its parent is not in state CLOSE.. + UNINIT, // initialize() has not been called + INIT, // initialize() has been called and close() has not been called, + // or close() has been called but one of its parent is not closed. + CLOSE + // all its parents operators are in state CLOSE and called close() + // to children. Note: close() being called and its state being CLOSE is + // difference since close() could be called but state is not CLOSE if + // one of its parent is not in state CLOSE.. }; + transient protected State state = State.UNINIT; - transient static boolean fatalError = false; // fatalError is shared acorss all operators - + transient static boolean fatalError = false; // fatalError is shared acorss + // all operators + static { seqId = 0; } @@ -102,17 +105,20 @@ public static void resetId() { seqId = 0; } - + /** * Create an operator with a reporter. - * @param reporter Used to report progress of certain operators. + * + * @param reporter + * Used to report progress of certain operators. */ public Operator(Reporter reporter) { this.reporter = reporter; id = String.valueOf(seqId++); } - public void setChildOperators(List> childOperators) { + public void setChildOperators( + List> childOperators) { this.childOperators = childOperators; } @@ -130,14 +136,15 @@ } Vector ret_vec = new Vector(); - for(Operator op: getChildOperators()) { + for (Operator op : getChildOperators()) { ret_vec.add(op); } return ret_vec; } - public void setParentOperators(List> parentOperators) { + public void setParentOperators( + List> parentOperators) { this.parentOperators = parentOperators; } @@ -178,7 +185,7 @@ // non-bean .. - transient protected HashMap, LongWritable> statsMap = new HashMap, LongWritable> (); + transient protected HashMap, LongWritable> statsMap = new HashMap, LongWritable>(); transient protected OutputCollector out; transient protected Log LOG = LogFactory.getLog(this.getClass().getName()); transient protected String alias; @@ -190,9 +197,9 @@ transient protected ObjectInspector outputObjInspector; /** - * A map of output column name to input expression map. This is used by optimizer - * and built during semantic analysis - * contains only key elements for reduce sink and group by op + * A map of output column name to input expression map. This is used by + * optimizer and built during semantic analysis contains only key elements for + * reduce sink and group by op */ protected transient Map colExprMap; @@ -201,21 +208,24 @@ } /** - * This function is not named getId(), to make sure java serialization - * does NOT serialize it. Some TestParse tests will fail if we serialize - * this field, since the Operator ID will change based on the number of - * query tests. + * This function is not named getId(), to make sure java serialization does + * NOT serialize it. Some TestParse tests will fail if we serialize this + * field, since the Operator ID will change based on the number of query + * tests. 
*/ - public String getIdentifier() { return id; } + public String getIdentifier() { + return id; + } public void setReporter(Reporter rep) { reporter = rep; // the collector is same across all operators - if(childOperators == null) + if (childOperators == null) { return; + } - for(Operator op: childOperators) { + for (Operator op : childOperators) { op.setReporter(rep); } } @@ -224,10 +234,11 @@ this.out = out; // the collector is same across all operators - if(childOperators == null) + if (childOperators == null) { return; + } - for(Operator op: childOperators) { + for (Operator op : childOperators) { op.setOutputCollector(out); } } @@ -238,31 +249,34 @@ public void setAlias(String alias) { this.alias = alias; - if(childOperators == null) + if (childOperators == null) { return; + } - for(Operator op: childOperators) { + for (Operator op : childOperators) { op.setAlias(alias); } } public Map, Long> getStats() { - HashMap, Long> ret = new HashMap, Long> (); - for(Enum one: statsMap.keySet()) { + HashMap, Long> ret = new HashMap, Long>(); + for (Enum one : statsMap.keySet()) { ret.put(one, Long.valueOf(statsMap.get(one).get())); } - return(ret); + return (ret); } /** * checks whether all parent operators are initialized or not - * @return true if there are no parents or all parents are initialized. false otherwise + * + * @return true if there are no parents or all parents are initialized. false + * otherwise */ protected boolean areAllParentsInitialized() { if (parentOperators == null) { return true; } - for(Operator parent: parentOperators) { + for (Operator parent : parentOperators) { if (parent.state != State.INIT) { return false; } @@ -271,46 +285,51 @@ } /** - * Initializes operators only if all parents have been initialized. - * Calls operator specific initializer which then initializes child ops. - * + * Initializes operators only if all parents have been initialized. Calls + * operator specific initializer which then initializes child ops. + * * @param hconf - * @param inputOIs input object inspector array indexes by tag id. null value is ignored. + * @param inputOIs + * input object inspector array indexes by tag id. null value is + * ignored. * @throws HiveException */ - public void initialize(Configuration hconf, ObjectInspector[] inputOIs) throws HiveException { + public void initialize(Configuration hconf, ObjectInspector[] inputOIs) + throws HiveException { if (state == State.INIT) { return; } - if(!areAllParentsInitialized()) { + if (!areAllParentsInitialized()) { return; } - + LOG.info("Initializing Self " + id + " " + getName()); if (inputOIs != null) { inputObjInspectors = inputOIs; } - // initialize structure to maintain child op info. operator tree changes while + // initialize structure to maintain child op info. 
operator tree changes + // while // initializing so this need to be done here instead of initialize() method if (childOperators != null) { childOperatorsArray = new Operator[childOperators.size()]; - for (int i=0; i> parentOperators = - childOperatorsArray[i].getParentOperators(); + for (int i = 0; i < childOperatorsArray.length; i++) { + List> parentOperators = childOperatorsArray[i] + .getParentOperators(); if (parentOperators == null) { throw new HiveException("Hive internal error: parent is null in " + childOperatorsArray[i].getClass() + "!"); } childOperatorsTag[i] = parentOperators.indexOf(this); if (childOperatorsTag[i] == -1) { - throw new HiveException("Hive internal error: cannot find parent in the child operator!"); + throw new HiveException( + "Hive internal error: cannot find parent in the child operator!"); } } } @@ -333,7 +352,8 @@ } /** - * Calls initialize on each of the children with outputObjetInspector as the output row format + * Calls initialize on each of the children with outputObjetInspector as the + * output row format */ protected void initializeChildren(Configuration hconf) throws HiveException { state = State.INIT; @@ -343,45 +363,59 @@ } LOG.info("Initializing children of " + id + " " + getName()); for (int i = 0; i < childOperatorsArray.length; i++) { - childOperatorsArray[i].initialize(hconf, outputObjInspector, childOperatorsTag[i]); - if ( reporter != null ) { + childOperatorsArray[i].initialize(hconf, outputObjInspector, + childOperatorsTag[i]); + if (reporter != null) { childOperatorsArray[i].setReporter(reporter); } } } /** - * Collects all the parent's output object inspectors and calls actual initialization method + * Collects all the parent's output object inspectors and calls actual + * initialization method + * * @param hconf - * @param inputOI OI of the row that this parent will pass to this op - * @param parentId parent operator id + * @param inputOI + * OI of the row that this parent will pass to this op + * @param parentId + * parent operator id * @throws HiveException */ - private void initialize(Configuration hconf, ObjectInspector inputOI, int parentId) throws HiveException { + private void initialize(Configuration hconf, ObjectInspector inputOI, + int parentId) throws HiveException { LOG.info("Initializing child " + id + " " + getName()); inputObjInspectors[parentId] = inputOI; // call the actual operator initialization function initialize(hconf, null); } - - /** + /** * Process the row. - * @param row The object representing the row. - * @param tag The tag of the row usually means which parent this row comes from. - * Rows with the same tag should have exactly the same rowInspector all the time. + * + * @param row + * The object representing the row. + * @param tag + * The tag of the row usually means which parent this row comes from. + * Rows with the same tag should have exactly the same rowInspector + * all the time. */ public abstract void processOp(Object row, int tag) throws HiveException; /** * Process the row. - * @param row The object representing the row. - * @param tag The tag of the row usually means which parent this row comes from. - * Rows with the same tag should have exactly the same rowInspector all the time. + * + * @param row + * The object representing the row. + * @param tag + * The tag of the row usually means which parent this row comes from. + * Rows with the same tag should have exactly the same rowInspector + * all the time. 
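* The work itself is delegated to processOp(); the simplest implementation in
* this patch is ForwardOperator, which passes the row on unchanged:
*
*   @Override
*   public void processOp(Object row, int tag) throws HiveException {
*     forward(row, inputObjInspectors[tag]);
*   }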
*/ public void process(Object row, int tag) throws HiveException { - if ( fatalError ) + if (fatalError) { return; + } preProcessCounter(); processOp(row, tag); postProcessCounter(); @@ -391,15 +425,18 @@ public void startGroup() throws HiveException { LOG.debug("Starting group"); - if (childOperators == null) + if (childOperators == null) { return; - - if ( fatalError ) + } + + if (fatalError) { return; + } LOG.debug("Starting group for children:"); - for (Operator op: childOperators) + for (Operator op : childOperators) { op.startGroup(); + } LOG.debug("Start group Done"); } @@ -408,24 +445,26 @@ public void endGroup() throws HiveException { LOG.debug("Ending group"); - if (childOperators == null) + if (childOperators == null) { return; + } - if ( fatalError ) + if (fatalError) { return; + } LOG.debug("Ending group for children:"); - for (Operator op: childOperators) + for (Operator op : childOperators) { op.endGroup(); + } LOG.debug("End group Done"); } private boolean allInitializedParentsAreClosed() { if (parentOperators != null) { - for(Operator parent: parentOperators) { - if (!(parent.state == State.CLOSE || - parent.state == State.UNINIT)) { + for (Operator parent : parentOperators) { + if (!(parent.state == State.CLOSE || parent.state == State.UNINIT)) { return false; } } @@ -438,12 +477,14 @@ // more than 1 thread should call this close() function. public void close(boolean abort) throws HiveException { - if (state == State.CLOSE) + if (state == State.CLOSE) { return; + } // check if all parents are finished - if (!allInitializedParentsAreClosed()) + if (!allInitializedParentsAreClosed()) { return; + } // set state as CLOSE as long as all parents are closed // state == CLOSE doesn't mean all children are also in state CLOSE @@ -463,10 +504,11 @@ try { logStats(); - if(childOperators == null) + if (childOperators == null) { return; + } - for(Operator op: childOperators) { + for (Operator op : childOperators) { op.close(abort); } @@ -478,33 +520,35 @@ } /** - * Operator specific close routine. Operators which inherents this - * class should overwrite this funtion for their specific cleanup - * routine. + * Operator specific close routine. Operators which inherents this class + * should overwrite this funtion for their specific cleanup routine. */ protected void closeOp(boolean abort) throws HiveException { } - /** * Unlike other operator interfaces which are called from map or reduce task, * jobClose is called from the jobclient side once the job has completed - * - * @param conf Configuration with with which job was submitted - * @param success whether the job was completed successfully or not + * + * @param conf + * Configuration with with which job was submitted + * @param success + * whether the job was completed successfully or not */ - public void jobClose(Configuration conf, boolean success) throws HiveException { - if(childOperators == null) + public void jobClose(Configuration conf, boolean success) + throws HiveException { + if (childOperators == null) { return; + } - for(Operator op: childOperators) { + for (Operator op : childOperators) { op.jobClose(conf, success); } } /** - * Cache childOperators in an array for faster access. childOperatorsArray is accessed - * per row, so it's important to make the access efficient. + * Cache childOperators in an array for faster access. childOperatorsArray is + * accessed per row, so it's important to make the access efficient. 
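* (Paraphrase for orientation, not literal patch content.) forward() walks
* this cached array rather than the childOperators list, roughly:
*
*   for (int i = 0; i < childOperatorsArray.length; i++) {
*     Operator<? extends Serializable> o = childOperatorsArray[i];
*     if (!o.getDone()) {
*       o.process(row, childOperatorsTag[i]);
*     }
*   }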
*/ transient protected Operator[] childOperatorsArray = null; transient protected int[] childOperatorsTag; @@ -513,54 +557,69 @@ transient private long cntr = 0; transient private long nextCntr = 1; - /** - * Replace one child with another at the same position. The parent of the child is not changed - * @param child the old child - * @param newChild the new child + /** + * Replace one child with another at the same position. The parent of the + * child is not changed + * + * @param child + * the old child + * @param newChild + * the new child */ - public void replaceChild(Operator child, Operator newChild) { + public void replaceChild(Operator child, + Operator newChild) { int childIndex = childOperators.indexOf(child); assert childIndex != -1; childOperators.set(childIndex, newChild); } - public void removeChild(Operator child) { + public void removeChild(Operator child) { int childIndex = childOperators.indexOf(child); assert childIndex != -1; - if (childOperators.size() == 1) + if (childOperators.size() == 1) { childOperators = null; - else + } else { childOperators.remove(childIndex); + } int parentIndex = child.getParentOperators().indexOf(this); assert parentIndex != -1; - if (child.getParentOperators().size() == 1) + if (child.getParentOperators().size() == 1) { child.setParentOperators(null); - else + } else { child.getParentOperators().remove(parentIndex); + } } /** - * Replace one parent with another at the same position. Chilren of the new parent are not updated - * @param parent the old parent - * @param newParent the new parent + * Replace one parent with another at the same position. Chilren of the new + * parent are not updated + * + * @param parent + * the old parent + * @param newParent + * the new parent */ - public void replaceParent(Operator parent, Operator newParent) { + public void replaceParent(Operator parent, + Operator newParent) { int parentIndex = parentOperators.indexOf(parent); assert parentIndex != -1; parentOperators.set(parentIndex, newParent); } private long getNextCntr(long cntr) { - // A very simple counter to keep track of number of rows processed by an operator. It dumps + // A very simple counter to keep track of number of rows processed by an + // operator. 
It dumps // every 1 million times, and quickly before that - if (cntr >= 1000000) + if (cntr >= 1000000) { return cntr + 1000000; + } return 10 * cntr; } - protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { + protected void forward(Object row, ObjectInspector rowInspector) + throws HiveException { if ((++outputRows % 1000) == 0) { if (counterNameToEnum != null) { @@ -578,14 +637,17 @@ } // For debugging purposes: - // System.out.println("" + this.getClass() + ": " + SerDeUtils.getJSONString(row, rowInspector)); - // System.out.println("" + this.getClass() + ">> " + ObjectInspectorUtils.getObjectInspectorName(rowInspector)); + // System.out.println("" + this.getClass() + ": " + + // SerDeUtils.getJSONString(row, rowInspector)); + // System.out.println("" + this.getClass() + ">> " + + // ObjectInspectorUtils.getObjectInspectorName(rowInspector)); if (childOperatorsArray == null && childOperators != null) { - throw new HiveException("Internal Hive error during operator initialization."); + throw new HiveException( + "Internal Hive error during operator initialization."); } - if((childOperatorsArray == null) || (getDone())) { + if ((childOperatorsArray == null) || (getDone())) { return; } @@ -593,7 +655,7 @@ for (int i = 0; i < childOperatorsArray.length; i++) { Operator o = childOperatorsArray[i]; if (o.getDone()) { - childrenDone ++; + childrenDone++; } else { o.process(row, childOperatorsTag[i]); } @@ -606,7 +668,7 @@ } public void resetStats() { - for(Enum e: statsMap.keySet()) { + for (Enum e : statsMap.keySet()) { statsMap.get(e).set(0L); } } @@ -615,23 +677,24 @@ public void func(Operator op); } - public void preorderMap (OperatorFunc opFunc) { + public void preorderMap(OperatorFunc opFunc) { opFunc.func(this); - if(childOperators != null) { - for(Operator o: childOperators) { + if (childOperators != null) { + for (Operator o : childOperators) { o.preorderMap(opFunc); } } } - public void logStats () { - for(Enum e: statsMap.keySet()) { + public void logStats() { + for (Enum e : statsMap.keySet()) { LOG.info(e.toString() + ":" + statsMap.get(e).toString()); } } /** * Implements the getName function for the Node Interface. 
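* Concrete operators override this with their own tag; for example, the
* map-join operator earlier in this patch returns:
*
*   @Override
*   public String getName() { return "MAPJOIN"; }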
+ * * @return the name of the operator */ public String getName() { @@ -639,8 +702,9 @@ } /** - * Returns a map of output column name to input expression map - * Note that currently it returns only key columns for ReduceSink and GroupBy operators + * Returns a map of output column name to input expression map Note that + * currently it returns only key columns for ReduceSink and GroupBy operators + * * @return null if the operator doesn't change columns */ public Map getColumnExprMap() { @@ -657,7 +721,7 @@ } StringBuilder s = new StringBuilder(); s.append("\n"); - while(level > 0) { + while (level > 0) { s.append(" "); level--; } @@ -669,8 +733,9 @@ } public String dump(int level, HashSet seenOpts) { - if ( seenOpts.contains(new Integer(id))) + if (seenOpts.contains(new Integer(id))) { return null; + } seenOpts.add(new Integer(id)); StringBuilder s = new StringBuilder(); @@ -683,7 +748,7 @@ s.append(ls); s.append(" "); for (Operator o : childOperators) { - s.append(o.dump(level+2, seenOpts)); + s.append(o.dump(level + 2, seenOpts)); } s.append(ls); s.append(" <\\Children>"); @@ -694,7 +759,7 @@ s.append(" "); for (Operator o : parentOperators) { s.append("Id = " + o.id + " "); - s.append(o.dump(level,seenOpts)); + s.append(o.dump(level, seenOpts)); } s.append("<\\Parent>"); } @@ -711,7 +776,7 @@ protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, ObjectInspector rowInspector) throws HiveException { ObjectInspector[] result = new ObjectInspector[evals.length]; - for (int i=0; i outputColName, ObjectInspector rowInspector) - throws HiveException { - ObjectInspector[] fieldObjectInspectors = initEvaluators(evals, rowInspector); + ExprNodeEvaluator[] evals, List outputColName, + ObjectInspector rowInspector) throws HiveException { + ObjectInspector[] fieldObjectInspectors = initEvaluators(evals, + rowInspector); return ObjectInspectorFactory.getStandardStructObjectInspector( - outputColName, - Arrays.asList(fieldObjectInspectors)); + outputColName, Arrays.asList(fieldObjectInspectors)); } /** @@ -738,46 +803,7 @@ * TODO This is a hack for hadoop 0.17 which only supports enum counters */ public static enum ProgressCounter { - C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, - C11, C12, C13, C14, C15, C16, C17, C18, C19, C20, - C21, C22, C23, C24, C25, C26, C27, C28, C29, C30, - C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, - C41, C42, C43, C44, C45, C46, C47, C48, C49, C50, - C51, C52, C53, C54, C55, C56, C57, C58, C59, C60, - C61, C62, C63, C64, C65, C66, C67, C68, C69, C70, - C71, C72, C73, C74, C75, C76, C77, C78, C79, C80, - C81, C82, C83, C84, C85, C86, C87, C88, C89, C90, - C91, C92, C93, C94, C95, C96, C97, C98, C99, C100, - C101, C102, C103, C104, C105, C106, C107, C108, C109, C110, - C111, C112, C113, C114, C115, C116, C117, C118, C119, C120, - C121, C122, C123, C124, C125, C126, C127, C128, C129, C130, - C131, C132, C133, C134, C135, C136, C137, C138, C139, C140, - C141, C142, C143, C144, C145, C146, C147, C148, C149, C150, - C151, C152, C153, C154, C155, C156, C157, C158, C159, C160, - C161, C162, C163, C164, C165, C166, C167, C168, C169, C170, - C171, C172, C173, C174, C175, C176, C177, C178, C179, C180, - C181, C182, C183, C184, C185, C186, C187, C188, C189, C190, - C191, C192, C193, C194, C195, C196, C197, C198, C199, C200, - C201, C202, C203, C204, C205, C206, C207, C208, C209, C210, - C211, C212, C213, C214, C215, C216, C217, C218, C219, C220, - C221, C222, C223, C224, C225, C226, C227, C228, C229, C230, - C231, C232, C233, C234, C235, C236, C237, C238, 
C239, C240, - C241, C242, C243, C244, C245, C246, C247, C248, C249, C250, - C251, C252, C253, C254, C255, C256, C257, C258, C259, C260, - C261, C262, C263, C264, C265, C266, C267, C268, C269, C270, - C271, C272, C273, C274, C275, C276, C277, C278, C279, C280, - C281, C282, C283, C284, C285, C286, C287, C288, C289, C290, - C291, C292, C293, C294, C295, C296, C297, C298, C299, C300, - C301, C302, C303, C304, C305, C306, C307, C308, C309, C310, - C311, C312, C313, C314, C315, C316, C317, C318, C319, C320, - C321, C322, C323, C324, C325, C326, C327, C328, C329, C330, - C331, C332, C333, C334, C335, C336, C337, C338, C339, C340, - C341, C342, C343, C344, C345, C346, C347, C348, C349, C350, - C351, C352, C353, C354, C355, C356, C357, C358, C359, C360, - C361, C362, C363, C364, C365, C366, C367, C368, C369, C370, - C371, C372, C373, C374, C375, C376, C377, C378, C379, C380, - C381, C382, C383, C384, C385, C386, C387, C388, C389, C390, - C391, C392, C393, C394, C395, C396, C397, C398, C399, C400 + C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15, C16, C17, C18, C19, C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, C30, C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, C41, C42, C43, C44, C45, C46, C47, C48, C49, C50, C51, C52, C53, C54, C55, C56, C57, C58, C59, C60, C61, C62, C63, C64, C65, C66, C67, C68, C69, C70, C71, C72, C73, C74, C75, C76, C77, C78, C79, C80, C81, C82, C83, C84, C85, C86, C87, C88, C89, C90, C91, C92, C93, C94, C95, C96, C97, C98, C99, C100, C101, C102, C103, C104, C105, C106, C107, C108, C109, C110, C111, C112, C113, C114, C115, C116, C117, C118, C119, C120, C121, C122, C123, C124, C125, C126, C127, C128, C129, C130, C131, C132, C133, C134, C135, C136, C137, C138, C139, C140, C141, C142, C143, C144, C145, C146, C147, C148, C149, C150, C151, C152, C153, C154, C155, C156, C157, C158, C159, C160, C161, C162, C163, C164, C165, C166, C167, C168, C169, C170, C171, C172, C173, C174, C175, C176, C177, C178, C179, C180, C181, C182, C183, C184, C185, C186, C187, C188, C189, C190, C191, C192, C193, C194, C195, C196, C197, C198, C199, C200, C201, C202, C203, C204, C205, C206, C207, C208, C209, C210, C211, C212, C213, C214, C215, C216, C217, C218, C219, C220, C221, C222, C223, C224, C225, C226, C227, C228, C229, C230, C231, C232, C233, C234, C235, C236, C237, C238, C239, C240, C241, C242, C243, C244, C245, C246, C247, C248, C249, C250, C251, C252, C253, C254, C255, C256, C257, C258, C259, C260, C261, C262, C263, C264, C265, C266, C267, C268, C269, C270, C271, C272, C273, C274, C275, C276, C277, C278, C279, C280, C281, C282, C283, C284, C285, C286, C287, C288, C289, C290, C291, C292, C293, C294, C295, C296, C297, C298, C299, C300, C301, C302, C303, C304, C305, C306, C307, C308, C309, C310, C311, C312, C313, C314, C315, C316, C317, C318, C319, C320, C321, C322, C323, C324, C325, C326, C327, C328, C329, C330, C331, C332, C333, C334, C335, C336, C337, C338, C339, C340, C341, C342, C343, C344, C345, C346, C347, C348, C349, C350, C351, C352, C353, C354, C355, C356, C357, C358, C359, C360, C361, C362, C363, C364, C365, C366, C367, C368, C369, C370, C371, C372, C373, C374, C375, C376, C377, C378, C379, C380, C381, C382, C383, C384, C385, C386, C387, C388, C389, C390, C391, C392, C393, C394, C395, C396, C397, C398, C399, C400 }; private static int totalNumCntrs = 400; @@ -788,9 +814,8 @@ transient protected Map counters; /** - * keeps track of unique ProgressCounter enums used - * this value is used at compile time while assigning ProgressCounter - * enums to counter names + * 
keeps track of unique ProgressCounter enums used this value is used at + * compile time while assigning ProgressCounter enums to counter names */ private static int lastEnumUsed; @@ -804,15 +829,14 @@ /** * this is called before operator process to buffer some counters */ - private void preProcessCounter() - { + private void preProcessCounter() { inputRows++; if (counterNameToEnum != null) { if ((inputRows % 1000) == 0) { incrCounter(numInputRowsCntr, inputRows); incrCounter(timeTakenCntr, totalTime); - inputRows = 0 ; + inputRows = 0; totalTime = 0; } beginTime = System.currentTimeMillis(); @@ -822,28 +846,31 @@ /** * this is called after operator process to buffer some counters */ - private void postProcessCounter() - { - if (counterNameToEnum != null) + private void postProcessCounter() { + if (counterNameToEnum != null) { totalTime += (System.currentTimeMillis() - beginTime); + } } - /** * this is called in operators in map or reduce tasks + * * @param name * @param amount */ - protected void incrCounter(String name, long amount) - { + protected void incrCounter(String name, long amount) { String counterName = "CNTR_NAME_" + getOperatorId() + "_" + name; ProgressCounter pc = counterNameToEnum.get(counterName); - // Currently, we maintain fixed number of counters per plan - in case of a bigger tree, we may run out of them - if (pc == null) - LOG.warn("Using too many counters. Increase the total number of counters for " + counterName); - else if (reporter != null) + // Currently, we maintain fixed number of counters per plan - in case of a + // bigger tree, we may run out of them + if (pc == null) { + LOG + .warn("Using too many counters. Increase the total number of counters for " + + counterName); + } else if (reporter != null) { reporter.incrCounter(pc, amount); + } } public ArrayList getCounterNames() { @@ -872,7 +899,9 @@ /** * called in ExecDriver.progress periodically - * @param ctrs counters from the running job + * + * @param ctrs + * counters from the running job */ @SuppressWarnings("unchecked") public void updateCounters(Counters ctrs) { @@ -880,85 +909,101 @@ counters = new HashMap(); } - // For some old unit tests, the counters will not be populated. Eventually, the old tests should be removed - if (counterNameToEnum == null) + // For some old unit tests, the counters will not be populated. Eventually, + // the old tests should be removed + if (counterNameToEnum == null) { return; + } - for (Map.Entry counter: counterNameToEnum.entrySet()) { + for (Map.Entry counter : counterNameToEnum + .entrySet()) { counters.put(counter.getKey(), ctrs.getCounter(counter.getValue())); } // update counters of child operators // this wont be an infinite loop since the operator graph is acyclic // but, some operators may be updated more than once and that's ok if (getChildren() != null) { - for (Node op: getChildren()) { - ((Operator)op).updateCounters(ctrs); + for (Node op : getChildren()) { + ((Operator) op).updateCounters(ctrs); } } } /** - * Recursively check this operator and its descendants to see if the - * fatal error counter is set to non-zero. + * Recursively check this operator and its descendants to see if the fatal + * error counter is set to non-zero. 
+ * * @param ctrs */ public boolean checkFatalErrors(Counters ctrs, StringBuffer errMsg) { - if ( counterNameToEnum == null ) + if (counterNameToEnum == null) { return false; - + } + String counterName = "CNTR_NAME_" + getOperatorId() + "_" + fatalErrorCntr; ProgressCounter pc = counterNameToEnum.get(counterName); - // Currently, we maintain fixed number of counters per plan - in case of a bigger tree, we may run out of them - if (pc == null) - LOG.warn("Using too many counters. Increase the total number of counters for " + counterName); - else { + // Currently, we maintain fixed number of counters per plan - in case of a + // bigger tree, we may run out of them + if (pc == null) { + LOG + .warn("Using too many counters. Increase the total number of counters for " + + counterName); + } else { long value = ctrs.getCounter(pc); fatalErrorMessage(errMsg, value); - if ( value != 0 ) + if (value != 0) { return true; + } } - + if (getChildren() != null) { - for (Node op: getChildren()) { - if (((Operator)op).checkFatalErrors(ctrs, errMsg)) { + for (Node op : getChildren()) { + if (((Operator) op).checkFatalErrors(ctrs, + errMsg)) { return true; } } } return false; } - - /** + + /** * Get the fatal error message based on counter's code. - * @param errMsg error message should be appended to this output parameter. - * @param counterValue input counter code. + * + * @param errMsg + * error message should be appended to this output parameter. + * @param counterValue + * input counter code. */ protected void fatalErrorMessage(StringBuffer errMsg, long counterValue) { } - + // A given query can have multiple map-reduce jobs public static void resetLastEnumUsed() { lastEnumUsed = 0; } /** - * Called only in SemanticAnalyzer after all operators have added their - * own set of counter names + * Called only in SemanticAnalyzer after all operators have added their own + * set of counter names */ public void assignCounterNameToEnum() { if (counterNameToEnum != null) { return; } counterNameToEnum = new HashMap(); - for (String counterName: getCounterNames()) { + for (String counterName : getCounterNames()) { ++lastEnumUsed; // TODO Hack for hadoop-0.17 - // Currently, only maximum number of 'totalNumCntrs' can be used. If you want - // to add more counters, increase the number of counters in ProgressCounter + // Currently, only maximum number of 'totalNumCntrs' can be used. If you + // want + // to add more counters, increase the number of counters in + // ProgressCounter if (lastEnumUsed > totalNumCntrs) { - LOG.warn("Using too many counters. Increase the total number of counters"); + LOG + .warn("Using too many counters. Increase the total number of counters"); return; } String enumName = "C" + lastEnumUsed; @@ -967,10 +1012,10 @@ } } - protected static String numInputRowsCntr = "NUM_INPUT_ROWS"; + protected static String numInputRowsCntr = "NUM_INPUT_ROWS"; protected static String numOutputRowsCntr = "NUM_OUTPUT_ROWS"; - protected static String timeTakenCntr = "TIME_TAKEN"; - protected static String fatalErrorCntr = "FATAL_ERROR"; + protected static String timeTakenCntr = "TIME_TAKEN"; + protected static String fatalErrorCntr = "FATAL_ERROR"; public void initializeCounters() { initOperatorId(); @@ -986,32 +1031,32 @@ } /* - * By default, the list is empty - if an operator wants to add more counters, it should override this method - * and provide the new list. - + * By default, the list is empty - if an operator wants to add more counters, + * it should override this method and provide the new list. 
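The compile-time assignment of counter names onto the fixed pool C1..C400 amounts to a simple sequential mapping. A hedged sketch follows; EnumAssigner is an illustrative class name, and a String stands in for the ProgressCounter enum value used in the real code.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch of mapping operator counter names onto a fixed pool C1..C400.
public class EnumAssigner {
  private static final int TOTAL_NUM_CNTRS = 400;
  private static int lastEnumUsed = 0; // reset once per map-reduce job
  private final Map<String, String> counterNameToEnum = new HashMap<String, String>();

  public void assign(List<String> counterNames) {
    for (String counterName : counterNames) {
      ++lastEnumUsed;
      if (lastEnumUsed > TOTAL_NUM_CNTRS) {
        // Pool exhausted: warn and stop assigning, as the patched code does.
        System.err.println("Using too many counters. Increase the total number of counters");
        return;
      }
      counterNameToEnum.put(counterName, "C" + lastEnumUsed);
    }
  }

  public static void reset() {
    lastEnumUsed = 0;
  }
}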
*/ private List getAdditionalCounters() { return null; } - + public HashMap getCounterNameToEnum() { return counterNameToEnum; } - public void setCounterNameToEnum(HashMap counterNameToEnum) { + public void setCounterNameToEnum( + HashMap counterNameToEnum) { this.counterNameToEnum = counterNameToEnum; } - /** - * Should be overridden to return the type of the specific operator among - * the types in OperatorType - * - * @return OperatorType.* or -1 if not overridden - */ - public int getType() { - assert false; - return -1; - } + /** + * Should be overridden to return the type of the specific operator among the + * types in OperatorType + * + * @return OperatorType.* or -1 if not overridden + */ + public int getType() { + assert false; + return -1; + } public void setGroupKeyObject(Object keyObject) { this.groupKeyObject = keyObject; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (working copy) @@ -18,48 +18,62 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.*; -import java.text.SimpleDateFormat; -import java.util.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.io.UnsupportedEncodingException; import java.net.URI; +import java.net.URLDecoder; import java.net.URLEncoder; -import java.net.URLDecoder; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.LogFactory; -import org.apache.commons.lang.StringUtils; - +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.io.*; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; +import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; +import org.apache.hadoop.hive.ql.io.HiveKey; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.mapredWork; +import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.FileInputFormat; import 
org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapred.TaskCompletionEvent; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.partitionDesc; -import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; -import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; -import org.apache.hadoop.hive.ql.io.*; -import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.varia.NullAppender; -import java.lang.ClassNotFoundException; -import org.apache.hadoop.hive.common.FileUtils; public class ExecDriver extends Task implements Serializable { @@ -69,8 +83,9 @@ transient protected int mapProgress = 0; transient protected int reduceProgress = 0; transient protected boolean success = false; // if job execution is successful - + public static Random randGen = new Random(); + /** * Constructor when invoked from QL */ @@ -78,7 +93,8 @@ super(); } - public static String getResourceFiles(Configuration conf, SessionState.ResourceType t) { + public static String getResourceFiles(Configuration conf, + SessionState.ResourceType t) { // fill in local files to be added to the task environment SessionState ss = SessionState.get(); Set files = (ss == null) ? null : ss.list_resource(t, null); @@ -109,7 +125,8 @@ * Initialization when invoked from QL */ @Override - public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) { + public void initialize(HiveConf conf, QueryPlan queryPlan, + DriverContext driverContext) { super.initialize(conf, queryPlan, driverContext); job = new JobConf(conf, ExecDriver.class); // NOTE: initialize is only called if it is in non-local mode. @@ -127,7 +144,8 @@ if (StringUtils.isNotBlank(addedJars)) { HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars); } - String addedArchives = getResourceFiles(job, SessionState.ResourceType.ARCHIVE); + String addedArchives = getResourceFiles(job, + SessionState.ResourceType.ARCHIVE); if (StringUtils.isNotBlank(addedArchives)) { HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives); } @@ -149,35 +167,35 @@ * used to kill all running jobs in the event of an unexpected shutdown - * i.e., the JVM shuts down while there are still jobs running. */ - public static Map runningJobKillURIs - = Collections.synchronizedMap(new HashMap()); + public static Map runningJobKillURIs = Collections + .synchronizedMap(new HashMap()); /** * In Hive, when the user control-c's the command line, any running jobs * spawned from that command line are best-effort killed. - * + * * This static constructor registers a shutdown thread to iterate over all the * running job kill URLs and do a get on them. 
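The shutdown behaviour described in this javadoc can be summarized in a small self-contained sketch: a synchronized map of kill URLs, and a JVM shutdown hook that issues a best-effort POST to each one. KillOnShutdownSketch is an assumed name and error handling is trimmed relative to the patched class.

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Illustrative sketch of best-effort job killing on JVM shutdown.
public class KillOnShutdownSketch {
  static final Map<String, String> runningJobKillURIs =
      Collections.synchronizedMap(new HashMap<String, String>());

  static {
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        synchronized (runningJobKillURIs) {
          for (String uri : runningJobKillURIs.values()) {
            try {
              // A POST to the tracker's kill URL asks it to kill the job.
              HttpURLConnection conn = (HttpURLConnection) new URL(uri).openConnection();
              conn.setRequestMethod("POST");
              if (conn.getResponseCode() != 200) {
                System.err.println("Got an error trying to kill job with URI: " + uri);
              }
            } catch (Exception e) {
              System.err.println("trying to kill job, caught: " + e);
            }
          }
        }
      }
    });
  }
}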
- * + * */ static { if (new org.apache.hadoop.conf.Configuration().getBoolean( "webinterface.private.actions", false)) { Runtime.getRuntime().addShutdownHook(new Thread() { + @Override public void run() { - synchronized(runningJobKillURIs) { - for (Iterator elems = runningJobKillURIs.values().iterator(); elems - .hasNext();) { - String uri = elems.next(); + synchronized (runningJobKillURIs) { + for (String uri : runningJobKillURIs.values()) { try { System.err.println("killing job with: " + uri); - java.net.HttpURLConnection conn = (java.net.HttpURLConnection) - new java.net.URL(uri).openConnection(); + java.net.HttpURLConnection conn = (java.net.HttpURLConnection) new java.net.URL( + uri).openConnection(); conn.setRequestMethod("POST"); int retCode = conn.getResponseCode(); if (retCode != 200) { - System.err.println("Got an error trying to kill job with URI: " - + uri + " = " + retCode); + System.err + .println("Got an error trying to kill job with URI: " + + uri + " = " + retCode); } } catch (Exception e) { System.err.println("trying to kill job, caught: " + e); @@ -200,11 +218,11 @@ String hp = job.get("mapred.job.tracker"); if (SessionState.get() != null) { SessionState.get().getHiveHistory().setTaskProperty( - SessionState.get().getQueryId(), getId(), - Keys.TASK_HADOOP_ID, rj.getJobID()); + SessionState.get().getQueryId(), getId(), Keys.TASK_HADOOP_ID, + rj.getJobID()); } - console.printInfo(ExecDriver.getJobEndMsg(rj.getJobID()) + ", Tracking URL = " - + rj.getTrackingURL()); + console.printInfo(ExecDriver.getJobEndMsg(rj.getJobID()) + + ", Tracking URL = " + rj.getTrackingURL()); console.printInfo("Kill Command = " + HiveConf.getVar(job, HiveConf.ConfVars.HADOOPBIN) + " job -Dmapred.job.tracker=" + hp + " -kill " + rj.getJobID()); @@ -212,66 +230,73 @@ } /** - * This class contains the state of the running task - * Going forward, we will return this handle from execute - * and Driver can split execute into start, monitorProgess and postProcess + * This class contains the state of the running task Going forward, we will + * return this handle from execute and Driver can split execute into start, + * monitorProgess and postProcess */ public static class ExecDriverTaskHandle extends TaskHandle { JobClient jc; RunningJob rj; + JobClient getJobClient() { return jc; } + RunningJob getRunningJob() { return rj; } + public ExecDriverTaskHandle(JobClient jc, RunningJob rj) { this.jc = jc; this.rj = rj; } + public void setRunningJob(RunningJob job) { - this.rj = job; + rj = job; } + public Counters getCounters() throws IOException { return rj.getCounters(); } } - + /** * Fatal errors are those errors that cannot be recovered by retries. These - * are application dependent. Examples of fatal errors include: - * - the small table in the map-side joins is too large to be feasible to be - * handled by one mapper. The job should fail and the user should be warned - * to use regular joins rather than map-side joins. - * Fatal errors are indicated by counters that are set at execution time. - * If the counter is non-zero, a fatal error occurred. The value of the counter - * indicates the error type. - * @return true if fatal errors happened during job execution, false otherwise. + * are application dependent. Examples of fatal errors include: - the small + * table in the map-side joins is too large to be feasible to be handled by + * one mapper. The job should fail and the user should be warned to use + * regular joins rather than map-side joins. 
Fatal errors are indicated by + * counters that are set at execution time. If the counter is non-zero, a + * fatal error occurred. The value of the counter indicates the error type. + * + * @return true if fatal errors happened during job execution, false + * otherwise. */ protected boolean checkFatalErrors(TaskHandle t, StringBuffer errMsg) { ExecDriverTaskHandle th = (ExecDriverTaskHandle) t; RunningJob rj = th.getRunningJob(); try { Counters ctrs = th.getCounters(); - for (Operator op: work.getAliasToWork().values()) { - if (op.checkFatalErrors(ctrs, errMsg)) + for (Operator op : work.getAliasToWork().values()) { + if (op.checkFatalErrors(ctrs, errMsg)) { return true; + } } return false; - } catch (IOException e) { + } catch (IOException e) { // this exception can be tolerated e.printStackTrace(); return false; } } - + public void progress(TaskHandle taskHandle) throws IOException { - ExecDriverTaskHandle th = (ExecDriverTaskHandle)taskHandle; + ExecDriverTaskHandle th = (ExecDriverTaskHandle) taskHandle; JobClient jc = th.getJobClient(); RunningJob rj = th.getRunningJob(); String lastReport = ""; - SimpleDateFormat dateFormat - = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); + SimpleDateFormat dateFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 60 * 1000; // One minute boolean fatal = false; @@ -282,38 +307,41 @@ } catch (InterruptedException e) { } th.setRunningJob(jc.getJob(rj.getJobID())); - + // If fatal errors happen we should kill the job immediately rather than // let the job retry several times, which eventually lead to failure. - if (fatal) - continue; // wait until rj.isComplete - if ( fatal = checkFatalErrors(th, errMsg)) { + if (fatal) { + continue; // wait until rj.isComplete + } + if (fatal = checkFatalErrors(th, errMsg)) { success = false; - console.printError("[Fatal Error] " + errMsg.toString() + ". Killing the job."); + console.printError("[Fatal Error] " + errMsg.toString() + + ". 
Killing the job."); rj.killJob(); continue; } errMsg.setLength(0); - + updateCounters(th); - String report = " "+getId()+" map = " + this.mapProgress + "%, reduce = " + this.reduceProgress + "%"; - + String report = " " + getId() + " map = " + mapProgress + "%, reduce = " + + reduceProgress + "%"; + if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { // write out serialized plan with counters to log file // LOG.info(queryPlan); - String output = dateFormat.format(Calendar.getInstance().getTime()) + report; + String output = dateFormat.format(Calendar.getInstance().getTime()) + + report; SessionState ss = SessionState.get(); if (ss != null) { - ss.getHiveHistory().setTaskCounters( - SessionState.get().getQueryId(), getId(), rj); - ss.getHiveHistory().setTaskProperty( - SessionState.get().getQueryId(), getId(), - Keys.TASK_HADOOP_PROGRESS, output); - ss.getHiveHistory().progressTask( - SessionState.get().getQueryId(), this); + ss.getHiveHistory().setTaskCounters(SessionState.get().getQueryId(), + getId(), rj); + ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), + getId(), Keys.TASK_HADOOP_PROGRESS, output); + ss.getHiveHistory().progressTask(SessionState.get().getQueryId(), + this); ss.getHiveHistory().logPlanProgress(queryPlan); } console.printInfo(output); @@ -321,13 +349,15 @@ reportTime = System.currentTimeMillis(); } } - // check for fatal error again in case it occurred after the last check before the job is completed - if ( !fatal && (fatal = checkFatalErrors(th, errMsg))) { + // check for fatal error again in case it occurred after the last check + // before the job is completed + if (!fatal && (fatal = checkFatalErrors(th, errMsg))) { console.printError("[Fatal Error] " + errMsg.toString()); success = false; - } else + } else { success = rj.isSuccessful(); - + } + setDone(); th.setRunningJob(jc.getJob(rj.getJobID())); updateCounters(th); @@ -335,15 +365,17 @@ if (ss != null) { ss.getHiveHistory().logPlanProgress(queryPlan); } - //LOG.info(queryPlan); + // LOG.info(queryPlan); } /** * Estimate the number of reducers needed for this job, based on job input, * and configuration parameters. + * * @return the number of reducers. 
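The reducer estimate documented here is a ceiling division of total input size by bytes-per-reducer, clamped to the range [1, maxReducers]. A minimal sketch with illustrative numbers:

// Sketch of the reducer estimate: ceil(totalInputFileSize / bytesPerReducer),
// clamped between 1 and maxReducers. The sample figures are made up.
public class ReducerEstimateSketch {
  static int estimate(long totalInputFileSize, long bytesPerReducer, int maxReducers) {
    int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
    reducers = Math.max(1, reducers);
    return Math.min(maxReducers, reducers);
  }

  public static void main(String[] args) {
    // e.g. 2.5 GB of input with 1 GB per reducer and a cap of 999 -> 3 reducers
    System.out.println(estimate(2500000000L, 1000000000L, 999));
  }
}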
*/ - public int estimateNumberOfReducers(HiveConf hive, JobConf job, mapredWork work) throws IOException { + public int estimateNumberOfReducers(HiveConf hive, JobConf job, + mapredWork work) throws IOException { if (hive == null) { hive = new HiveConf(); } @@ -351,10 +383,10 @@ int maxReducers = hive.getIntVar(HiveConf.ConfVars.MAXREDUCERS); long totalInputFileSize = getTotalInputFileSize(job, work); - LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers=" + maxReducers - + " totalInputFileSize=" + totalInputFileSize); + LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers=" + + maxReducers + " totalInputFileSize=" + totalInputFileSize); - int reducers = (int)((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer); + int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer); reducers = Math.max(1, reducers); reducers = Math.min(maxReducers, reducers); return reducers; @@ -367,41 +399,54 @@ // this is a temporary hack to fix things that are not fixed in the compiler Integer numReducersFromWork = work.getNumReduceTasks(); - if(work.getReducer() == null) { - console.printInfo("Number of reduce tasks is set to 0 since there's no reduce operator"); + if (work.getReducer() == null) { + console + .printInfo("Number of reduce tasks is set to 0 since there's no reduce operator"); work.setNumReduceTasks(Integer.valueOf(0)); } else { if (numReducersFromWork >= 0) { - console.printInfo("Number of reduce tasks determined at compile time: " + work.getNumReduceTasks()); + console.printInfo("Number of reduce tasks determined at compile time: " + + work.getNumReduceTasks()); } else if (job.getNumReduceTasks() > 0) { int reducers = job.getNumReduceTasks(); work.setNumReduceTasks(reducers); - console.printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: " + reducers); + console + .printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: " + + reducers); } else { int reducers = estimateNumberOfReducers(conf, job, work); work.setNumReduceTasks(reducers); - console.printInfo("Number of reduce tasks not specified. Estimated from input data size: " + reducers); + console + .printInfo("Number of reduce tasks not specified. Estimated from input data size: " + + reducers); } - console.printInfo("In order to change the average load for a reducer (in bytes):"); - console.printInfo(" set " + HiveConf.ConfVars.BYTESPERREDUCER.varname + "="); + console + .printInfo("In order to change the average load for a reducer (in bytes):"); + console.printInfo(" set " + HiveConf.ConfVars.BYTESPERREDUCER.varname + + "="); console.printInfo("In order to limit the maximum number of reducers:"); - console.printInfo(" set " + HiveConf.ConfVars.MAXREDUCERS.varname + "="); + console.printInfo(" set " + HiveConf.ConfVars.MAXREDUCERS.varname + + "="); console.printInfo("In order to set a constant number of reducers:"); - console.printInfo(" set " + HiveConf.ConfVars.HADOOPNUMREDUCERS + "="); + console.printInfo(" set " + HiveConf.ConfVars.HADOOPNUMREDUCERS + + "="); } } /** * Calculate the total size of input files. - * @param job the hadoop job conf. + * + * @param job + * the hadoop job conf. * @return the total size in bytes. * @throws IOException */ - public long getTotalInputFileSize(JobConf job, mapredWork work) throws IOException { + public long getTotalInputFileSize(JobConf job, mapredWork work) + throws IOException { long r = 0; // For each input path, calculate the total size. 
- for (String path: work.getPathToAliases().keySet()) { + for (String path : work.getPathToAliases().keySet()) { try { Path p = new Path(path); FileSystem fs = p.getFileSystem(job); @@ -419,14 +464,16 @@ */ @Override public void updateCounters(TaskHandle t) throws IOException { - ExecDriverTaskHandle th = (ExecDriverTaskHandle)t; + ExecDriverTaskHandle th = (ExecDriverTaskHandle) t; RunningJob rj = th.getRunningJob(); - this.mapProgress = Math.round(rj.mapProgress() * 100); - this.reduceProgress = Math.round(rj.reduceProgress() * 100); - taskCounters.put("CNTR_NAME_" + getId() + "_MAP_PROGRESS", Long.valueOf(this.mapProgress)); - taskCounters.put("CNTR_NAME_" + getId() + "_REDUCE_PROGRESS", Long.valueOf(this.reduceProgress)); + mapProgress = Math.round(rj.mapProgress() * 100); + reduceProgress = Math.round(rj.reduceProgress() * 100); + taskCounters.put("CNTR_NAME_" + getId() + "_MAP_PROGRESS", Long + .valueOf(mapProgress)); + taskCounters.put("CNTR_NAME_" + getId() + "_REDUCE_PROGRESS", Long + .valueOf(reduceProgress)); Counters ctrs = th.getCounters(); - for (Operator op: work.getAliasToWork().values()) { + for (Operator op : work.getAliasToWork().values()) { op.updateCounters(ctrs); } if (work.getReducer() != null) { @@ -450,21 +497,20 @@ return reduceProgress == 100; } - /** * Execute a query plan using Hadoop */ public int execute() { - + success = true; - + try { setNumberOfReducers(); - } catch(IOException e) { + } catch (IOException e) { String statusMesg = "IOException while accessing HDFS to estimate the number of reducers: " - + e.getMessage(); + + e.getMessage(); console.printError(statusMesg, "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); + + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 1; } @@ -473,16 +519,17 @@ throw new RuntimeException("Plan invalid, Reason: " + invalidReason); } - String hiveScratchDir = HiveConf.getVar(job, HiveConf.ConfVars.SCRATCHDIR); - String jobScratchDirStr = hiveScratchDir + File.separator+ Utilities.randGen.nextInt(); - Path jobScratchDir = new Path(jobScratchDirStr); + String jobScratchDirStr = hiveScratchDir + File.separator + + Utilities.randGen.nextInt(); + Path jobScratchDir = new Path(jobScratchDirStr); String emptyScratchDirStr = null; - Path emptyScratchDir = null; + Path emptyScratchDir = null; int numTries = 3; while (numTries > 0) { - emptyScratchDirStr = hiveScratchDir + File.separator + Utilities.randGen.nextInt(); + emptyScratchDirStr = hiveScratchDir + File.separator + + Utilities.randGen.nextInt(); emptyScratchDir = new Path(emptyScratchDirStr); try { @@ -490,10 +537,12 @@ fs.mkdirs(emptyScratchDir); break; } catch (Exception e) { - if (numTries > 0) + if (numTries > 0) { numTries--; - else - throw new RuntimeException("Failed to make dir " + emptyScratchDir.toString() + " : " + e.getMessage()); + } else { + throw new RuntimeException("Failed to make dir " + + emptyScratchDir.toString() + " : " + e.getMessage()); + } } } @@ -507,17 +556,19 @@ job.setReducerClass(ExecReducer.class); // Turn on speculative execution for reducers - HiveConf.setVar(job,HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, - HiveConf.getVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS)); + HiveConf.setVar(job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, + HiveConf.getVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS)); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) + if ((inpFormat == null) || 
(!StringUtils.isNotBlank(inpFormat))) { inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); + } LOG.info("Using " + inpFormat); try { - job.setInputFormat((Class)(Class.forName(inpFormat))); + job.setInputFormat((Class) (Class + .forName(inpFormat))); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage()); } @@ -526,14 +577,14 @@ job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); - // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands it + // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands + // it String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS); if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) { - String allJars = - StringUtils.isNotBlank(auxJars) - ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars) - : addedJars; + String allJars = StringUtils.isNotBlank(auxJars) ? (StringUtils + .isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars) + : addedJars; LOG.info("adding libjars: " + allJars); initializeFiles("tmpjars", allJars); } @@ -544,7 +595,8 @@ initializeFiles("tmpfiles", addedFiles); } // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it - String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES); + String addedArchives = HiveConf.getVar(job, + HiveConf.ConfVars.HIVEADDEDARCHIVES); if (StringUtils.isNotBlank(addedArchives)) { initializeFiles("tmparchives", addedArchives); } @@ -552,12 +604,13 @@ int returnVal = 0; RunningJob rj = null, orig_rj = null; - boolean noName = StringUtils.isEmpty(HiveConf. - getVar(job,HiveConf.ConfVars.HADOOPJOBNAME)); + boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, + HiveConf.ConfVars.HADOOPJOBNAME)); - if(noName) { + if (noName) { // This is for a special case to ensure unit tests pass - HiveConf.setVar(job,HiveConf.ConfVars.HADOOPJOBNAME, "JOB"+randGen.nextInt()); + HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + + randGen.nextInt()); } try { @@ -565,20 +618,22 @@ Utilities.setMapRedWork(job, work); - // remove the pwd from conf file so that job tracker doesn't show this logs + // remove the pwd from conf file so that job tracker doesn't show this + // logs String pwd = job.get(HiveConf.ConfVars.METASTOREPWD.varname); - if (pwd != null) + if (pwd != null) { job.set(HiveConf.ConfVars.METASTOREPWD.varname, "HIVE"); + } JobClient jc = new JobClient(job); - // make this client wait if job trcker is not behaving well. Throttle.checkJobTracker(job, LOG); orig_rj = rj = jc.submitJob(job); // replace it back - if (pwd != null) + if (pwd != null) { job.set(HiveConf.ConfVars.METASTOREPWD.varname, pwd); + } // add to list of running jobs so in case of abnormal shutdown can kill // it. @@ -590,11 +645,12 @@ progress(th); // success status will be setup inside progress if (rj == null) { - // in the corner case where the running job has disappeared from JT memory + // in the corner case where the running job has disappeared from JT + // memory // remember that we did actually submit the job. 
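The nested ternary that merges HIVEAUXJARS and HIVEADDEDJARS into a single "tmpjars" value is easier to read unrolled. The sketch below is behaviour-equivalent under the stated assumption that blanks mean "not set"; JarListSketch and the local isNotBlank helper are illustrative, not Hive code.

// Unrolled version of the jar-list merge used for "tmpjars": added jars first,
// then auxiliary jars, comma separated; blank values are skipped.
public class JarListSketch {
  private static boolean isNotBlank(String s) {
    return s != null && s.trim().length() > 0;
  }

  static String mergeJarLists(String auxJars, String addedJars) {
    if (!isNotBlank(auxJars)) {
      return addedJars;
    }
    return isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars;
  }
}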
rj = orig_rj; success = false; - } + } String statusMesg = getJobEndMsg(rj.getJobID()); if (!success) { @@ -636,41 +692,44 @@ try { if (rj != null) { - if(work.getAliasToWork() != null) { - for(Operator op: - work.getAliasToWork().values()) { + if (work.getAliasToWork() != null) { + for (Operator op : work.getAliasToWork() + .values()) { op.jobClose(job, success); } } - if(work.getReducer() != null) { + if (work.getReducer() != null) { work.getReducer().jobClose(job, success); } } } catch (Exception e) { // jobClose needs to execute successfully otherwise fail task - if(success) { + if (success) { success = false; returnVal = 3; - String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; + String mesg = "Job Commit failed with exception '" + + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); + + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } return (returnVal); } - + /** * this msg pattern is used to track when a job is started + * * @param jobId * @return */ public static String getJobStartMsg(String jobId) { return "Starting Job = " + jobId; } - + /** * this msg pattern is used to track when a job is successfully done. + * * @param jobId * @return */ @@ -678,29 +737,33 @@ return "Ended Job = " + jobId; } - private void showJobFailDebugInfo(JobConf conf, RunningJob rj) throws IOException { + private void showJobFailDebugInfo(JobConf conf, RunningJob rj) + throws IOException { Map failures = new HashMap(); - Set successes = new HashSet (); - Map taskToJob = new HashMap(); + Set successes = new HashSet(); + Map taskToJob = new HashMap(); int startIndex = 0; - while(true) { - TaskCompletionEvent[] taskCompletions = rj.getTaskCompletionEvents(startIndex); + while (true) { + TaskCompletionEvent[] taskCompletions = rj + .getTaskCompletionEvents(startIndex); - if(taskCompletions == null || taskCompletions.length == 0) { + if (taskCompletions == null || taskCompletions.length == 0) { break; } boolean more = true; - for(TaskCompletionEvent t : taskCompletions) { - // getTaskJobIDs return Strings for compatibility with Hadoop version without + for (TaskCompletionEvent t : taskCompletions) { + // getTaskJobIDs return Strings for compatibility with Hadoop version + // without // TaskID or TaskAttemptID - String [] taskJobIds = ShimLoader.getHadoopShims().getTaskJobIDs(t); + String[] taskJobIds = ShimLoader.getHadoopShims().getTaskJobIDs(t); - if(taskJobIds == null) { - console.printError("Task attempt info is unavailable in this Hadoop version"); + if (taskJobIds == null) { + console + .printError("Task attempt info is unavailable in this Hadoop version"); more = false; break; } @@ -709,9 +772,9 @@ String jobId = taskJobIds[1]; taskToJob.put(taskId, jobId); - if(t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) { + if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) { Integer failAttempts = failures.get(taskId); - if(failAttempts == null) { + if (failAttempts == null) { failAttempts = Integer.valueOf(0); } failAttempts = Integer.valueOf(failAttempts.intValue() + 1); @@ -720,36 +783,39 @@ successes.add(taskId); } } - if(!more) { + if (!more) { break; } startIndex += taskCompletions.length; } // Remove failures for tasks that succeeded - for(String task : successes) { + for (String task : successes) { failures.remove(task); } - if(failures.keySet().size() == 0) { + if (failures.keySet().size() == 0) { return; } // Find the highest failure count int maxFailures 
= 0; - for(Integer failCount : failures.values()) { - if(maxFailures < failCount.intValue()) + for (Integer failCount : failures.values()) { + if (maxFailures < failCount.intValue()) { maxFailures = failCount.intValue(); + } } // Display Error Message for tasks with the highest failure count - console.printError("\nFailed tasks with most" + "(" + maxFailures + ")" + " failures " + ": "); + console.printError("\nFailed tasks with most" + "(" + maxFailures + ")" + + " failures " + ": "); String jtUrl = JobTrackerURLResolver.getURL(conf); - for(String task : failures.keySet()) { - if(failures.get(task).intValue() == maxFailures) { + for (String task : failures.keySet()) { + if (failures.get(task).intValue() == maxFailures) { String jobId = taskToJob.get(task); - String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + task.toString(); - console.printError("Task URL: " + taskUrl +"\n"); + String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + + task.toString(); + console.printError("Task URL: " + taskUrl + "\n"); // Only print out one task because that's good enough for debugging. break; } @@ -845,14 +911,18 @@ // see also - code in CliDriver.java ClassLoader loader = conf.getClassLoader(); if (StringUtils.isNotBlank(auxJars)) { - loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ",")); + loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, + ",")); } if (StringUtils.isNotBlank(addedJars)) { - loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ",")); + loader = Utilities.addToClassPath(loader, StringUtils.split( + addedJars, ",")); } conf.setClassLoader(loader); - // Also set this to the Thread ContextClassLoader, so new threads will inherit - // this class loader, and propagate into newly created Configurations by those + // Also set this to the Thread ContextClassLoader, so new threads will + // inherit + // this class loader, and propagate into newly created Configurations by + // those // new threads. 
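The failed-task debug logic in this hunk boils down to: count failed attempts per task, discard tasks that eventually succeeded, then report the tasks with the highest failure count. A compact sketch with task ids simplified to plain strings; note the patched code prints only the first such task, while this sketch returns all of them.

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the failed-task aggregation described above.
public class FailDebugSketch {
  static Set<String> mostFailedTasks(Map<String, Integer> failures, Set<String> successes) {
    // Ignore failures for tasks that eventually succeeded.
    for (String task : successes) {
      failures.remove(task);
    }
    Set<String> worst = new HashSet<String>();
    if (failures.isEmpty()) {
      return worst;
    }
    int maxFailures = Collections.max(failures.values());
    for (Map.Entry<String, Integer> e : failures.entrySet()) {
      if (e.getValue() == maxFailures) {
        worst.add(e.getKey());
      }
    }
    return worst;
  }
}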
Thread.currentThread().setContextClassLoader(loader); } catch (Exception e) { @@ -887,8 +957,9 @@ String oneProp = (String) one; if (localMode - && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) + && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) { continue; + } String oneValue = deltaP.getProperty(oneProp); @@ -941,8 +1012,9 @@ if (inpFs.exists(dirPath)) { FileStatus[] fStats = inpFs.listStatus(dirPath); - if (fStats.length > 0) + if (fStats.length > 0) { return false; + } } return true; } @@ -950,18 +1022,22 @@ /** * Handle a empty/null path for a given alias */ - private int addInputPath(String path, JobConf job, mapredWork work, String hiveScratchDir, int numEmptyPaths, - boolean isEmptyPath, String alias) throws Exception { + private int addInputPath(String path, JobConf job, mapredWork work, + String hiveScratchDir, int numEmptyPaths, boolean isEmptyPath, + String alias) throws Exception { // either the directory does not exist or it is empty assert path == null || isEmptyPath; // The input file does not exist, replace it by a empty file Class outFileFormat = null; - if (isEmptyPath) - outFileFormat = work.getPathToPartitionInfo().get(path).getTableDesc().getOutputFileFormatClass(); - else - outFileFormat = work.getAliasToPartnInfo().get(alias).getTableDesc().getOutputFileFormatClass(); + if (isEmptyPath) { + outFileFormat = work.getPathToPartitionInfo().get(path).getTableDesc() + .getOutputFileFormatClass(); + } else { + outFileFormat = work.getAliasToPartnInfo().get(alias).getTableDesc() + .getOutputFileFormatClass(); + } // create a dummy empty file in a new directory String newDir = hiveScratchDir + File.separator + (++numEmptyPaths); @@ -974,13 +1050,13 @@ LOG.info("Changed input file to " + newPath.toString()); // toggle the work - LinkedHashMap> pathToAliases = work.getPathToAliases(); + LinkedHashMap> pathToAliases = work + .getPathToAliases(); if (isEmptyPath) { assert path != null; pathToAliases.put(newPath.toUri().toString(), pathToAliases.get(path)); pathToAliases.remove(path); - } - else { + } else { assert path == null; ArrayList newList = new ArrayList(); newList.add(alias); @@ -989,25 +1065,28 @@ work.setPathToAliases(pathToAliases); - LinkedHashMap pathToPartitionInfo = work.getPathToPartitionInfo(); + LinkedHashMap pathToPartitionInfo = work + .getPathToPartitionInfo(); if (isEmptyPath) { - pathToPartitionInfo.put(newPath.toUri().toString(), pathToPartitionInfo.get(path)); + pathToPartitionInfo.put(newPath.toUri().toString(), pathToPartitionInfo + .get(path)); pathToPartitionInfo.remove(path); - } - else { + } else { partitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); pathToPartitionInfo.put(newPath.toUri().toString(), pDesc); } work.setPathToPartitionInfo(pathToPartitionInfo); String onefile = newPath.toString(); - RecordWriter recWriter = outFileFormat.newInstance().getHiveRecordWriter(job, newFilePath, Text.class, false, new Properties(), null); + RecordWriter recWriter = outFileFormat.newInstance().getHiveRecordWriter( + job, newFilePath, Text.class, false, new Properties(), null); recWriter.close(false); FileInputFormat.addInputPaths(job, onefile); return numEmptyPaths; } - private void addInputPaths(JobConf job, mapredWork work, String hiveScratchDir) throws Exception { + private void addInputPaths(JobConf job, mapredWork work, String hiveScratchDir) + throws Exception { int numEmptyPaths = 0; List pathsProcessed = new ArrayList(); @@ -1015,7 +1094,7 @@ // AliasToWork contains all the aliases for (String 
oneAlias : work.getAliasToWork().keySet()) { LOG.info("Processing alias " + oneAlias); - List emptyPaths = new ArrayList(); + List emptyPaths = new ArrayList(); // The alias may not have any path String path = null; @@ -1024,31 +1103,41 @@ if (aliases.contains(oneAlias)) { path = onefile; - // Multiple aliases can point to the same path - it should be processed only once - if (pathsProcessed.contains(path)) + // Multiple aliases can point to the same path - it should be + // processed only once + if (pathsProcessed.contains(path)) { continue; + } pathsProcessed.add(path); LOG.info("Adding input file " + path); - if (!isEmptyPath(job, path)) + if (!isEmptyPath(job, path)) { FileInputFormat.addInputPaths(job, path); - else + } else { emptyPaths.add(path); + } } } // Create a empty file if the directory is empty - for (String emptyPath : emptyPaths) - numEmptyPaths = addInputPath(emptyPath, job, work, hiveScratchDir, numEmptyPaths, true, oneAlias); + for (String emptyPath : emptyPaths) { + numEmptyPaths = addInputPath(emptyPath, job, work, hiveScratchDir, + numEmptyPaths, true, oneAlias); + } // If the query references non-existent partitions - // We need to add a empty file, it is not acceptable to change the operator tree + // We need to add a empty file, it is not acceptable to change the + // operator tree // Consider the query: - // select * from (select count(1) from T union all select count(1) from T2) x; - // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2 rows) - if (path == null) - numEmptyPaths = addInputPath(null, job, work, hiveScratchDir, numEmptyPaths, false, oneAlias); + // select * from (select count(1) from T union all select count(1) from + // T2) x; + // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2 + // rows) + if (path == null) { + numEmptyPaths = addInputPath(null, job, work, hiveScratchDir, + numEmptyPaths, false, oneAlias); + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TaskResult.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskResult.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskResult.java (working copy) @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.*; /** * TaskResult implementation @@ -27,6 +26,7 @@ public class TaskResult { protected int exitVal; protected boolean runStatus; + public TaskResult() { exitVal = -1; setRunning(true); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/RecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/RecordReader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/RecordReader.java (working copy) @@ -25,10 +25,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Writable; - public interface RecordReader { - public void initialize(InputStream in, Configuration conf, Properties tbl) throws IOException; + public void initialize(InputStream in, Configuration conf, Properties tbl) + throws IOException; public Writable createRow() throws IOException; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java (working copy) @@ -32,27 +32,34 @@ /** * Filter operator implementation **/ -public class 
FilterOperator extends Operator implements Serializable { +public class FilterOperator extends Operator implements + Serializable { private static final long serialVersionUID = 1L; - public static enum Counter {FILTERED, PASSED} + + public static enum Counter { + FILTERED, PASSED + } + transient private final LongWritable filtered_count, passed_count; transient private ExprNodeEvaluator conditionEvaluator; - transient private PrimitiveObjectInspector conditionInspector; + transient private PrimitiveObjectInspector conditionInspector; transient private int consecutiveFails; - transient int heartbeatInterval; - - public FilterOperator () { + transient int heartbeatInterval; + + public FilterOperator() { super(); filtered_count = new LongWritable(); passed_count = new LongWritable(); consecutiveFails = 0; } + @Override protected void initializeOp(Configuration hconf) throws HiveException { try { - heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); - this.conditionEvaluator = ExprNodeEvaluatorFactory.get(conf.getPredicate()); + heartbeatInterval = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVESENDHEARTBEAT); + conditionEvaluator = ExprNodeEvaluatorFactory.get(conf.getPredicate()); statsMap.put(Counter.FILTERED, filtered_count); statsMap.put(Counter.PASSED, passed_count); conditionInspector = null; @@ -62,24 +69,29 @@ initializeChildren(hconf); } + @Override public void processOp(Object row, int tag) throws HiveException { ObjectInspector rowInspector = inputObjInspectors[tag]; if (conditionInspector == null) { - conditionInspector = (PrimitiveObjectInspector)conditionEvaluator.initialize(rowInspector); + conditionInspector = (PrimitiveObjectInspector) conditionEvaluator + .initialize(rowInspector); } Object condition = conditionEvaluator.evaluate(row); - Boolean ret = (Boolean)conditionInspector.getPrimitiveJavaObject(condition); + Boolean ret = (Boolean) conditionInspector + .getPrimitiveJavaObject(condition); if (Boolean.TRUE.equals(ret)) { forward(row, rowInspector); - passed_count.set(passed_count.get()+1); + passed_count.set(passed_count.get() + 1); consecutiveFails = 0; } else { - filtered_count.set(filtered_count.get()+1); + filtered_count.set(filtered_count.get() + 1); consecutiveFails++; - - // In case of a lot of consecutive failures, send a heartbeat in order to avoid timeout - if (((consecutiveFails % heartbeatInterval) == 0) && (reporter != null)) + + // In case of a lot of consecutive failures, send a heartbeat in order to + // avoid timeout + if (((consecutiveFails % heartbeatInterval) == 0) && (reporter != null)) { reporter.progress(); + } } } @@ -91,6 +103,7 @@ return new String("FIL"); } + @Override public int getType() { return OperatorType.FILTER; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDFMethodResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDFMethodResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDFMethodResolver.java (working copy) @@ -24,24 +24,27 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * The UDF Method resolver interface. A user can plugin a resolver to their UDF by implementing the - * functions in this interface. Note that the resolver is stored in the UDF class as an instance - * variable. We did not use a static variable because many resolvers maintain the class of the - * enclosing UDF as state and are called from a base class e.g. UDFBaseCompare. 
This makes it very - * easy to write UDFs that want to do resolution similar to the comparison operators. Such UDFs - * just need to extend UDFBaseCompare and do not have to care about the UDFMethodResolver interface. - * Same is true for UDFs that want to do resolution similar to that done by the numeric operators. - * Such UDFs simply have to extend UDFBaseNumericOp class. For the default resolution the UDF - * implementation simply needs to extend the UDF class. + * The UDF Method resolver interface. A user can plugin a resolver to their UDF + * by implementing the functions in this interface. Note that the resolver is + * stored in the UDF class as an instance variable. We did not use a static + * variable because many resolvers maintain the class of the enclosing UDF as + * state and are called from a base class e.g. UDFBaseCompare. This makes it + * very easy to write UDFs that want to do resolution similar to the comparison + * operators. Such UDFs just need to extend UDFBaseCompare and do not have to + * care about the UDFMethodResolver interface. Same is true for UDFs that want + * to do resolution similar to that done by the numeric operators. Such UDFs + * simply have to extend UDFBaseNumericOp class. For the default resolution the + * UDF implementation simply needs to extend the UDF class. */ public interface UDFMethodResolver { - + /** * Gets the evaluate method for the UDF given the parameter types. * - * @param argClasses The list of the argument types that need to matched with the evaluate - * function signature. + * @param argClasses + * The list of the argument types that need to matched with the + * evaluate function signature. */ - public Method getEvalMethod(List argClasses) - throws AmbiguousMethodException, UDFArgumentException; + public Method getEvalMethod(List argClasses) + throws AmbiguousMethodException, UDFArgumentException; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (working copy) @@ -33,64 +33,67 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** - * Union Operator - * Just forwards. Doesn't do anything itself. + * Union Operator Just forwards. Doesn't do anything itself. **/ -public class UnionOperator extends Operator implements Serializable { +public class UnionOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; - + StructObjectInspector[] parentObjInspectors; List[] parentFields; ReturnObjectInspectorResolver[] columnTypeResolvers; boolean[] needsTransform; - + ArrayList outputRow; - /** UnionOperator will transform the input rows if the inputObjInspectors - * from different parents are different. - * If one parent has exactly the same ObjectInspector as the output - * ObjectInspector, then we don't need to do transformation for that parent. - * This information is recorded in needsTransform[]. + /** + * UnionOperator will transform the input rows if the inputObjInspectors from + * different parents are different. If one parent has exactly the same + * ObjectInspector as the output ObjectInspector, then we don't need to do + * transformation for that parent. This information is recorded in + * needsTransform[]. 
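Put another way, the union resolves one common ObjectInspector per output column across all parents, and a parent needs row transformation only when its inspector instance differs from the resolved output inspector. The schematic sketch below uses plain Object references as stand-ins for ObjectInspectors; it is not the Hive ReturnObjectInspectorResolver.

// Schematic sketch: reference comparison is enough when inspectors come from
// a shared factory, so a parent is transformed only if its inspector is not
// already the output inspector.
public class UnionTransformSketch {
  static boolean[] computeNeedsTransform(Object[] parentInspectors, Object outputInspector) {
    boolean[] needsTransform = new boolean[parentInspectors.length];
    for (int p = 0; p < parentInspectors.length; p++) {
      needsTransform[p] = (parentInspectors[p] != outputInspector);
    }
    return needsTransform;
  }
}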
*/ + @Override protected void initializeOp(Configuration hconf) throws HiveException { - + int parents = parentOperators.size(); parentObjInspectors = new StructObjectInspector[parents]; parentFields = new List[parents]; for (int p = 0; p < parents; p++) { - parentObjInspectors[p] = (StructObjectInspector)inputObjInspectors[p]; + parentObjInspectors[p] = (StructObjectInspector) inputObjInspectors[p]; parentFields[p] = parentObjInspectors[p].getAllStructFieldRefs(); } - + // Get columnNames from the first parent int columns = parentFields[0].size(); ArrayList columnNames = new ArrayList(columns); for (int c = 0; c < columns; c++) { columnNames.add(parentFields[0].get(c).getFieldName()); } - + // Get outputFieldOIs columnTypeResolvers = new ReturnObjectInspectorResolver[columns]; for (int c = 0; c < columns; c++) { columnTypeResolvers[c] = new ReturnObjectInspectorResolver(); } - + for (int p = 0; p < parents; p++) { - assert(parentFields[p].size() == columns); + assert (parentFields[p].size() == columns); for (int c = 0; c < columns; c++) { - columnTypeResolvers[c].update(parentFields[p].get(c).getFieldObjectInspector()); + columnTypeResolvers[c].update(parentFields[p].get(c) + .getFieldObjectInspector()); } } - - ArrayList outputFieldOIs = new ArrayList(columns); + + ArrayList outputFieldOIs = new ArrayList( + columns); for (int c = 0; c < columns; c++) { outputFieldOIs.add(columnTypeResolvers[c].get()); } - + // create output row ObjectInspector - outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - columnNames, outputFieldOIs); + outputObjInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(columnNames, outputFieldOIs); outputRow = new ArrayList(columns); for (int c = 0; c < columns; c++) { outputRow.add(null); @@ -99,17 +102,18 @@ // whether we need to do transformation for each parent needsTransform = new boolean[parents]; for (int p = 0; p < parents; p++) { - // Testing using != is good enough, because we use ObjectInspectorFactory to + // Testing using != is good enough, because we use ObjectInspectorFactory + // to // create ObjectInspectors. 
needsTransform[p] = (inputObjInspectors[p] != outputObjInspector); if (needsTransform[p]) { - LOG.info("Union Operator needs to transform row from parent[" + p + "] from " - + inputObjInspectors[p] + " to " + outputObjInspector); + LOG.info("Union Operator needs to transform row from parent[" + p + + "] from " + inputObjInspectors[p] + " to " + outputObjInspector); } } initializeChildren(hconf); } - + @Override public synchronized void processOp(Object row, int tag) throws HiveException { @@ -118,9 +122,9 @@ if (needsTransform[tag]) { for (int c = 0; c < fields.size(); c++) { - outputRow.set(c, columnTypeResolvers[c].convertIfNecessary( - soi.getStructFieldData(row, fields.get(c)), - fields.get(c).getFieldObjectInspector())); + outputRow.set(c, columnTypeResolvers[c].convertIfNecessary(soi + .getStructFieldData(row, fields.get(c)), fields.get(c) + .getFieldObjectInspector())); } forward(outputRow, outputObjInspector); } else { @@ -135,7 +139,8 @@ public String getName() { return new String("UNION"); } - + + @Override public int getType() { return OperatorType.UNION; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Throttle.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Throttle.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Throttle.java (working copy) @@ -18,21 +18,13 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; +import java.io.IOException; +import java.io.InputStream; import java.net.URL; -import java.net.URLEncoder; -import java.net.URLDecoder; -import java.net.MalformedURLException; -import java.net.InetSocketAddress; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobTracker; /* * Intelligence to make clients wait if the cluster is in a bad state. @@ -51,14 +43,14 @@ /** * fetch http://tracker.om:/gc.jsp?threshold=period */ - static void checkJobTracker(JobConf conf, Log LOG) { + static void checkJobTracker(JobConf conf, Log LOG) { try { - byte buffer[] = new byte[1024]; + byte buffer[] = new byte[1024]; int threshold = conf.getInt("mapred.throttle.threshold.percent", - DEFAULT_MEMORY_GC_PERCENT); + DEFAULT_MEMORY_GC_PERCENT); int retry = conf.getInt("mapred.throttle.retry.period", - DEFAULT_RETRY_PERIOD); + DEFAULT_RETRY_PERIOD); // If the threshold is 100 percent, then there is no throttling if (threshold == 100) { @@ -66,35 +58,35 @@ } // This is the Job Tracker URL - String tracker = JobTrackerURLResolver.getURL(conf) + - "/gc.jsp?threshold=" + threshold; + String tracker = JobTrackerURLResolver.getURL(conf) + + "/gc.jsp?threshold=" + threshold; while (true) { // read in the first 1K characters from the URL URL url = new URL(tracker); LOG.debug("Throttle: URL " + tracker); InputStream in = url.openStream(); - int numRead = in.read(buffer); + in.read(buffer); in.close(); String fetchString = new String(buffer); // fetch the xml tag xxx - Pattern dowait = Pattern.compile("", - Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + Pattern dowait = Pattern.compile("", Pattern.CASE_INSENSITIVE + | Pattern.DOTALL | Pattern.MULTILINE); String[] results = dowait.split(fetchString); if (results.length != 2) { - throw new IOException("Throttle: Unable to parse response of URL " + url + - ". 
Get retuned " + fetchString); + throw new IOException("Throttle: Unable to parse response of URL " + + url + ". Get retuned " + fetchString); } - dowait = Pattern.compile("", - Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + dowait = Pattern.compile("", Pattern.CASE_INSENSITIVE + | Pattern.DOTALL | Pattern.MULTILINE); results = dowait.split(results[1]); if (results.length < 1) { - throw new IOException("Throttle: Unable to parse response of URL " + url + - ". Get retuned " + fetchString); + throw new IOException("Throttle: Unable to parse response of URL " + + url + ". Get retuned " + fetchString); } - // if the jobtracker signalled that the threshold is not exceeded, + // if the jobtracker signalled that the threshold is not exceeded, // then we return immediately. if (results[0].trim().compareToIgnoreCase("false") == 0) { return; @@ -102,8 +94,8 @@ // The JobTracker has exceeded its threshold and is doing a GC. // The client has to wait and retry. - LOG.warn("Job is being throttled because of resource crunch on the " + - "JobTracker. Will retry in " + retry + " seconds.."); + LOG.warn("Job is being throttled because of resource crunch on the " + + "JobTracker. Will retry in " + retry + " seconds.."); Thread.sleep(retry * 1000L); } } catch (Exception e) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordReader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordReader.java (working copy) @@ -22,20 +22,19 @@ import java.io.InputStream; import java.util.Properties; -import org.apache.hadoop.mapred.LineRecordReader.LineReader; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.LineRecordReader.LineReader; - public class TextRecordReader implements RecordReader { - private LineReader lineReader; + private LineReader lineReader; private InputStream in; - private Text row; + private Text row; - public void initialize(InputStream in, Configuration conf, Properties tbl) throws IOException { + public void initialize(InputStream in, Configuration conf, Properties tbl) + throws IOException { lineReader = new LineReader(in, conf); this.in = in; } @@ -46,14 +45,16 @@ } public int next(Writable row) throws IOException { - if (lineReader == null) + if (lineReader == null) { return -1; + } - return lineReader.readLine((Text)row); + return lineReader.readLine((Text) row); } public void close() throws IOException { - if (in != null) + if (in != null) { in.close(); + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (working copy) @@ -42,69 +42,73 @@ /** * Reduce Sink Operator sends output to the reduce stage **/ -public class ReduceSinkOperator extends TerminalOperator implements Serializable { +public class ReduceSinkOperator extends TerminalOperator + implements Serializable { private static final long serialVersionUID = 1L; /** - * The evaluators for the key columns. - * Key columns decide the sort order on the reducer side. - * Key columns are passed to the reducer in the "key". + * The evaluators for the key columns. 
Key columns decide the sort order on + * the reducer side. Key columns are passed to the reducer in the "key". */ transient protected ExprNodeEvaluator[] keyEval; /** - * The evaluators for the value columns. - * Value columns are passed to reducer in the "value". + * The evaluators for the value columns. Value columns are passed to reducer + * in the "value". */ transient protected ExprNodeEvaluator[] valueEval; /** - * The evaluators for the partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language). - * Partition columns decide the reducer that the current row goes to. - * Partition columns are not passed to reducer. + * The evaluators for the partition columns (CLUSTER BY or DISTRIBUTE BY in + * Hive language). Partition columns decide the reducer that the current row + * goes to. Partition columns are not passed to reducer. */ transient protected ExprNodeEvaluator[] partitionEval; - - // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready + + // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is + // ready transient Serializer keySerializer; transient boolean keyIsText; transient Serializer valueSerializer; transient int tag; transient byte[] tagByte = new byte[1]; - + + @Override protected void initializeOp(Configuration hconf) throws HiveException { try { keyEval = new ExprNodeEvaluator[conf.getKeyCols().size()]; - int i=0; - for(exprNodeDesc e: conf.getKeyCols()) { + int i = 0; + for (exprNodeDesc e : conf.getKeyCols()) { keyEval[i++] = ExprNodeEvaluatorFactory.get(e); } valueEval = new ExprNodeEvaluator[conf.getValueCols().size()]; - i=0; - for(exprNodeDesc e: conf.getValueCols()) { + i = 0; + for (exprNodeDesc e : conf.getValueCols()) { valueEval[i++] = ExprNodeEvaluatorFactory.get(e); } partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()]; - i=0; - for(exprNodeDesc e: conf.getPartitionCols()) { + i = 0; + for (exprNodeDesc e : conf.getPartitionCols()) { partitionEval[i++] = ExprNodeEvaluatorFactory.get(e); } tag = conf.getTag(); - tagByte[0] = (byte)tag; + tagByte[0] = (byte) tag; LOG.info("Using tag = " + tag); tableDesc keyTableDesc = conf.getKeySerializeInfo(); - keySerializer = (Serializer)keyTableDesc.getDeserializerClass().newInstance(); + keySerializer = (Serializer) keyTableDesc.getDeserializerClass() + .newInstance(); keySerializer.initialize(null, keyTableDesc.getProperties()); keyIsText = keySerializer.getSerializedClass().equals(Text.class); - + tableDesc valueTableDesc = conf.getValueSerializeInfo(); - valueSerializer = (Serializer)valueTableDesc.getDeserializerClass().newInstance(); + valueSerializer = (Serializer) valueTableDesc.getDeserializerClass() + .newInstance(); valueSerializer.initialize(null, valueTableDesc.getProperties()); - + firstRow = true; initializeChildren(hconf); } catch (Exception e) { @@ -116,65 +120,72 @@ transient InspectableObject tempInspectableObject = new InspectableObject(); transient HiveKey keyWritable = new HiveKey(); transient Writable value; - + transient StructObjectInspector keyObjectInspector; transient StructObjectInspector valueObjectInspector; transient ObjectInspector[] partitionObjectInspectors; transient Object[] cachedKeys; transient Object[] cachedValues; - + boolean firstRow; - + transient Random random; + + @Override public void processOp(Object row, int tag) throws HiveException { try { ObjectInspector rowInspector = inputObjInspectors[tag]; if (firstRow) { firstRow = false; - keyObjectInspector = initEvaluatorsAndReturnStruct(keyEval, 
conf.getOutputKeyColumnNames(), rowInspector); - valueObjectInspector = initEvaluatorsAndReturnStruct(valueEval, conf.getOutputValueColumnNames(), rowInspector); + keyObjectInspector = initEvaluatorsAndReturnStruct(keyEval, conf + .getOutputKeyColumnNames(), rowInspector); + valueObjectInspector = initEvaluatorsAndReturnStruct(valueEval, conf + .getOutputValueColumnNames(), rowInspector); partitionObjectInspectors = initEvaluators(partitionEval, rowInspector); cachedKeys = new Object[keyEval.length]; cachedValues = new Object[valueEval.length]; } - - + // Evaluate the keys - for (int i=0; i gWorkMap= - Collections.synchronizedMap(new HashMap()); + + public static enum ReduceField { + KEY, VALUE, ALIAS + }; + + private static Map gWorkMap = Collections + .synchronizedMap(new HashMap()); static final private Log LOG = LogFactory.getLog(Utilities.class.getName()); - public static void clearMapRedWork (Configuration job) { + public static void clearMapRedWork(Configuration job) { try { Path planPath = new Path(HiveConf.getVar(job, HiveConf.ConfVars.PLAN)); FileSystem fs = FileSystem.get(job); - if(fs.exists(planPath)) { - try { - fs.delete(planPath, true); - } catch (IOException e) { - e.printStackTrace(); - } + if (fs.exists(planPath)) { + try { + fs.delete(planPath, true); + } catch (IOException e) { + e.printStackTrace(); + } } } catch (Exception e) { } finally { // where a single process works with multiple plans - we must clear // the cache before working with the next plan. - synchronized(gWorkMap) { + synchronized (gWorkMap) { gWorkMap.remove(getJobName(job)); } } } - public static mapredWork getMapRedWork (Configuration job) { + public static mapredWork getMapRedWork(Configuration job) { mapredWork gWork = null; try { - synchronized(gWorkMap) { + synchronized (gWorkMap) { gWork = gWorkMap.get(getJobName(job)); } - if(gWork == null) { + if (gWork == null) { synchronized (Utilities.class) { - if(gWork != null) + if (gWork != null) { return (gWork); + } InputStream in = new FileInputStream("HIVE_PLAN" - +sanitizedJobId(job)); + + sanitizedJobId(job)); mapredWork ret = deserializeMapRedWork(in, job); gWork = ret; gWork.initialize(); @@ -126,7 +151,7 @@ return (gWork); } catch (Exception e) { e.printStackTrace(); - throw new RuntimeException (e); + throw new RuntimeException(e); } } @@ -136,60 +161,62 @@ } ArrayList ret = new ArrayList(); - for(FieldSchema f: fl) { - ret.add(f.getName() + " " + f.getType() + - (f.getComment() != null ? (" " + f.getComment()) : "")); + for (FieldSchema f : fl) { + ret.add(f.getName() + " " + f.getType() + + (f.getComment() != null ? (" " + f.getComment()) : "")); } return ret; } /** - * Java 1.5 workaround. - * From http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5015403 + * Java 1.5 workaround. 
From + * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5015403 */ public static class EnumDelegate extends DefaultPersistenceDelegate { @Override - protected Expression instantiate(Object oldInstance, Encoder out) { - return new Expression(Enum.class, - "valueOf", - new Object[] { oldInstance.getClass(), ((Enum) oldInstance).name() }); + protected Expression instantiate(Object oldInstance, Encoder out) { + return new Expression(Enum.class, "valueOf", new Object[] { + oldInstance.getClass(), ((Enum) oldInstance).name() }); } + + @Override protected boolean mutatesTo(Object oldInstance, Object newInstance) { return oldInstance == newInstance; } } - public static void setMapRedWork (Configuration job, mapredWork w) { + public static void setMapRedWork(Configuration job, mapredWork w) { try { // use the default file system of the job FileSystem fs = FileSystem.get(job); - Path planPath = new Path(HiveConf.getVar(job, HiveConf.ConfVars.SCRATCHDIR), - "plan."+randGen.nextInt()); + Path planPath = new Path(HiveConf.getVar(job, + HiveConf.ConfVars.SCRATCHDIR), "plan." + randGen.nextInt()); FSDataOutputStream out = fs.create(planPath); serializeMapRedWork(w, out); HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toString()); // Set up distributed cache DistributedCache.createSymlink(job); String uriWithLink = planPath.toUri().toString() + "#HIVE_PLAN" - +sanitizedJobId(job); + + sanitizedJobId(job); DistributedCache.addCacheFile(new URI(uriWithLink), job); - // Cache the object in this process too so lookups don't hit the file system + // Cache the object in this process too so lookups don't hit the file + // system synchronized (Utilities.class) { w.initialize(); - gWorkMap.put(getJobName(job),w); + gWorkMap.put(getJobName(job), w); } } catch (Exception e) { e.printStackTrace(); - throw new RuntimeException (e); + throw new RuntimeException(e); } } - public static String getJobName( Configuration job) { + public static String getJobName(Configuration job) { String s = HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME); // This is just a backup case. We would like Hive to always have jobnames. - if(s == null) { + if (s == null) { // There is no job name => we set one - s = "JOB"+randGen.nextInt(); + s = "JOB" + randGen.nextInt(); HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, s); } return s; @@ -204,54 +231,64 @@ return s.hashCode(); } - public static void serializeTasks(Task t, OutputStream out) { + public static void serializeTasks(Task t, + OutputStream out) { XMLEncoder e = new XMLEncoder(out); // workaround for java 1.5 - e.setPersistenceDelegate( ExpressionTypes.class, new EnumDelegate() ); - e.setPersistenceDelegate( groupByDesc.Mode.class, new EnumDelegate()); - e.setPersistenceDelegate( Operator.ProgressCounter.class, new EnumDelegate()); + e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate()); + e.setPersistenceDelegate(groupByDesc.Mode.class, new EnumDelegate()); + e + .setPersistenceDelegate(Operator.ProgressCounter.class, + new EnumDelegate()); e.writeObject(t); e.close(); } /** - * Serialize the plan object to an output stream. - * DO NOT use this to write to standard output since it closes the output stream - * DO USE mapredWork.toXML() instead + * Serialize the plan object to an output stream. 
DO NOT use this to write to + * standard output since it closes the output stream DO USE mapredWork.toXML() + * instead */ public static void serializeMapRedWork(mapredWork w, OutputStream out) { XMLEncoder e = new XMLEncoder(out); // workaround for java 1.5 - e.setPersistenceDelegate( ExpressionTypes.class, new EnumDelegate() ); - e.setPersistenceDelegate( groupByDesc.Mode.class, new EnumDelegate()); + e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate()); + e.setPersistenceDelegate(groupByDesc.Mode.class, new EnumDelegate()); e.writeObject(w); e.close(); } - public static mapredWork deserializeMapRedWork (InputStream in, Configuration conf) { + public static mapredWork deserializeMapRedWork(InputStream in, + Configuration conf) { XMLDecoder d = new XMLDecoder(in, null, null, conf.getClassLoader()); - mapredWork ret = (mapredWork)d.readObject(); + mapredWork ret = (mapredWork) d.readObject(); d.close(); return (ret); } public static class Tuple { - private T one; - private V two; + private final T one; + private final V two; public Tuple(T one, V two) { this.one = one; this.two = two; } - public T getOne() {return this.one;} - public V getTwo() {return this.two;} + + public T getOne() { + return this.one; + } + + public V getTwo() { + return this.two; + } } public static tableDesc defaultTd; static { // by default we expect ^A separated strings - // This tableDesc does not provide column names. We should always use + // This tableDesc does not provide column names. We should always use // PlanUtils.getDefaultTableDesc(String separatorCode, String columns) // or getBinarySortableTableDesc(List fieldSchemas) when // we know the column names. @@ -273,44 +310,42 @@ public static Random randGen = new Random(); /** - * Gets the task id if we are running as a Hadoop job. - * Gets a random number otherwise. + * Gets the task id if we are running as a Hadoop job. Gets a random number + * otherwise. */ public static String getTaskId(Configuration hconf) { String taskid = (hconf == null) ? null : hconf.get("mapred.task.id"); - if((taskid == null) || taskid.equals("")) { - return (""+randGen.nextInt()); + if ((taskid == null) || taskid.equals("")) { + return ("" + randGen.nextInt()); } else { return taskid.replaceAll("task_[0-9]+_", ""); } } - public static HashMap makeMap(Object ... 
olist) { - HashMap ret = new HashMap (); - for(int i=0; i" + line); + } } else { - while ( (line = br.readLine()) != null) + while ((line = br.readLine()) != null) { os.println(line); + } } } catch (IOException ioe) { ioe.printStackTrace(); @@ -341,40 +379,45 @@ } public static tableDesc getTableDesc(Table tbl) { - return (new tableDesc (tbl.getDeserializer().getClass(), tbl.getInputFormatClass(), tbl.getOutputFormatClass(), tbl.getSchema())); + return (new tableDesc(tbl.getDeserializer().getClass(), tbl + .getInputFormatClass(), tbl.getOutputFormatClass(), tbl.getSchema())); } - - //column names and column types are all delimited by comma + + // column names and column types are all delimited by comma public static tableDesc getTableDesc(String cols, String colTypes) { return (new tableDesc(LazySimpleSerDe.class, SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class, Utilities.makeProperties( - org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + + Utilities.ctrlaCode, org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cols, org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes))); } - - public static partitionDesc getPartitionDesc(Partition part) throws HiveException { - return (new partitionDesc (part)); + public static partitionDesc getPartitionDesc(Partition part) + throws HiveException { + return (new partitionDesc(part)); } - public static void addMapWork(mapredWork mr, Table tbl, String alias, Operator work) { + public static void addMapWork(mapredWork mr, Table tbl, String alias, + Operator work) { mr.addMapWork(tbl.getDataLocation().getPath(), alias, work, - new partitionDesc(getTableDesc(tbl), null)); + new partitionDesc(getTableDesc(tbl), null)); } private static String getOpTreeSkel_helper(Operator op, String indent) { - if (op == null) + if (op == null) { return ""; + } StringBuffer sb = new StringBuffer(); sb.append(indent); sb.append(op.toString()); sb.append("\n"); - if (op.getChildOperators() != null) - for(Object child: op.getChildOperators()) { - sb.append(getOpTreeSkel_helper((Operator)child, indent + " ")); + if (op.getChildOperators() != null) { + for (Object child : op.getChildOperators()) { + sb.append(getOpTreeSkel_helper((Operator) child, indent + " ")); } + } return sb.toString(); } @@ -383,39 +426,46 @@ return getOpTreeSkel_helper(op, ""); } - private static boolean isWhitespace( int c ) { - if( c == -1 ) { return false; } - return Character.isWhitespace( ( char )c ); + private static boolean isWhitespace(int c) { + if (c == -1) { + return false; + } + return Character.isWhitespace((char) c); } - public static boolean contentsEqual( InputStream is1, InputStream is2, boolean ignoreWhitespace ) - throws IOException { + public static boolean contentsEqual(InputStream is1, InputStream is2, + boolean ignoreWhitespace) throws IOException { try { - if((is1 == is2) || (is1 == null && is2 == null)) - return true; + if ((is1 == is2) || (is1 == null && is2 == null)) { + return true; + } - if(is1 == null || is2 == null) + if (is1 == null || is2 == null) { return false; + } - while( true ) { + while (true) { int c1 = is1.read(); - while( ignoreWhitespace && isWhitespace( c1 ) ) + while (ignoreWhitespace && isWhitespace(c1)) { c1 = is1.read(); + } int c2 = is2.read(); - while( ignoreWhitespace && isWhitespace( c2 ) ) + while (ignoreWhitespace && isWhitespace(c2)) { c2 = is2.read(); - if( c1 == -1 && c2 == -1 ) + } + if (c1 == -1 && c2 == -1) { 
return true; - if( c1 != c2 ) + } + if (c1 != c2) { break; + } } - } catch( FileNotFoundException e ) { + } catch (FileNotFoundException e) { e.printStackTrace(); } return false; } - /** * convert "From src insert blah blah" to "From src insert ... blah" */ @@ -425,11 +475,11 @@ int len = str.length(); int suffixlength = 20; - if(len <= max) + if (len <= max) { return str; + } - - suffixlength = Math.min(suffixlength, (max-3)/2); + suffixlength = Math.min(suffixlength, (max - 3) / 2); String rev = StringUtils.reverse(str); // get the last few words @@ -437,19 +487,24 @@ suffix = StringUtils.reverse(suffix); // first few .. - String prefix = StringUtils.abbreviate(str, max-suffix.length()); + String prefix = StringUtils.abbreviate(str, max - suffix.length()); - return prefix+suffix; + return prefix + suffix; } public final static String NSTR = ""; - public static enum streamStatus {EOF, TERMINATED} - public static streamStatus readColumn(DataInput in, OutputStream out) throws IOException { + public static enum streamStatus { + EOF, TERMINATED + } + + public static streamStatus readColumn(DataInput in, OutputStream out) + throws IOException { + while (true) { int b; try { - b = (int)in.readByte(); + b = in.readByte(); } catch (EOFException e) { return streamStatus.EOF; } @@ -464,15 +519,17 @@ } /** - * Convert an output stream to a compressed output stream based on codecs - * and compression options specified in the Job Configuration. - * @param jc Job Configuration - * @param out Output Stream to be converted into compressed output stream + * Convert an output stream to a compressed output stream based on codecs and + * compression options specified in the Job Configuration. + * + * @param jc + * Job Configuration + * @param out + * Output Stream to be converted into compressed output stream * @return compressed output stream */ - public static OutputStream createCompressedStream(JobConf jc, - OutputStream out) - throws IOException { + public static OutputStream createCompressedStream(JobConf jc, OutputStream out) + throws IOException { boolean isCompressed = FileOutputFormat.getCompressOutput(jc); return createCompressedStream(jc, out, isCompressed); } @@ -481,20 +538,22 @@ * Convert an output stream to a compressed output stream based on codecs * codecs in the Job Configuration. 
Caller specifies directly whether file is * compressed or not - * @param jc Job Configuration - * @param out Output Stream to be converted into compressed output stream - * @param isCompressed whether the output stream needs to be compressed or not + * + * @param jc + * Job Configuration + * @param out + * Output Stream to be converted into compressed output stream + * @param isCompressed + * whether the output stream needs to be compressed or not * @return compressed output stream */ public static OutputStream createCompressedStream(JobConf jc, - OutputStream out, - boolean isCompressed) - throws IOException { - if(isCompressed) { - Class codecClass = - FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class); - CompressionCodec codec = (CompressionCodec) - ReflectionUtils.newInstance(codecClass, jc); + OutputStream out, boolean isCompressed) throws IOException { + if (isCompressed) { + Class codecClass = FileOutputFormat + .getOutputCompressorClass(jc, DefaultCodec.class); + CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance( + codecClass, jc); return codec.createOutputStream(out); } else { return (out); @@ -502,74 +561,87 @@ } /** - * Based on compression option and configured output codec - get extension - * for output file. This is only required for text files - not sequencefiles - * @param jc Job Configuration - * @param isCompressed Whether the output file is compressed or not + * Based on compression option and configured output codec - get extension for + * output file. This is only required for text files - not sequencefiles + * + * @param jc + * Job Configuration + * @param isCompressed + * Whether the output file is compressed or not * @return the required file extension (example: .gz) */ public static String getFileExtension(JobConf jc, boolean isCompressed) { - if(!isCompressed) { + if (!isCompressed) { return ""; } else { - Class codecClass = - FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class); - CompressionCodec codec = (CompressionCodec) - ReflectionUtils.newInstance(codecClass, jc); + Class codecClass = FileOutputFormat + .getOutputCompressorClass(jc, DefaultCodec.class); + CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance( + codecClass, jc); return codec.getDefaultExtension(); } } /** * Create a sequencefile output stream based on job configuration - * @param jc Job configuration - * @param fs File System to create file in - * @param file Path to be created - * @param keyClass Java Class for key - * @param valClass Java Class for value + * + * @param jc + * Job configuration + * @param fs + * File System to create file in + * @param file + * Path to be created + * @param keyClass + * Java Class for key + * @param valClass + * Java Class for value * @return output stream over the created sequencefile */ - public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, - Path file, Class keyClass, - Class valClass) - throws IOException { - boolean isCompressed = SequenceFileOutputFormat.getCompressOutput(jc); + public static SequenceFile.Writer createSequenceWriter(JobConf jc, + FileSystem fs, Path file, Class keyClass, Class valClass) + throws IOException { + boolean isCompressed = FileOutputFormat.getCompressOutput(jc); return createSequenceWriter(jc, fs, file, keyClass, valClass, isCompressed); } /** - * Create a sequencefile output stream based on job configuration - * Uses user supplied compression flag (rather than obtaining it from the Job Configuration) - * @param jc Job 
configuration - * @param fs File System to create file in - * @param file Path to be created - * @param keyClass Java Class for key - * @param valClass Java Class for value + * Create a sequencefile output stream based on job configuration Uses user + * supplied compression flag (rather than obtaining it from the Job + * Configuration) + * + * @param jc + * Job configuration + * @param fs + * File System to create file in + * @param file + * Path to be created + * @param keyClass + * Java Class for key + * @param valClass + * Java Class for value * @return output stream over the created sequencefile */ - public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, - Path file, Class keyClass, - Class valClass, - boolean isCompressed) - throws IOException { + public static SequenceFile.Writer createSequenceWriter(JobConf jc, + FileSystem fs, Path file, Class keyClass, Class valClass, + boolean isCompressed) throws IOException { CompressionCodec codec = null; CompressionType compressionType = CompressionType.NONE; Class codecClass = null; if (isCompressed) { compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc); - codecClass = SequenceFileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class); - codec = (CompressionCodec) - ReflectionUtils.newInstance(codecClass, jc); + codecClass = FileOutputFormat.getOutputCompressorClass(jc, + DefaultCodec.class); + codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc); } - return (SequenceFile.createWriter(fs, jc, file, - keyClass, valClass, compressionType, codec)); + return (SequenceFile.createWriter(fs, jc, file, keyClass, valClass, + compressionType, codec)); } /** * Create a RCFile output stream based on job configuration Uses user supplied * compression flag (rather than obtaining it from the Job Configuration) - * + * * @param jc * Job configuration * @param fs @@ -593,9 +665,10 @@ /** * Shamelessly cloned from GenericOptionsParser */ - public static String realFile(String newFile, Configuration conf) throws IOException { + public static String realFile(String newFile, Configuration conf) + throws IOException { Path path = new Path(newFile); - URI pathURI = path.toUri(); + URI pathURI = path.toUri(); FileSystem fs; if (pathURI.getScheme() == null) { @@ -610,7 +683,9 @@ try { fs.close(); - } catch(IOException e){}; + } catch (IOException e) { + } + ; String file = path.makeQualified(fs).toString(); // For compatibility with hadoop 0.17, change file:/a/b/c to file:///a/b/c @@ -622,13 +697,18 @@ } public static List mergeUniqElems(List src, List dest) { - if (dest == null) return src; - if (src == null) return dest; + if (dest == null) { + return src; + } + if (src == null) { + return dest; + } int pos = 0; while (pos < dest.size()) { - if (!src.contains(dest.get(pos))) + if (!src.contains(dest.get(pos))) { src.add(dest.get(pos)); + } pos++; } @@ -638,8 +718,9 @@ private static final String tmpPrefix = "_tmp."; public static Path toTempPath(Path orig) { - if(orig.getName().indexOf(tmpPrefix) == 0) + if (orig.getName().indexOf(tmpPrefix) == 0) { return orig; + } return new Path(orig.getParent(), tmpPrefix + orig.getName()); } @@ -661,40 +742,49 @@ } /** - * Rename src to dst, or in the case dst already exists, move files in src - * to dst. If there is an existing file with the same name, the new file's - * name will be appended with "_1", "_2", etc. - * @param fs the FileSystem where src and dst are on. 
- * @param src the src directory - * @param dst the target directory + * Rename src to dst, or in the case dst already exists, move files in src to + * dst. If there is an existing file with the same name, the new file's name + * will be appended with "_1", "_2", etc. + * + * @param fs + * the FileSystem where src and dst are on. + * @param src + * the src directory + * @param dst + * the target directory * @throws IOException */ static public void rename(FileSystem fs, Path src, Path dst) - throws IOException, HiveException { + throws IOException, HiveException { if (!fs.rename(src, dst)) { - throw new HiveException ("Unable to move: " + src + " to: " + dst); + throw new HiveException("Unable to move: " + src + " to: " + dst); } } + /** - * Rename src to dst, or in the case dst already exists, move files in src - * to dst. If there is an existing file with the same name, the new file's - * name will be appended with "_1", "_2", etc. - * @param fs the FileSystem where src and dst are on. - * @param src the src directory - * @param dst the target directory + * Rename src to dst, or in the case dst already exists, move files in src to + * dst. If there is an existing file with the same name, the new file's name + * will be appended with "_1", "_2", etc. + * + * @param fs + * the FileSystem where src and dst are on. + * @param src + * the src directory + * @param dst + * the target directory * @throws IOException */ static public void renameOrMoveFiles(FileSystem fs, Path src, Path dst) - throws IOException, HiveException { + throws IOException, HiveException { if (!fs.exists(dst)) { if (!fs.rename(src, dst)) { - throw new HiveException ("Unable to move: " + src + " to: " + dst); + throw new HiveException("Unable to move: " + src + " to: " + dst); } } else { // move file by file FileStatus[] files = fs.listStatus(src); - for (int i=0; i taskIdToFile = new HashMap(); - for(FileStatus one: items) { - if(isTempPath(one)) { - if(!fs.delete(one.getPath(), true)) { - throw new IOException ("Unable to delete tmp file: " + one.getPath()); + for (FileStatus one : items) { + if (isTempPath(one)) { + if (!fs.delete(one.getPath(), true)) { + throw new IOException("Unable to delete tmp file: " + one.getPath()); } } else { String taskId = getTaskIdFromFilename(one.getPath().getName()); @@ -755,8 +853,8 @@ if (otherFile == null) { taskIdToFile.put(taskId, one); } else { - if(!fs.delete(one.getPath(), true)) { - throw new IOException ("Unable to delete duplicate file: " + if (!fs.delete(one.getPath(), true)) { + throw new IOException("Unable to delete duplicate file: " + one.getPath() + ". 
Existing file: " + otherFile.getPath()); } else { LOG.warn("Duplicate taskid file removed: " + one.getPath() @@ -768,30 +866,32 @@ } public static String getNameMessage(Exception e) { - return e.getClass().getName() + "(" + e.getMessage() + ")"; + return e.getClass().getName() + "(" + e.getMessage() + ")"; } /** * Add new elements to the classpath - * + * * @param newPaths * Array of classpath elements */ - public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception { - URLClassLoader loader = (URLClassLoader)cloader; + public static ClassLoader addToClassPath(ClassLoader cloader, + String[] newPaths) throws Exception { + URLClassLoader loader = (URLClassLoader) cloader; List curPath = Arrays.asList(loader.getURLs()); ArrayList newPath = new ArrayList(); // get a list with the current classpath components - for(URL onePath: curPath) { + for (URL onePath : curPath) { newPath.add(onePath); } curPath = newPath; for (String onestr : newPaths) { // special processing for hadoop-17. file:// needs to be removed - if (StringUtils.indexOf(onestr, "file://") == 0) + if (StringUtils.indexOf(onestr, "file://") == 0) { onestr = StringUtils.substring(onestr, 7); + } URL oneurl = (new File(onestr)).toURL(); if (!curPath.contains(oneurl)) { @@ -804,19 +904,21 @@ /** * remove elements from the classpath - * + * * @param pathsToRemove * Array of classpath elements */ - public static void removeFromClassPath(String[] pathsToRemove) throws Exception { + public static void removeFromClassPath(String[] pathsToRemove) + throws Exception { Thread curThread = Thread.currentThread(); URLClassLoader loader = (URLClassLoader) curThread.getContextClassLoader(); Set newPath = new HashSet(Arrays.asList(loader.getURLs())); for (String onestr : pathsToRemove) { // special processing for hadoop-17. file:// needs to be removed - if (StringUtils.indexOf(onestr, "file://") == 0) + if (StringUtils.indexOf(onestr, "file://") == 0) { onestr = StringUtils.substring(onestr, 7); + } URL oneurl = (new File(onestr)).toURL(); newPath.remove(oneurl); @@ -843,35 +945,38 @@ return names; } - public static List getColumnNamesFromFieldSchema(List partCols) { + public static List getColumnNamesFromFieldSchema( + List partCols) { List names = new ArrayList(); for (FieldSchema o : partCols) { names.add(o.getName()); } return names; } - + public static List getColumnNames(Properties props) { List names = new ArrayList(); String colNames = props.getProperty(Constants.LIST_COLUMNS); String[] cols = colNames.trim().split(","); if (cols != null) { - for(String col : cols) { - if(col!=null && !col.trim().equals("")) + for (String col : cols) { + if (col != null && !col.trim().equals("")) { names.add(col); + } } } return names; } - + public static List getColumnTypes(Properties props) { List names = new ArrayList(); String colNames = props.getProperty(Constants.LIST_COLUMN_TYPES); String[] cols = colNames.trim().split(","); if (cols != null) { - for(String col : cols) { - if(col!=null && !col.trim().equals("")) + for (String col : cols) { + if (col != null && !col.trim().equals("")) { names.add(col); + } } } return names; @@ -891,21 +996,23 @@ break; } } - if (!found) + if (!found) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } } } /** * Gets the default notification interval to send progress updates to the * tracker. Useful for operators that may not output data for a while. 
- * + * * @param hconf * @return the interval in miliseconds */ public static int getDefaultNotificationInterval(Configuration hconf) { int notificationInterval; - Integer expInterval = Integer.decode(hconf.get("mapred.tasktracker.expiry.interval")); + Integer expInterval = Integer.decode(hconf + .get("mapred.tasktracker.expiry.interval")); if (expInterval != null) { notificationInterval = expInterval.intValue() / 2; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TaskHandle.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskHandle.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskHandle.java (working copy) @@ -5,10 +5,13 @@ import org.apache.hadoop.mapred.Counters; public class TaskHandle { - // The eventual goal is to monitor the progress of all the tasks, not only the map reduce task. - // The execute() method of the tasks will return immediately, and return a task specific handle to - // monitor the progress of that task. - // Right now, the behavior is kind of broken, ExecDriver's execute method calls progress - instead it should + // The eventual goal is to monitor the progress of all the tasks, not only the + // map reduce task. + // The execute() method of the tasks will return immediately, and return a + // task specific handle to + // monitor the progress of that task. + // Right now, the behavior is kind of broken, ExecDriver's execute method + // calls progress - instead it should // be invoked by Driver public Counters getCounters() throws IOException { // default implementation Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordWriter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordWriter.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TextRecordWriter.java (working copy) @@ -22,19 +22,20 @@ import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.Text; public class TextRecordWriter implements RecordWriter { private OutputStream out; - public void initialize(OutputStream out, Configuration conf) throws IOException { + public void initialize(OutputStream out, Configuration conf) + throws IOException { this.out = out; } public void write(Writable row) throws IOException { - Text text = (Text)row; + Text text = (Text) row; out.write(text.getBytes(), 0, text.getLength()); out.write(Utilities.newLineCode); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy) @@ -31,6 +31,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; @@ -45,16 +46,17 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.ReflectionUtils; /** * Join operator implementation. */ -public abstract class CommonJoinOperator extends Operator implements Serializable { +public abstract class CommonJoinOperator extends + Operator implements Serializable { private static final long serialVersionUID = 1L; - static final protected Log LOG = LogFactory.getLog(CommonJoinOperator.class.getName()); + static final protected Log LOG = LogFactory.getLog(CommonJoinOperator.class + .getName()); public static class IntermediateObject { ArrayList[] objs; @@ -82,7 +84,7 @@ } public Object topObj() { - return objs[curSize-1]; + return objs[curSize - 1]; } } @@ -100,28 +102,38 @@ */ transient protected Map> joinValuesStandardObjectInspectors; - transient static protected Byte[] order; // order in which the results should be output + transient static protected Byte[] order; // order in which the results should + // be output transient protected joinCond[] condn; transient protected boolean noOuterJoin; transient private Object[] dummyObj; // for outer joins, contains the - // potential nulls for the concerned - // aliases - transient protected RowContainer>[] dummyObjVectors; // empty rows for each table + // potential nulls for the concerned + // aliases + transient protected RowContainer>[] dummyObjVectors; // empty + // rows + // for + // each + // table transient protected int totalSz; // total size of the composite object - // keys are the column names. basically this maps the position of the column in + // keys are the column names. basically this maps the position of the column + // in // the output of the CommonJoinOperator to the input columnInfo. 
transient private Map> posToAliasMap; - + transient LazyBinarySerDe[] spillTableSerDe; - transient protected Map spillTableDesc; // spill tables are used if the join input is too large to fit in memory + transient protected Map spillTableDesc; // spill tables are + // used if the join + // input is too large + // to fit in memory - HashMap>> storage; // map b/w table alias to RowContainer + HashMap>> storage; // map b/w table alias + // to RowContainer int joinEmitInterval = -1; int joinCacheSize = 0; int nextSz = 0; transient Byte lastAlias = null; - + transient boolean handleSkewJoin = false; protected int populateJoinKeyValue(Map> outMap, @@ -129,19 +141,21 @@ int total = 0; - Iterator>> entryIter = inputMap.entrySet().iterator(); + Iterator>> entryIter = inputMap + .entrySet().iterator(); while (entryIter.hasNext()) { - Map.Entry> e = (Map.Entry>) entryIter.next(); + Map.Entry> e = entryIter.next(); Byte key = order[e.getKey()]; - List expr = (List) e.getValue(); + List expr = e.getValue(); int sz = expr.size(); total += sz; List valueFields = new ArrayList(); - for (int j = 0; j < sz; j++) + for (int j = 0; j < sz; j++) { valueFields.add(ExprNodeEvaluatorFactory.get(expr.get(j))); + } outMap.put(key, valueFields); } @@ -150,14 +164,15 @@ } protected static HashMap> getObjectInspectorsFromEvaluators( - Map> exprEntries, ObjectInspector[] inputObjInspector) - throws HiveException { + Map> exprEntries, + ObjectInspector[] inputObjInspector) throws HiveException { HashMap> result = new HashMap>(); - for(Entry> exprEntry : exprEntries.entrySet()) { + for (Entry> exprEntry : exprEntries + .entrySet()) { Byte alias = exprEntry.getKey(); List exprList = exprEntry.getValue(); ArrayList fieldOIList = new ArrayList(); - for (int i=0; i> getStandardObjectInspectors( Map> aliasToObjectInspectors) { HashMap> result = new HashMap>(); - for(Entry> oiEntry: aliasToObjectInspectors.entrySet()) { + for (Entry> oiEntry : aliasToObjectInspectors + .entrySet()) { Byte alias = oiEntry.getKey(); List oiList = oiEntry.getValue(); - ArrayList fieldOIList = new ArrayList(oiList.size()); - for (int i=0; i fieldOIList = new ArrayList( + oiList.size()); + for (int i = 0; i < oiList.size(); i++) { + fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(oiList + .get(i), ObjectInspectorCopyOption.WRITABLE)); } result.put(alias, fieldOIList); } @@ -182,8 +199,9 @@ } - protected static ObjectInspector getJoinOutputObjectInspector(Byte[] order, - Map> aliasToObjectInspectors, T conf) { + protected static ObjectInspector getJoinOutputObjectInspector( + Byte[] order, Map> aliasToObjectInspectors, + T conf) { ArrayList structFieldObjectInspectors = new ArrayList(); for (Byte alias : order) { List oiList = aliasToObjectInspectors.get(alias); @@ -191,15 +209,19 @@ } StructObjectInspector joinOutputObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(conf.getOutputColumnNames(), structFieldObjectInspectors); + .getStandardStructObjectInspector(conf.getOutputColumnNames(), + structFieldObjectInspectors); return joinOutputObjectInspector; } Configuration hconf; + + @Override protected void initializeOp(Configuration hconf) throws HiveException { this.handleSkewJoin = conf.getHandleSkewJoin(); this.hconf = hconf; - LOG.info("COMMONJOIN " + ((StructObjectInspector)inputObjInspectors[0]).getTypeName()); + LOG.info("COMMONJOIN " + + ((StructObjectInspector) inputObjInspectors[0]).getTypeName()); totalSz = 0; // Map that contains the rows for each alias storage = new HashMap>>(); @@ -216,32 +238,37 
@@ totalSz = populateJoinKeyValue(joinValues, conf.getExprs()); - joinValuesObjectInspectors = getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors); + joinValuesObjectInspectors = getObjectInspectorsFromEvaluators(joinValues, + inputObjInspectors); joinValuesStandardObjectInspectors = getStandardObjectInspectors(joinValuesObjectInspectors); dummyObj = new Object[numAliases]; dummyObjVectors = new RowContainer[numAliases]; - joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL); - joinCacheSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINCACHESIZE); - - // construct dummy null row (indicating empty table) and - // construct spill table serde which is used if input is too + joinEmitInterval = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEJOINEMITINTERVAL); + joinCacheSize = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEJOINCACHESIZE); + + // construct dummy null row (indicating empty table) and + // construct spill table serde which is used if input is too // large to fit into main memory. byte pos = 0; for (Byte alias : order) { int sz = conf.getExprs().get(alias).size(); ArrayList nr = new ArrayList(sz); - - for (int j = 0; j < sz; j++) + + for (int j = 0; j < sz; j++) { nr.add(null); + } dummyObj[pos] = nr; // there should be only 1 dummy object in the RowContainer - RowContainer> values = getRowContainer(hconf, pos, alias, 1); + RowContainer> values = getRowContainer(hconf, pos, + alias, 1); values.add((ArrayList) dummyObj[pos]); dummyObjVectors[pos] = values; - // if serde is null, the input doesn't need to be spilled out + // if serde is null, the input doesn't need to be spilled out // e.g., the output columns does not contains the input table RowContainer rc = getRowContainer(hconf, pos, alias, joinCacheSize); storage.put(pos, rc); @@ -251,39 +278,45 @@ forwardCache = new Object[totalSz]; - outputObjInspector = getJoinOutputObjectInspector(order, joinValuesStandardObjectInspectors, conf); - LOG.info("JOIN " + ((StructObjectInspector)outputObjInspector).getTypeName() + " totalsz = " + totalSz); - + outputObjInspector = getJoinOutputObjectInspector(order, + joinValuesStandardObjectInspectors, conf); + LOG.info("JOIN " + + ((StructObjectInspector) outputObjInspector).getTypeName() + + " totalsz = " + totalSz); + } - RowContainer getRowContainer(Configuration hconf, byte pos, Byte alias, int containerSize) - throws HiveException { + RowContainer getRowContainer(Configuration hconf, byte pos, Byte alias, + int containerSize) throws HiveException { tableDesc tblDesc = getSpillTableDesc(alias); SerDe serde = getSpillSerDe(alias); - - if ( serde == null ) + + if (serde == null) { containerSize = 1; - + } + RowContainer rc = new RowContainer(containerSize, hconf); StructObjectInspector rcOI = null; - if(tblDesc != null) { - // arbitrary column names used internally for serializing to spill table + if (tblDesc != null) { + // arbitrary column names used internally for serializing to spill table List colNames = Utilities.getColumnNames(tblDesc.getProperties()); // object inspector for serializing input tuples rcOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, - joinValuesStandardObjectInspectors.get(pos)); + joinValuesStandardObjectInspectors.get(pos)); } rc.setSerDe(serde, rcOI); rc.setTableDesc(tblDesc); return rc; } - + private SerDe getSpillSerDe(byte alias) { tableDesc desc = getSpillTableDesc(alias); - if ( desc == null ) + if (desc == null) { return null; - SerDe sd = (SerDe) 
ReflectionUtils.newInstance(desc.getDeserializerClass(), null); + } + SerDe sd = (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), + null); try { sd.initialize(null, desc.getProperties()); } catch (SerDeException e) { @@ -292,64 +325,72 @@ } return sd; } - + transient boolean newGroupStarted = false; - + public tableDesc getSpillTableDesc(Byte alias) { - if(spillTableDesc == null || spillTableDesc.size() == 0) + if (spillTableDesc == null || spillTableDesc.size() == 0) { initSpillTables(); + } return spillTableDesc.get(alias); } - + public Map getSpillTableDesc() { - if(spillTableDesc == null) + if (spillTableDesc == null) { initSpillTables(); + } return spillTableDesc; } - + private void initSpillTables() { Map> exprs = conf.getExprs(); spillTableDesc = new HashMap(exprs.size()); for (int tag = 0; tag < exprs.size(); tag++) { - List valueCols = exprs.get((byte)tag); + List valueCols = exprs.get((byte) tag); int columnSize = valueCols.size(); StringBuffer colNames = new StringBuffer(); StringBuffer colTypes = new StringBuffer(); - if ( columnSize <= 0 ) + if (columnSize <= 0) { continue; + } for (int k = 0; k < columnSize; k++) { - String newColName = tag + "_VALUE_" + k; // any name, it does not matter. + String newColName = tag + "_VALUE_" + k; // any name, it does not + // matter. colNames.append(newColName); - colNames.append(','); - colTypes.append(valueCols.get(k).getTypeString()); - colTypes.append(','); + colNames.append(','); + colTypes.append(valueCols.get(k).getTypeString()); + colTypes.append(','); } // remove the last ',' - colNames.setLength(colNames.length()-1); - colTypes.setLength(colTypes.length()-1); - tableDesc tblDesc = - new tableDesc(LazyBinarySerDe.class, - SequenceFileInputFormat.class, - HiveSequenceFileOutputFormat.class, - Utilities.makeProperties(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, - org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, colNames.toString(), - org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes.toString())); - spillTableDesc.put((byte)tag, tblDesc); + colNames.setLength(colNames.length() - 1); + colTypes.setLength(colTypes.length() - 1); + tableDesc tblDesc = new tableDesc(LazyBinarySerDe.class, + SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class, + Utilities.makeProperties( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + + Utilities.ctrlaCode, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, colNames + .toString(), + org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, + colTypes.toString())); + spillTableDesc.put((byte) tag, tblDesc); } } - + + @Override public void startGroup() throws HiveException { LOG.trace("Join: Starting new group"); newGroupStarted = true; - for (RowContainer> alw: storage.values()) { + for (RowContainer> alw : storage.values()) { alw.clear(); } } protected int getNextSize(int sz) { // A very simple counter to keep track of join entries for a key - if (sz >= 100000) + if (sz >= 100000) { return sz + 100000; + } return 2 * sz; } @@ -357,19 +398,19 @@ transient protected Byte alias; /** - * Return the value as a standard object. - * StandardObject can be inspected by a standard ObjectInspector. + * Return the value as a standard object. StandardObject can be inspected by a + * standard ObjectInspector. 
*/ protected static ArrayList computeValues(Object row, - List valueFields, List valueFieldsOI) throws HiveException { + List valueFields, List valueFieldsOI) + throws HiveException { // Compute the values ArrayList nr = new ArrayList(valueFields.size()); - for (int i=0; i joinObjectsInnerJoin(ArrayList resNulls, - ArrayList inputNulls, ArrayList newObj, - IntermediateObject intObj, int left, boolean newObjNull) { - if (newObjNull) + private ArrayList joinObjectsInnerJoin( + ArrayList resNulls, ArrayList inputNulls, + ArrayList newObj, IntermediateObject intObj, int left, + boolean newObjNull) { + if (newObjNull) { return resNulls; + } Iterator nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); @@ -425,14 +469,13 @@ /** * Implement semi join operator. */ - private ArrayList joinObjectsLeftSemiJoin(ArrayList resNulls, - ArrayList inputNulls, - ArrayList newObj, - IntermediateObject intObj, - int left, - boolean newObjNull) { - if (newObjNull) + private ArrayList joinObjectsLeftSemiJoin( + ArrayList resNulls, ArrayList inputNulls, + ArrayList newObj, IntermediateObject intObj, int left, + boolean newObjNull) { + if (newObjNull) { return resNulls; + } Iterator nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); @@ -457,10 +500,11 @@ boolean oldObjNull = oldNulls[left]; boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); - if (oldObjNull) + if (oldObjNull) { newNulls[oldNulls.length] = true; - else + } else { newNulls[oldNulls.length] = newObjNull; + } resNulls.add(newNulls); } return resNulls; @@ -470,14 +514,16 @@ ArrayList resNulls, ArrayList inputNulls, ArrayList newObj, IntermediateObject intObj, int left, boolean newObjNull, boolean firstRow) { - if (newObjNull) + if (newObjNull) { return resNulls; + } if (inputNulls.isEmpty() && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; - for (int i = 0; i < intObj.getCurSize() - 1; i++) + for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; - newNulls[intObj.getCurSize()-1] = newObjNull; + } + newNulls[intObj.getCurSize() - 1] = newObjNull; resNulls.add(newNulls); return resNulls; } @@ -505,8 +551,9 @@ resNulls.add(newNulls); } else if (allOldObjsNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; - for (int i = 0; i < intObj.getCurSize() - 1; i++) + for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; + } newNulls[oldNulls.length] = newObjNull; resNulls.add(newNulls); return resNulls; @@ -533,9 +580,10 @@ if (inputNulls.isEmpty() && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; - for (int i = 0; i < intObj.getCurSize() - 1; i++) + for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; - newNulls[intObj.getCurSize()-1] = newObjNull; + } + newNulls[intObj.getCurSize() - 1] = newObjNull; resNulls.add(newNulls); return resNulls; } @@ -570,8 +618,9 @@ if (allOldObjsNull && !rhsPreserved) { newNulls = new boolean[intObj.getCurSize()]; - for (int i = 0; i < oldNulls.length; i++) + for (int i = 0; i < oldNulls.length; i++) { newNulls[i] = true; + } newNulls[oldNulls.length] = false; resNulls.add(newNulls); rhsPreserved = true; @@ -589,13 +638,14 @@ * inner join. The outer joins are processed appropriately. 
*/ private ArrayList joinObjects(ArrayList inputNulls, - ArrayList newObj, IntermediateObject intObj, - int joinPos, boolean firstRow) { + ArrayList newObj, IntermediateObject intObj, int joinPos, + boolean firstRow) { ArrayList resNulls = new ArrayList(); boolean newObjNull = newObj == dummyObj[joinPos] ? true : false; if (joinPos == 0) { - if (newObjNull) + if (newObjNull) { return null; + } boolean[] nulls = new boolean[1]; nulls[0] = newObjNull; resNulls.add(nulls); @@ -609,32 +659,35 @@ if (((type == joinDesc.RIGHT_OUTER_JOIN) || (type == joinDesc.FULL_OUTER_JOIN)) && !newObjNull && (inputNulls == null) && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; - for (int i = 0; i < newNulls.length - 1; i++) + for (int i = 0; i < newNulls.length - 1; i++) { newNulls[i] = true; + } newNulls[newNulls.length - 1] = false; resNulls.add(newNulls); return resNulls; } - if (inputNulls == null) + if (inputNulls == null) { return null; + } - if (type == joinDesc.INNER_JOIN) + if (type == joinDesc.INNER_JOIN) { return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); - else if (type == joinDesc.LEFT_OUTER_JOIN) + } else if (type == joinDesc.LEFT_OUTER_JOIN) { return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); - else if (type == joinDesc.RIGHT_OUTER_JOIN) + } else if (type == joinDesc.RIGHT_OUTER_JOIN) { return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj, - left, newObjNull, firstRow); - else if (type == joinDesc.LEFT_SEMI_JOIN) + left, newObjNull, firstRow); + } else if (type == joinDesc.LEFT_SEMI_JOIN) { return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj, - left, newObjNull); + left, newObjNull); + } assert (type == joinDesc.FULL_OUTER_JOIN); return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, - newObjNull, firstRow); + newObjNull, firstRow); } /* @@ -645,7 +698,7 @@ * are accounted for, the output is forwared appropriately. 
*/ private void genObject(ArrayList inputNulls, int aliasNum, - IntermediateObject intObj, boolean firstRow) throws HiveException { + IntermediateObject intObj, boolean firstRow) throws HiveException { boolean childFirstRow = firstRow; boolean skipping = false; @@ -653,23 +706,22 @@ // search for match in the rhs table RowContainer> aliasRes = storage.get(order[aliasNum]); - - for (ArrayList newObj = aliasRes.first(); - newObj != null; - newObj = aliasRes.next()) { + for (ArrayList newObj = aliasRes.first(); newObj != null; newObj = aliasRes + .next()) { + // check for skipping in case of left semi join - if (aliasNum > 0 && - condn[aliasNum - 1].getType() == joinDesc.LEFT_SEMI_JOIN && - newObj != dummyObj[aliasNum] ) { // successful match + if (aliasNum > 0 + && condn[aliasNum - 1].getType() == joinDesc.LEFT_SEMI_JOIN + && newObj != dummyObj[aliasNum]) { // successful match skipping = true; } intObj.pushObj(newObj); - + // execute the actual join algorithm - ArrayList newNulls = joinObjects(inputNulls, newObj, intObj, - aliasNum, childFirstRow); + ArrayList newNulls = joinObjects(inputNulls, newObj, intObj, + aliasNum, childFirstRow); // recursively call the join the other rhs tables genObject(newNulls, aliasNum + 1, intObj, firstRow); @@ -677,14 +729,16 @@ intObj.popObj(); firstRow = false; - // if left-semi-join found a match, skipping the rest of the rows in the rhs table of the semijoin - if ( skipping ) { + // if left-semi-join found a match, skipping the rest of the rows in the + // rhs table of the semijoin + if (skipping) { break; } } } else { - if (inputNulls == null) + if (inputNulls == null) { return; + } Iterator nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] nullsVec = nullsIter.next(); @@ -695,18 +749,18 @@ /** * Forward a record of join results. - * + * * @throws HiveException */ + @Override public void endGroup() throws HiveException { LOG.trace("Join Op: endGroup called: numValues=" + numAliases); - checkAndGenObject(); } private void genUniqueJoinObject(int aliasNum, IntermediateObject intObj) - throws HiveException { + throws HiveException { if (aliasNum == numAliases) { int p = 0; for (int i = 0; i < numAliases; i++) { @@ -722,33 +776,30 @@ } RowContainer> alias = storage.get(order[aliasNum]); - for (ArrayList row = alias.first(); - row != null; - row = alias.next() ) { + for (ArrayList row = alias.first(); row != null; row = alias.next()) { intObj.pushObj(row); - genUniqueJoinObject(aliasNum+1, intObj); + genUniqueJoinObject(aliasNum + 1, intObj); intObj.popObj(); } } protected void checkAndGenObject() throws HiveException { if (condn[0].getType() == joinDesc.UNIQUE_JOIN) { - IntermediateObject intObj = - new IntermediateObject(new ArrayList[numAliases], 0); + new IntermediateObject(new ArrayList[numAliases], 0); // Check if results need to be emitted. 
// Results only need to be emitted if there is a non-null entry in a table // that is preserved or if there are no non-null entries boolean preserve = false; // Will be true if there is a non-null entry - // in a preserved table + // in a preserved table boolean hasNulls = false; // Will be true if there are null entries for (int i = 0; i < numAliases; i++) { Byte alias = order[i]; - RowContainer> alw = storage.get(alias); - if ( alw.size() == 0) { - alw.add((ArrayList)dummyObj[i]); + RowContainer> alw = storage.get(alias); + if (alw.size() == 0) { + alw.add((ArrayList) dummyObj[i]); hasNulls = true; - } else if(condn[i].getPreserved()) { + } else if (condn[i].getPreserved()) { preserve = true; } } @@ -758,38 +809,42 @@ } LOG.trace("calling genUniqueJoinObject"); - genUniqueJoinObject(0, new IntermediateObject(new ArrayList[numAliases], 0)); + genUniqueJoinObject(0, new IntermediateObject(new ArrayList[numAliases], + 0)); LOG.trace("called genUniqueJoinObject"); } else { // does any result need to be emitted for (int i = 0; i < numAliases; i++) { Byte alias = order[i]; - RowContainer> alw = storage.get(alias); + RowContainer> alw = storage.get(alias); if (alw.size() == 0) { if (noOuterJoin) { LOG.trace("No data for alias=" + i); return; } else { - alw.add((ArrayList)dummyObj[i]); + alw.add((ArrayList) dummyObj[i]); } } } LOG.trace("calling genObject"); - genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0), true); + genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0), + true); LOG.trace("called genObject"); } } /** * All done - * + * */ + @Override public void closeOp(boolean abort) throws HiveException { LOG.trace("Join Op close"); - for ( RowContainer> alw: storage.values() ) { - if(alw != null) //it maybe null for mapjoins - alw.clear(); // clean up the temp files + for (RowContainer> alw : storage.values()) { + if (alw != null) { + alw.clear(); // clean up the temp files + } } storage.clear(); } @@ -807,7 +862,8 @@ } /** - * @param posToAliasMap the posToAliasMap to set + * @param posToAliasMap + * the posToAliasMap to set */ public void setPosToAliasMap(Map> posToAliasMap) { this.posToAliasMap = posToAliasMap; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec; +import static org.apache.commons.lang.StringUtils.join; +import static org.apache.hadoop.util.StringUtils.stringifyException; + import java.io.BufferedWriter; import java.io.DataOutput; import java.io.FileNotFoundException; @@ -57,7 +60,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; @@ -86,19 +88,16 @@ import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.shims.ShimLoader; -import static org.apache.hadoop.util.StringUtils.stringifyException; -import static org.apache.commons.lang.StringUtils.join; - /** * DDLTask implementation - * + * **/ public class DDLTask extends Task implements Serializable { private static final long 
serialVersionUID = 1L; static final private Log LOG = LogFactory.getLog("hive.ql.exec.DDLTask"); transient HiveConf conf; - static final private int separator = Utilities.tabCode; + static final private int separator = Utilities.tabCode; static final private int terminator = Utilities.newLineCode; public DDLTask() { @@ -142,7 +141,7 @@ if (crtView != null) { return createView(db, crtView); } - + AddPartitionDesc addPartitionDesc = work.getAddPartitionDesc(); if (addPartitionDesc != null) { return addPartition(db, addPartitionDesc); @@ -188,11 +187,13 @@ LOG.debug(stringifyException(e)); return 1; } catch (HiveException e) { - console.printError("FAILED: Error in metadata: " + e.getMessage(), "\n" + stringifyException(e)); + console.printError("FAILED: Error in metadata: " + e.getMessage(), "\n" + + stringifyException(e)); LOG.debug(stringifyException(e)); return 1; } catch (Exception e) { - console.printError("Failed with exception " + e.getMessage(), "\n" + stringifyException(e)); + console.printError("Failed with exception " + e.getMessage(), "\n" + + stringifyException(e)); return (1); } assert false; @@ -201,44 +202,48 @@ /** * Add a partition to a table. - * @param db Database to add the partition to. - * @param addPartitionDesc Add this partition. + * + * @param db + * Database to add the partition to. + * @param addPartitionDesc + * Add this partition. * @return Returns 0 when execution succeeds and above 0 if it fails. * @throws HiveException */ private int addPartition(Hive db, AddPartitionDesc addPartitionDesc) - throws HiveException { + throws HiveException { - Table tbl = db.getTable(addPartitionDesc.getDbName(), - addPartitionDesc.getTableName()); + Table tbl = db.getTable(addPartitionDesc.getDbName(), addPartitionDesc + .getTableName()); if (tbl.isView()) { throw new HiveException("Cannot use ALTER TABLE on a view"); } - if(addPartitionDesc.getLocation() == null) { + if (addPartitionDesc.getLocation() == null) { db.createPartition(tbl, addPartitionDesc.getPartSpec()); } else { - //set partition path relative to table - db.createPartition(tbl, addPartitionDesc.getPartSpec(), - new Path(tbl.getPath(), addPartitionDesc.getLocation())); + // set partition path relative to table + db.createPartition(tbl, addPartitionDesc.getPartSpec(), new Path(tbl + .getPath(), addPartitionDesc.getLocation())); } - Partition part = db.getPartition(tbl, addPartitionDesc.getPartSpec(), false); + Partition part = db + .getPartition(tbl, addPartitionDesc.getPartSpec(), false); work.getOutputs().add(new WriteEntity(part)); return 0; } /** - * MetastoreCheck, see if the data in the metastore matches - * what is on the dfs. - * Current version checks for tables and partitions that - * are either missing on disk on in the metastore. - * - * @param db The database in question. - * @param msckDesc Information about the tables and partitions - * we want to check for. + * MetastoreCheck, see if the data in the metastore matches what is on the + * dfs. Current version checks for tables and partitions that are either + * missing on disk on in the metastore. + * + * @param db + * The database in question. + * @param msckDesc + * Information about the tables and partitions we want to check for. * @return Returns 0 when execution succeeds and above 0 if it fails. 
*/ private int msck(Hive db, MsckDesc msckDesc) { @@ -246,19 +251,17 @@ List repairOutput = new ArrayList(); try { HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db); - checker.checkMetastore( - MetaStoreUtils.DEFAULT_DATABASE_NAME, msckDesc.getTableName(), - msckDesc.getPartitionSpec(), - result); - if(msckDesc.isRepairPartitions()) { + checker.checkMetastore(MetaStoreUtils.DEFAULT_DATABASE_NAME, msckDesc + .getTableName(), msckDesc.getPartitionSpec(), result); + if (msckDesc.isRepairPartitions()) { Table table = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, msckDesc.getTableName()); for (CheckResult.PartitionResult part : result.getPartitionsNotInMs()) { try { - db.createPartition(table, - Warehouse.makeSpecFromName(part.getPartitionName())); - repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName() - + ':' + part.getPartitionName()); + db.createPartition(table, Warehouse.makeSpecFromName(part + .getPartitionName())); + repairOutput.add("Repair: Added partition to metastore " + + msckDesc.getTableName() + ':' + part.getPartitionName()); } catch (Exception e) { LOG.warn("Repair error, could not add partition to metastore: ", e); } @@ -274,8 +277,8 @@ BufferedWriter resultOut = null; try { FileSystem fs = msckDesc.getResFile().getFileSystem(conf); - resultOut = new BufferedWriter( - new OutputStreamWriter(fs.create(msckDesc.getResFile()))); + resultOut = new BufferedWriter(new OutputStreamWriter(fs + .create(msckDesc.getResFile()))); boolean firstWritten = false; firstWritten |= writeMsckResult(result.getTablesNotInMs(), @@ -298,7 +301,7 @@ LOG.warn("Failed to save metacheck output: ", e); return 1; } finally { - if(resultOut != null) { + if (resultOut != null) { try { resultOut.close(); } catch (IOException e) { @@ -314,18 +317,24 @@ /** * Write the result of msck to a writer. - * @param result The result we're going to write - * @param msg Message to write. - * @param out Writer to write to - * @param wrote if any previous call wrote data + * + * @param result + * The result we're going to write + * @param msg + * Message to write. + * @param out + * Writer to write to + * @param wrote + * if any previous call wrote data * @return true if something was written - * @throws IOException In case the writing fails + * @throws IOException + * In case the writing fails */ private boolean writeMsckResult(List result, String msg, Writer out, boolean wrote) throws IOException { - if(!result.isEmpty()) { - if(wrote) { + if (!result.isEmpty()) { + if (wrote) { out.write(terminator); } @@ -342,14 +351,17 @@ /** * Write a list of partitions to a file. - * - * @param db The database in question. - * @param showParts These are the partitions we're interested in. + * + * @param db + * The database in question. + * @param showParts + * These are the partitions we're interested in. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. 
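The msck() hunk above reduces to one repair loop: every partition directory that the HiveMetaStoreChecker found on the filesystem but not in the metastore gets registered. A hedged sketch of just that loop, using only calls that appear in the hunk; the table name is a placeholder, the CheckResult is assumed to have already been populated by checkMetastore(), and error handling and result-file writing are omitted:

import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.metadata.CheckResult;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;

// Sketch only: the "repair" half of msck() stripped down to its core loop.
public class MsckRepairSketch {
  public static void repair(Hive db, String tableName, CheckResult result)
      throws Exception {
    Table table = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    for (CheckResult.PartitionResult part : result.getPartitionsNotInMs()) {
      // Turn a "ds=2010-01-25/hr=12" style name back into a partition spec
      // and register the partition in the metastore.
      db.createPartition(table, Warehouse.makeSpecFromName(part
          .getPartitionName()));
    }
  }
}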
*/ - private int showPartitions(Hive db, - showPartitionsDesc showParts) throws HiveException { + private int showPartitions(Hive db, showPartitionsDesc showParts) + throws HiveException { // get the partitions for the table and populate the output String tabName = showParts.getTabName(); Table tbl = null; @@ -376,7 +388,7 @@ outStream.writeBytes(iterParts.next()); outStream.write(terminator); } - ((FSDataOutputStream)outStream).close(); + ((FSDataOutputStream) outStream).close(); } catch (FileNotFoundException e) { LOG.info("show partitions: " + stringifyException(e)); throw new HiveException(e.toString()); @@ -392,27 +404,30 @@ /** * Write a list of the tables in the database to a file. - * - * @param db The database in question. - * @param showTbls These are the tables we're interested in. + * + * @param db + * The database in question. + * @param showTbls + * These are the tables we're interested in. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. */ - private int showTables(Hive db, showTablesDesc showTbls) - throws HiveException { + private int showTables(Hive db, showTablesDesc showTbls) throws HiveException { // get the tables for the desired pattenn - populate the output stream List tbls = null; if (showTbls.getPattern() != null) { LOG.info("pattern: " + showTbls.getPattern()); tbls = db.getTablesByPattern(showTbls.getPattern()); LOG.info("results : " + tbls.size()); - } else + } else { tbls = db.getAllTables(); + } // write the results in the file try { FileSystem fs = showTbls.getResFile().getFileSystem(conf); - DataOutput outStream = (DataOutput)fs.create(showTbls.getResFile()); + DataOutput outStream = (DataOutput) fs.create(showTbls.getResFile()); SortedSet sortedTbls = new TreeSet(tbls); Iterator iterTbls = sortedTbls.iterator(); @@ -421,7 +436,7 @@ outStream.writeBytes(iterTbls.next()); outStream.write(terminator); } - ((FSDataOutputStream)outStream).close(); + ((FSDataOutputStream) outStream).close(); } catch (FileNotFoundException e) { LOG.warn("show table: " + stringifyException(e)); return 1; @@ -436,26 +451,28 @@ /** * Write a list of the user defined functions to a file. - * - * @param showFuncs are the functions we're interested in. + * + * @param showFuncs + * are the functions we're interested in. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. 
*/ - private int showFunctions(showFunctionsDesc showFuncs) - throws HiveException { + private int showFunctions(showFunctionsDesc showFuncs) throws HiveException { // get the tables for the desired pattenn - populate the output stream Set funcs = null; if (showFuncs.getPattern() != null) { LOG.info("pattern: " + showFuncs.getPattern()); funcs = FunctionRegistry.getFunctionNames(showFuncs.getPattern()); LOG.info("results : " + funcs.size()); - } else + } else { funcs = FunctionRegistry.getFunctionNames(); + } // write the results in the file try { FileSystem fs = showFuncs.getResFile().getFileSystem(conf); - DataOutput outStream = (DataOutput)fs.create(showFuncs.getResFile()); + DataOutput outStream = (DataOutput) fs.create(showFuncs.getResFile()); SortedSet sortedFuncs = new TreeSet(funcs); Iterator iterFuncs = sortedFuncs.iterator(); @@ -464,7 +481,7 @@ outStream.writeBytes(iterFuncs.next()); outStream.write(terminator); } - ((FSDataOutputStream)outStream).close(); + ((FSDataOutputStream) outStream).close(); } catch (FileNotFoundException e) { LOG.warn("show function: " + stringifyException(e)); return 1; @@ -479,18 +496,18 @@ /** * Shows a description of a function. - * - * @param descFunc is the function we are describing + * + * @param descFunc + * is the function we are describing * @throws HiveException */ - private int describeFunction(descFunctionDesc descFunc) - throws HiveException { + private int describeFunction(descFunctionDesc descFunc) throws HiveException { String funcName = descFunc.getName(); // write the results in the file try { FileSystem fs = descFunc.getResFile().getFileSystem(conf); - DataOutput outStream = (DataOutput)fs.create(descFunc.getResFile()); + DataOutput outStream = (DataOutput) fs.create(descFunc.getResFile()); // get the function documentation description desc = null; @@ -504,18 +521,20 @@ } if (desc != null) { outStream.writeBytes(desc.value().replace("_FUNC_", funcName)); - if(descFunc.isExtended()) { + if (descFunc.isExtended()) { Set synonyms = FunctionRegistry.getFunctionSynonyms(funcName); if (synonyms.size() > 0) { outStream.writeBytes("\nSynonyms: " + join(synonyms, ", ")); } if (desc.extended().length() > 0) { - outStream.writeBytes("\n"+desc.extended().replace("_FUNC_", funcName)); + outStream.writeBytes("\n" + + desc.extended().replace("_FUNC_", funcName)); } } } else { if (funcClass != null) { - outStream.writeBytes("There is no documentation for function '" + funcName + "'"); + outStream.writeBytes("There is no documentation for function '" + + funcName + "'"); } else { outStream.writeBytes("Function '" + funcName + "' does not exist."); } @@ -523,7 +542,7 @@ outStream.write(terminator); - ((FSDataOutputStream)outStream).close(); + ((FSDataOutputStream) outStream).close(); } catch (FileNotFoundException e) { LOG.warn("describe function: " + stringifyException(e)); return 1; @@ -536,16 +555,17 @@ return 0; } - /** * Write the status of tables to a file. - * - * @param db The database in question. - * @param showTblStatus tables we are interested in + * + * @param db + * The database in question. + * @param showTblStatus + * tables we are interested in * @return Return 0 when execution succeeds and above 0 if it fails. */ private int showTableStatus(Hive db, showTableStatusDesc showTblStatus) - throws HiveException { + throws HiveException { // get the tables for the desired pattenn - populate the output stream List tbls = new ArrayList
(); Map part = showTblStatus.getPartSpec(); @@ -582,27 +602,28 @@ String tableName = tbl.getName(); String tblLoc = null; String inputFormattCls = null; - String outputFormattCls = null; - if (part != null) { - if(par !=null) { - tblLoc = par.getDataLocation().toString(); - inputFormattCls = par.getTPartition().getSd().getInputFormat(); - outputFormattCls = par.getTPartition().getSd().getOutputFormat(); - } - } else { - tblLoc = tbl.getDataLocation().toString(); - inputFormattCls = tbl.getInputFormatClass().getName(); - outputFormattCls = tbl.getOutputFormatClass().getName(); - } - + String outputFormattCls = null; + if (part != null) { + if (par != null) { + tblLoc = par.getDataLocation().toString(); + inputFormattCls = par.getTPartition().getSd().getInputFormat(); + outputFormattCls = par.getTPartition().getSd().getOutputFormat(); + } + } else { + tblLoc = tbl.getDataLocation().toString(); + inputFormattCls = tbl.getInputFormatClass().getName(); + outputFormattCls = tbl.getOutputFormatClass().getName(); + } + String owner = tbl.getOwner(); List cols = tbl.getCols(); String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols); boolean isPartitioned = tbl.isPartitioned(); String partitionCols = ""; - if (isPartitioned) + if (isPartitioned) { partitionCols = MetaStoreUtils.getDDLFromFieldSchema( "partition_columns", tbl.getPartCols()); + } outStream.writeBytes("tableName:" + tableName); outStream.write(terminator); @@ -626,7 +647,8 @@ if (isPartitioned) { if (par == null) { for (Partition curPart : db.getPartitions(tbl)) { - locations.add(new Path(curPart.getTPartition().getSd().getLocation())); + locations.add(new Path(curPart.getTPartition().getSd() + .getLocation())); } } else { locations.add(new Path(par.getTPartition().getSd().getLocation())); @@ -653,20 +675,24 @@ /** * Write the description of a table to a file. - * - * @param db The database in question. - * @param descTbl This is the table we're interested in. + * + * @param db + * The database in question. + * @param descTbl + * This is the table we're interested in. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. */ private int describeTable(Hive db, descTableDesc descTbl) - throws HiveException { + throws HiveException { String colPath = descTbl.getTableName(); String tableName = colPath.substring(0, colPath.indexOf('.') == -1 ? colPath.length() : colPath.indexOf('.')); // describe the table - populate the output stream - Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, false); + Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, + false); Partition part = null; try { if (tbl == null) { @@ -712,7 +738,7 @@ cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer()); } FileSystem fs = descTbl.getResFile().getFileSystem(conf); - DataOutput outStream = (DataOutput)fs.create(descTbl.getResFile()); + DataOutput outStream = (DataOutput) fs.create(descTbl.getResFile()); Iterator iterCols = cols.iterator(); while (iterCols.hasNext()) { // create a row per column @@ -735,7 +761,8 @@ outStream.write(separator); outStream.writeBytes(col.getType()); outStream.write(separator); - outStream.writeBytes(col.getComment() == null ? "" : col.getComment()); + outStream + .writeBytes(col.getComment() == null ? 
"" : col.getComment()); outStream.write(terminator); } @@ -800,7 +827,8 @@ tmpStatus = fs.getFileStatus(locations.get(0)); } } catch (IOException e) { - LOG.warn("Cannot access File System. File System status will be unknown: ", e); + LOG.warn( + "Cannot access File System. File System status will be unknown: ", e); unknown = true; } @@ -813,28 +841,37 @@ long updateTime = status.getModificationTime(); // no matter loc is the table location or part location, it must be a // directory. - if (!status.isDir()) + if (!status.isDir()) { continue; - if (accessTime > lastAccessTime) + } + if (accessTime > lastAccessTime) { lastAccessTime = accessTime; - if (updateTime > lastUpdateTime) + } + if (updateTime > lastUpdateTime) { lastUpdateTime = updateTime; + } for (FileStatus currentStatus : files) { - if (currentStatus.isDir()) + if (currentStatus.isDir()) { continue; + } numOfFiles++; long fileLen = currentStatus.getLen(); totalFileSize += fileLen; - if (fileLen > maxFileSize) + if (fileLen > maxFileSize) { maxFileSize = fileLen; - if (fileLen < minFileSize) + } + if (fileLen < minFileSize) { minFileSize = fileLen; - accessTime = ShimLoader.getHadoopShims().getAccessTime(currentStatus); + } + accessTime = ShimLoader.getHadoopShims().getAccessTime( + currentStatus); updateTime = currentStatus.getModificationTime(); - if (accessTime > lastAccessTime) + if (accessTime > lastAccessTime) { lastAccessTime = accessTime; - if (updateTime > lastUpdateTime) + } + if (updateTime > lastUpdateTime) { lastUpdateTime = updateTime; + } } } catch (IOException e) { // ignore @@ -843,59 +880,69 @@ } String unknownString = "unknown"; - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("totalNumberFiles:"); outStream.writeBytes(unknown ? unknownString : "" + numOfFiles); outStream.write(terminator); - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("totalFileSize:"); outStream.writeBytes(unknown ? unknownString : "" + totalFileSize); outStream.write(terminator); - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("maxFileSize:"); outStream.writeBytes(unknown ? unknownString : "" + maxFileSize); outStream.write(terminator); - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("minFileSize:"); - if (numOfFiles > 0) + if (numOfFiles > 0) { outStream.writeBytes(unknown ? unknownString : "" + minFileSize); - else + } else { outStream.writeBytes(unknown ? unknownString : "" + 0); + } outStream.write(terminator); - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("lastAccessTime:"); outStream.writeBytes((unknown || lastAccessTime < 0) ? unknownString : "" + lastAccessTime); outStream.write(terminator); - for (int k = 0; k < indent; k++) + for (int k = 0; k < indent; k++) { outStream.writeBytes(Utilities.INDENT); + } outStream.writeBytes("lastUpdateTime:"); outStream.writeBytes(unknown ? unknownString : "" + lastUpdateTime); outStream.write(terminator); } - /** * Alter a given table. - * - * @param db The database in question. - * @param alterTbl This is the table we're altering. + * + * @param db + * The database in question. + * @param alterTbl + * This is the table we're altering. 
* @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. */ private int alterTable(Hive db, alterTableDesc alterTbl) throws HiveException { // alter the table - Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, alterTbl.getOldName()); + Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, alterTbl + .getOldName()); if (tbl.isView()) { throw new HiveException("Cannot use ALTER TABLE on a view"); @@ -904,11 +951,11 @@ if (alterTbl.getOp() == alterTableDesc.alterTableTypes.RENAME) { tbl.getTTable().setTableName(alterTbl.getNewName()); - } - else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDCOLS) { + } else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDCOLS) { List newCols = alterTbl.getNewCols(); List oldCols = tbl.getCols(); - if (tbl.getSerializationLib().equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { + if (tbl.getSerializationLib().equals( + "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console .printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe"); tbl.setSerializationLib(LazySimpleSerDe.class.getName()); @@ -942,72 +989,73 @@ boolean first = alterTbl.getFirst(); String afterCol = alterTbl.getAfterCol(); FieldSchema column = null; - + boolean found = false; int position = -1; - if(first) + if (first) { position = 0; - + } + int i = 1; - while(iterOldCols.hasNext()) { + while (iterOldCols.hasNext()) { FieldSchema col = iterOldCols.next(); String oldColName = col.getName(); if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) { console.printError("Column '" + newName + "' exists"); return 1; - } else if (oldColName.equalsIgnoreCase(oldName)) { + } else if (oldColName.equalsIgnoreCase(oldName)) { col.setName(newName); if (type != null && !type.trim().equals("")) { col.setType(type); } - if(comment != null) + if (comment != null) { col.setComment(comment); + } found = true; - if(first || (afterCol!=null&& !afterCol.trim().equals(""))) { + if (first || (afterCol != null && !afterCol.trim().equals(""))) { column = col; continue; } - } - + } + if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) { position = i; } - + i++; newCols.add(col); } - - //did not find the column - if(!found) { + + // did not find the column + if (!found) { console.printError("Column '" + oldName + "' does not exist"); return 1; } - //after column is not null, but we did not find it. + // after column is not null, but we did not find it. 
if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) { console.printError("Column '" + afterCol + "' does not exist"); return 1; } - - if (position >= 0) + + if (position >= 0) { newCols.add(position, column); - + } + tbl.getTTable().getSd().setCols(newCols); } else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.REPLACECOLS) { // change SerDe to LazySimpleSerDe if it is columnsetSerDe - if (tbl.getSerializationLib().equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { + if (tbl.getSerializationLib().equals( + "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console .printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe"); tbl.setSerializationLib(LazySimpleSerDe.class.getName()); } else if (!tbl.getSerializationLib().equals( MetadataTypedColumnsetSerDe.class.getName()) - && !tbl.getSerializationLib().equals( - LazySimpleSerDe.class.getName()) - && !tbl.getSerializationLib().equals( - ColumnarSerDe.class.getName()) - && !tbl.getSerializationLib().equals( - DynamicSerDe.class.getName())) { + && !tbl.getSerializationLib().equals(LazySimpleSerDe.class.getName()) + && !tbl.getSerializationLib().equals(ColumnarSerDe.class.getName()) + && !tbl.getSerializationLib().equals(DynamicSerDe.class.getName())) { console .printError("Replace columns is not supported for this table. SerDe may be incompatible."); return 1; @@ -1020,28 +1068,33 @@ alterTbl.getProps()); } else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDSERDE) { tbl.setSerializationLib(alterTbl.getSerdeName()); - if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) + if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) { tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll( alterTbl.getProps()); + } // since serde is modified then do the appropriate things to reset columns // etc tbl.reinitSerDe(); tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getName(), tbl .getDeserializer())); } else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDFILEFORMAT) { - tbl.getTTable().getSd().setInputFormat(alterTbl.getInputFormat()); - tbl.getTTable().getSd().setOutputFormat(alterTbl.getOutputFormat()); - if (alterTbl.getSerdeName() != null) - tbl.setSerializationLib(alterTbl.getSerdeName()); + tbl.getTTable().getSd().setInputFormat(alterTbl.getInputFormat()); + tbl.getTTable().getSd().setOutputFormat(alterTbl.getOutputFormat()); + if (alterTbl.getSerdeName() != null) { + tbl.setSerializationLib(alterTbl.getSerdeName()); + } } else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDCLUSTERSORTCOLUMN) { - //validate sort columns and bucket columns - List columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols()); - Utilities.validateColumnNames(columns, alterTbl.getBucketColumns()); - if (alterTbl.getSortColumns() != null) - Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns())); - tbl.getTTable().getSd().setBucketCols(alterTbl.getBucketColumns()); - tbl.getTTable().getSd().setNumBuckets(alterTbl.getNumberBuckets()); - tbl.getTTable().getSd().setSortCols(alterTbl.getSortColumns()); + // validate sort columns and bucket columns + List columns = Utilities.getColumnNamesFromFieldSchema(tbl + .getCols()); + Utilities.validateColumnNames(columns, alterTbl.getBucketColumns()); + if (alterTbl.getSortColumns() != null) { + Utilities.validateColumnNames(columns, Utilities + .getColumnNamesFromSortCols(alterTbl.getSortColumns())); + } + 
tbl.getTTable().getSd().setBucketCols(alterTbl.getBucketColumns()); + tbl.getTTable().getSd().setNumBuckets(alterTbl.getNumberBuckets()); + tbl.getTTable().getSd().setSortCols(alterTbl.getSortColumns()); } else { console.printError("Unsupported Alter commnad"); return 1; @@ -1051,7 +1104,8 @@ try { tbl.setProperty("last_modified_by", conf.getUser()); } catch (IOException e) { - console.printError("Unable to get current user: " + e.getMessage(), stringifyException(e)); + console.printError("Unable to get current user: " + e.getMessage(), + stringifyException(e)); return 1; } tbl.setProperty("last_modified_time", Long.toString(System @@ -1060,7 +1114,8 @@ try { tbl.checkValidity(); } catch (HiveException e) { - console.printError("Invalid table columns : " + e.getMessage(), stringifyException(e)); + console.printError("Invalid table columns : " + e.getMessage(), + stringifyException(e)); return 1; } @@ -1074,8 +1129,10 @@ return 1; } - // This is kind of hacky - the read entity contains the old table, whereas the write entity - // contains the new table. This is needed for rename - both the old and the new table names are + // This is kind of hacky - the read entity contains the old table, whereas + // the write entity + // contains the new table. This is needed for rename - both the old and the + // new table names are // passed work.getInputs().add(new ReadEntity(oldTbl)); work.getOutputs().add(new WriteEntity(tbl)); @@ -1084,18 +1141,23 @@ /** * Drop a given table. - * - * @param db The database in question. - * @param dropTbl This is the table we're dropping. + * + * @param db + * The database in question. + * @param dropTbl + * This is the table we're dropping. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. 
*/ private int dropTable(Hive db, dropTableDesc dropTbl) throws HiveException { - // We need to fetch the table before it is dropped so that it can be passed to + // We need to fetch the table before it is dropped so that it can be passed + // to // post-execution hook Table tbl = null; try { - tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName()); + tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl + .getTableName()); } catch (InvalidTableException e) { // drop table is idempotent } @@ -1104,39 +1166,45 @@ if (tbl.isView()) { if (!dropTbl.getExpectView()) { throw new HiveException("Cannot drop a view with DROP TABLE"); - } + } } else { if (dropTbl.getExpectView()) { throw new HiveException("Cannot drop a base table with DROP VIEW"); - } + } } } if (dropTbl.getPartSpecs() == null) { // drop the table - db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName()); - if (tbl != null) + db + .dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl + .getTableName()); + if (tbl != null) { work.getOutputs().add(new WriteEntity(tbl)); + } } else { // get all partitions of the table - List partitionNames = db.getPartitionNames(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName(), (short)-1); + List partitionNames = db.getPartitionNames( + MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName(), + (short) -1); Set> partitions = new HashSet>(); for (int i = 0; i < partitionNames.size(); i++) { try { partitions.add(Warehouse.makeSpecFromName(partitionNames.get(i))); } catch (MetaException e) { - LOG.warn("Unrecognized partition name from metastore: " + partitionNames.get(i)); + LOG.warn("Unrecognized partition name from metastore: " + + partitionNames.get(i)); } } // drop partitions in the list List partsToDelete = new ArrayList(); for (Map partSpec : dropTbl.getPartSpecs()) { - Iterator> it = partitions.iterator(); + Iterator> it = partitions.iterator(); while (it.hasNext()) { Map part = it.next(); // test if partSpec matches part boolean match = true; - for (Map.Entry item: partSpec.entrySet()) { + for (Map.Entry item : partSpec.entrySet()) { if (!item.getValue().equals(part.get(item.getKey()))) { match = false; break; @@ -1148,13 +1216,13 @@ } } } - + // drop all existing partitions from the list for (Partition partition : partsToDelete) { console.printInfo("Dropping the partition " + partition.getName()); db.dropPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl .getTableName(), partition.getValues(), true); // drop data for the - // partition + // partition work.getOutputs().add(new WriteEntity(partition)); } } @@ -1162,49 +1230,53 @@ return 0; } - /** * Check if the given serde is valid */ private void validateSerDe(String serdeName) throws HiveException { try { Deserializer d = SerDeUtils.lookupDeserializer(serdeName); - if(d != null) { + if (d != null) { System.out.println("Found class for " + serdeName); } } catch (SerDeException e) { - throw new HiveException ("Cannot validate serde: " + serdeName, e); + throw new HiveException("Cannot validate serde: " + serdeName, e); } } - - /** * Create a new table. - * - * @param db The database in question. - * @param crtTbl This is the table we're creating. + * + * @param db + * The database in question. + * @param crtTbl + * This is the table we're creating. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. 
+ * @throws HiveException + * Throws this exception if an unexpected error occurs. */ private int createTable(Hive db, createTableDesc crtTbl) throws HiveException { // create the table Table tbl = new Table(crtTbl.getTableName()); StorageDescriptor tblStorDesc = tbl.getTTable().getSd(); - if (crtTbl.getBucketCols() != null) + if (crtTbl.getBucketCols() != null) { tblStorDesc.setBucketCols(crtTbl.getBucketCols()); - if (crtTbl.getSortCols() != null) + } + if (crtTbl.getSortCols() != null) { tbl.setSortCols(crtTbl.getSortCols()); - if (crtTbl.getPartCols() != null) + } + if (crtTbl.getPartCols() != null) { tbl.setPartCols(crtTbl.getPartCols()); - if (crtTbl.getNumBuckets() != -1) + } + if (crtTbl.getNumBuckets() != -1) { tblStorDesc.setNumBuckets(crtTbl.getNumBuckets()); + } if (crtTbl.getSerName() != null) { tbl.setSerializationLib(crtTbl.getSerName()); if (crtTbl.getMapProp() != null) { - Iterator> iter = crtTbl.getMapProp() - .entrySet().iterator(); + Iterator> iter = crtTbl.getMapProp().entrySet() + .iterator(); while (iter.hasNext()) { Entry m = (Entry) iter.next(); tbl.setSerdeParam(m.getKey(), m.getValue()); @@ -1220,39 +1292,48 @@ tbl.setSerdeParam(Constants.ESCAPE_CHAR, crtTbl.getFieldEscape()); } - if (crtTbl.getCollItemDelim() != null) - tbl.setSerdeParam(Constants.COLLECTION_DELIM, - crtTbl.getCollItemDelim()); - if (crtTbl.getMapKeyDelim() != null) + if (crtTbl.getCollItemDelim() != null) { + tbl + .setSerdeParam(Constants.COLLECTION_DELIM, crtTbl + .getCollItemDelim()); + } + if (crtTbl.getMapKeyDelim() != null) { tbl.setSerdeParam(Constants.MAPKEY_DELIM, crtTbl.getMapKeyDelim()); - if (crtTbl.getLineDelim() != null) + } + if (crtTbl.getLineDelim() != null) { tbl.setSerdeParam(Constants.LINE_DELIM, crtTbl.getLineDelim()); + } } /** * We use LazySimpleSerDe by default. - * - * If the user didn't specify a SerDe, and any of the columns are not simple types, - * we will have to use DynamicSerDe instead. + * + * If the user didn't specify a SerDe, and any of the columns are not simple + * types, we will have to use DynamicSerDe instead. */ if (crtTbl.getSerName() == null) { - LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName() ); - tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); + LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName()); + tbl + .setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class + .getName()); } else { // let's validate that the serde exists validateSerDe(crtTbl.getSerName()); } - if (crtTbl.getComment() != null) + if (crtTbl.getComment() != null) { tbl.setProperty("comment", crtTbl.getComment()); - if (crtTbl.getLocation() != null) + } + if (crtTbl.getLocation() != null) { tblStorDesc.setLocation(crtTbl.getLocation()); + } tbl.setInputFormatClass(crtTbl.getInputFormat()); tbl.setOutputFormatClass(crtTbl.getOutputFormat()); - if (crtTbl.isExternal()) + if (crtTbl.isExternal()) { tbl.setProperty("EXTERNAL", "TRUE"); + } // If the sorted columns is a superset of bucketed columns, store this fact. // It can be later used to @@ -1281,8 +1362,9 @@ break; } } - if (found) + if (found) { tbl.setProperty("SORTBUCKETCOLSPREFIX", "TRUE"); + } } } @@ -1301,18 +1383,22 @@ return 0; } - /** * Create a new table like an existing table. - * - * @param db The database in question. - * @param crtTbl This is the table we're creating. + * + * @param db + * The database in question. + * @param crtTbl + * This is the table we're creating. 
* @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. */ - private int createTableLike(Hive db, createTableLikeDesc crtTbl) throws HiveException { + private int createTableLike(Hive db, createTableLikeDesc crtTbl) + throws HiveException { // Get the existing table - Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, crtTbl.getLikeTableName()); + Table tbl = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, crtTbl + .getLikeTableName()); StorageDescriptor tblStorDesc = tbl.getTTable().getSd(); tbl.getTTable().setTableName(crtTbl.getTableName()); @@ -1336,14 +1422,16 @@ return 0; } - /** * Create a new view. - * - * @param db The database in question. - * @param crtView This is the view we're creating. + * + * @param db + * The database in question. + * @param crtView + * This is the view we're creating. * @return Returns 0 when execution succeeds and above 0 if it fails. - * @throws HiveException Throws this exception if an unexpected error occurs. + * @throws HiveException + * Throws this exception if an unexpected error occurs. */ private int createView(Hive db, createViewDesc crtView) throws HiveException { Table tbl = new Table(crtView.getViewName()); @@ -1368,7 +1456,8 @@ try { tbl.setOwner(conf.getUser()); } catch (IOException e) { - console.printError("Unable to get current user: " + e.getMessage(), stringifyException(e)); + console.printError("Unable to get current user: " + e.getMessage(), + stringifyException(e)); return 1; } // set create time Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy) @@ -51,6 +51,7 @@ super(); } + @Override public int execute() { try { @@ -64,39 +65,44 @@ if (lfd.getIsDfsDir()) { // Just do a rename on the URIs, they belong to the same FS String mesg = "Moving data to: " + lfd.getTargetDir(); - String mesg_detail = " from " + lfd.getSourceDir(); + String mesg_detail = " from " + lfd.getSourceDir(); console.printInfo(mesg, mesg_detail); // delete the output directory if it already exists fs.delete(targetPath, true); // if source exists, rename. Otherwise, create a empty directory if (fs.exists(sourcePath)) { - if (!fs.rename(sourcePath, targetPath)) - throw new HiveException ("Unable to rename: " + sourcePath + " to: " - + targetPath); - } else - if (!fs.mkdirs(targetPath)) - throw new HiveException ("Unable to make directory: " + targetPath); + if (!fs.rename(sourcePath, targetPath)) { + throw new HiveException("Unable to rename: " + sourcePath + + " to: " + targetPath); + } + } else if (!fs.mkdirs(targetPath)) { + throw new HiveException("Unable to make directory: " + targetPath); + } } else { // This is a local file String mesg = "Copying data to local directory " + lfd.getTargetDir(); - String mesg_detail = " from " + lfd.getSourceDir(); + String mesg_detail = " from " + lfd.getSourceDir(); console.printInfo(mesg, mesg_detail); // delete the existing dest directory LocalFileSystem dstFs = FileSystem.getLocal(conf); - if(dstFs.delete(targetPath, true) || !dstFs.exists(targetPath)) { + if (dstFs.delete(targetPath, true) || !dstFs.exists(targetPath)) { console.printInfo(mesg, mesg_detail); // if source exists, rename. 
Otherwise, create a empty directory - if (fs.exists(sourcePath)) + if (fs.exists(sourcePath)) { fs.copyToLocalFile(sourcePath, targetPath); - else { - if (!dstFs.mkdirs(targetPath)) - throw new HiveException ("Unable to make local directory: " + targetPath); + } else { + if (!dstFs.mkdirs(targetPath)) { + throw new HiveException("Unable to make local directory: " + + targetPath); + } } } else { - throw new AccessControlException("Unable to delete the existing destination directory: " + targetPath); + throw new AccessControlException( + "Unable to delete the existing destination directory: " + + targetPath); } } } @@ -104,55 +110,69 @@ // Next we do this for tables and partitions loadTableDesc tbd = work.getLoadTableWork(); if (tbd != null) { - String mesg = "Loading data to table " + tbd.getTable().getTableName() + - ((tbd.getPartitionSpec().size() > 0) ? - " partition " + tbd.getPartitionSpec().toString() : ""); + String mesg = "Loading data to table " + + tbd.getTable().getTableName() + + ((tbd.getPartitionSpec().size() > 0) ? " partition " + + tbd.getPartitionSpec().toString() : ""); String mesg_detail = " from " + tbd.getSourceDir(); console.printInfo(mesg, mesg_detail); - Table table = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tbd.getTable().getTableName()); + Table table = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tbd + .getTable().getTableName()); if (work.getCheckFileFormat()) { // Get all files from the src directory - FileStatus [] dirs; + FileStatus[] dirs; ArrayList files; FileSystem fs; try { - fs = FileSystem.get(table.getDataLocation(),conf); + fs = FileSystem.get(table.getDataLocation(), conf); dirs = fs.globStatus(new Path(tbd.getSourceDir())); files = new ArrayList(); - for (int i=0; (dirs != null && i0) break; + // We only check one file, so exit the loop when we have at least + // one. + if (files.size() > 0) { + break; + } } } catch (IOException e) { - throw new HiveException("addFiles: filesystem error in check phase", e); + throw new HiveException( + "addFiles: filesystem error in check phase", e); } // Check if the file format of the file matches that of the table. - boolean flag = HiveFileFormatUtils.checkInputFormat(fs, conf, tbd.getTable().getInputFileFormatClass(), files); - if(!flag) - throw new HiveException("Wrong file format. Please check the file's format."); + boolean flag = HiveFileFormatUtils.checkInputFormat(fs, conf, tbd + .getTable().getInputFileFormatClass(), files); + if (!flag) { + throw new HiveException( + "Wrong file format. 
Please check the file's format."); + } } - if(tbd.getPartitionSpec().size() == 0) { - db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), tbd.getReplace(), new Path(tbd.getTmpDir())); - if (work.getOutputs() != null) + if (tbd.getPartitionSpec().size() == 0) { + db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable() + .getTableName(), tbd.getReplace(), new Path(tbd.getTmpDir())); + if (work.getOutputs() != null) { work.getOutputs().add(new WriteEntity(table)); + } } else { LOG.info("Partition is: " + tbd.getPartitionSpec().toString()); - db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), - tbd.getPartitionSpec(), tbd.getReplace(), new Path(tbd.getTmpDir())); - Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); - if (work.getOutputs() != null) + db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable() + .getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), + new Path(tbd.getTmpDir())); + Partition partn = db.getPartition(table, tbd.getPartitionSpec(), + false); + if (work.getOutputs() != null) { work.getOutputs().add(new WriteEntity(partn)); + } } } return 0; - } - catch (Exception e) { - console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e)); + } catch (Exception e) { + console.printError("Failed with exception " + e.getMessage(), "\n" + + StringUtils.stringifyException(e)); return (1); } } @@ -162,21 +182,23 @@ */ public boolean isLocal() { loadTableDesc tbd = work.getLoadTableWork(); - if (tbd != null) + if (tbd != null) { return false; + } loadFileDesc lfd = work.getLoadFileWork(); if (lfd != null) { if (lfd.getIsDfsDir()) { return false; + } else { + return true; } - else - return true; } return false; } + @Override public int getType() { return StageType.MOVE; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java (working copy) @@ -26,36 +26,39 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; /** - * Limit operator implementation - * Limits the number of rows to be passed on. + * Limit operator implementation Limits the number of rows to be passed on. 
**/ public class LimitOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; - + transient protected int limit; transient protected int currCount; + @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); limit = conf.getLimit(); currCount = 0; } + @Override public void processOp(Object row, int tag) throws HiveException { if (currCount < limit) { forward(row, inputObjInspectors[tag]); currCount++; + } else { + setDone(true); } - else - setDone(true); } - + + @Override public String getName() { return "LIM"; } - + + @Override public int getType() { return OperatorType.LIMIT; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java (working copy) @@ -20,13 +20,13 @@ import java.io.Serializable; -import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.copyWork; import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; import org.apache.hadoop.util.StringUtils; /** @@ -39,7 +39,8 @@ public CopyTask() { super(); } - + + @Override public int execute() { FileSystem dstFs = null; Path toPath = null; @@ -47,42 +48,46 @@ Path fromPath = new Path(work.getFromPath()); toPath = new Path(work.getToPath()); - console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString()); + console.printInfo("Copying data from " + fromPath.toString(), " to " + + toPath.toString()); FileSystem srcFs = fromPath.getFileSystem(conf); dstFs = toPath.getFileSystem(conf); - FileStatus [] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath); + FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath); - if(srcs == null || srcs.length == 0) { + if (srcs == null || srcs.length == 0) { console.printError("No files matching path: " + fromPath.toString()); return 3; } if (!dstFs.mkdirs(toPath)) { - console.printError("Cannot make target directory: " + toPath.toString()); + console + .printError("Cannot make target directory: " + toPath.toString()); return 2; - } + } - for(FileStatus oneSrc: srcs) { + for (FileStatus oneSrc : srcs) { LOG.debug("Copying file: " + oneSrc.getPath().toString()); - if(!FileUtil.copy(srcFs, oneSrc.getPath(), dstFs, toPath, - false, // delete source - true, // overwrite destination - conf)) { - console.printError("Failed to copy: '"+ oneSrc.getPath().toString() + - "to: '" + toPath.toString() + "'"); + if (!FileUtil.copy(srcFs, oneSrc.getPath(), dstFs, toPath, false, // delete + // source + true, // overwrite destination + conf)) { + console.printError("Failed to copy: '" + oneSrc.getPath().toString() + + "to: '" + toPath.toString() + "'"); return 1; } } return 0; } catch (Exception e) { - console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e)); + console.printError("Failed with exception " + e.getMessage(), "\n" + + StringUtils.stringifyException(e)); return (1); } } - + + @Override public int getType() { return StageType.COPY; } Index: 
ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java (working copy) @@ -18,23 +18,20 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.*; +import java.io.IOException; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; -import java.lang.management.MemoryUsage; import java.net.URLClassLoader; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; -import org.apache.hadoop.mapred.*; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - - +import org.apache.hadoop.hive.ql.exec.ExecMapper.reportStats; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.mapredWork; import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.exec.ExecMapper.reportStats; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -43,12 +40,17 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.ReflectionUtils; public class ExecReducer extends MapReduceBase implements Reducer { private JobConf jc; - private OutputCollector oc; + private OutputCollector oc; private Operator reducer; private Reporter rp; private boolean abort = false; @@ -56,101 +58,108 @@ private long cntr = 0; private long nextCntr = 1; - private static String [] fieldNames; + private static String[] fieldNames; public static final Log l4j = LogFactory.getLog("ExecReducer"); - + // used to log memory usage periodically private MemoryMXBean memoryMXBean; - + // TODO: move to DynamicSerDe when it's ready private Deserializer inputKeyDeserializer; - // Input value serde needs to be an array to support different SerDe + // Input value serde needs to be an array to support different SerDe // for different tags - private SerDe[] inputValueDeserializer = new SerDe[Byte.MAX_VALUE]; + private final SerDe[] inputValueDeserializer = new SerDe[Byte.MAX_VALUE]; static { - ArrayList fieldNameArray = new ArrayList (); - for(Utilities.ReduceField r: Utilities.ReduceField.values()) { + ArrayList fieldNameArray = new ArrayList(); + for (Utilities.ReduceField r : Utilities.ReduceField.values()) { fieldNameArray.add(r.toString()); } - fieldNames = fieldNameArray.toArray(new String [0]); + fieldNames = fieldNameArray.toArray(new String[0]); } tableDesc keyTableDesc; tableDesc[] valueTableDesc; - + + @Override public void configure(JobConf job) { ObjectInspector[] rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; ObjectInspector keyObjectInspector; - // Allocate the bean at the beginning - + // Allocate the bean at the beginning 
- memoryMXBean = ManagementFactory.getMemoryMXBean(); l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax()); - + try { - l4j.info("conf classpath = " - + Arrays.asList(((URLClassLoader)job.getClassLoader()).getURLs())); - l4j.info("thread classpath = " - + Arrays.asList(((URLClassLoader)Thread.currentThread().getContextClassLoader()).getURLs())); + l4j.info("conf classpath = " + + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs())); + l4j.info("thread classpath = " + + Arrays.asList(((URLClassLoader) Thread.currentThread() + .getContextClassLoader()).getURLs())); } catch (Exception e) { l4j.info("cannot get classpath: " + e.getMessage()); } jc = job; mapredWork gWork = Utilities.getMapRedWork(job); reducer = gWork.getReducer(); - reducer.setParentOperators(null); // clear out any parents as reducer is the root + reducer.setParentOperators(null); // clear out any parents as reducer is the + // root isTagged = gWork.getNeedsTagging(); try { keyTableDesc = gWork.getKeyDesc(); - inputKeyDeserializer = (SerDe)ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null); + inputKeyDeserializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc + .getDeserializerClass(), null); inputKeyDeserializer.initialize(null, keyTableDesc.getProperties()); keyObjectInspector = inputKeyDeserializer.getObjectInspector(); - valueTableDesc = new tableDesc[gWork.getTagToValueDesc().size()]; - for(int tag=0; tag ois = new ArrayList(); ois.add(keyObjectInspector); ois.add(valueObjectInspector[tag]); ois.add(PrimitiveObjectInspectorFactory.writableByteObjectInspector); - rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector( - Arrays.asList(fieldNames), ois); + rowObjectInspector[tag] = ObjectInspectorFactory + .getStandardStructObjectInspector(Arrays.asList(fieldNames), ois); } } catch (Exception e) { throw new RuntimeException(e); } - - //initialize reduce operator tree + + // initialize reduce operator tree try { l4j.info(reducer.dump(0)); reducer.initialize(jc, rowObjectInspector); } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; } else { - throw new RuntimeException ("Reduce operator initialization failed", e); + throw new RuntimeException("Reduce operator initialization failed", e); } } } private Object keyObject; - private Object[] valueObject = new Object[Byte.MAX_VALUE]; - + private final Object[] valueObject = new Object[Byte.MAX_VALUE]; + private BytesWritable groupKey; - + ArrayList row = new ArrayList(3); ByteWritable tag = new ByteWritable(); - public void reduce(Object key, Iterator values, - OutputCollector output, - Reporter reporter) throws IOException { - if(oc == null) { + public void reduce(Object key, Iterator values, OutputCollector output, + Reporter reporter) throws IOException { + + if (oc == null) { // propagete reporter and output collector to all operators oc = output; rp = reporter; @@ -159,34 +168,35 @@ } try { - BytesWritable keyWritable = (BytesWritable)key; - tag.set((byte)0); + BytesWritable keyWritable = (BytesWritable) key; + tag.set((byte) 0); if (isTagged) { // remove the tag int size = keyWritable.getSize() - 1; - tag.set(keyWritable.get()[size]); + tag.set(keyWritable.get()[size]); keyWritable.setSize(size); } - + if (!keyWritable.equals(groupKey)) { // If a operator wants to do some work at the 
beginning of a group - if (groupKey == null) { //the first group + if (groupKey == null) { // the first group groupKey = new BytesWritable(); } else { // If a operator wants to do some work at the end of a group l4j.trace("End Group"); reducer.endGroup(); } - + try { keyObject = inputKeyDeserializer.deserialize(keyWritable); } catch (Exception e) { - throw new HiveException("Unable to deserialize reduce input key from " + - Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize()) - + " with properties " + keyTableDesc.getProperties(), - e); + throw new HiveException( + "Unable to deserialize reduce input key from " + + Utilities.formatBinaryString(keyWritable.get(), 0, + keyWritable.getSize()) + " with properties " + + keyTableDesc.getProperties(), e); } - + groupKey.set(keyWritable.get(), 0, keyWritable.getSize()); l4j.trace("Start Group"); reducer.startGroup(); @@ -195,15 +205,18 @@ // System.err.print(keyObject.toString()); while (values.hasNext()) { BytesWritable valueWritable = (BytesWritable) values.next(); - //System.err.print(who.getHo().toString()); + // System.err.print(who.getHo().toString()); try { - valueObject[tag.get()] = inputValueDeserializer[tag.get()].deserialize(valueWritable); + valueObject[tag.get()] = inputValueDeserializer[tag.get()] + .deserialize(valueWritable); } catch (SerDeException e) { - throw new HiveException("Unable to deserialize reduce input value (tag=" + tag.get() - + ") from " + - Utilities.formatBinaryString(valueWritable.get(), 0, valueWritable.getSize()) - + " with properties " + valueTableDesc[tag.get()].getProperties(), - e); + throw new HiveException( + "Unable to deserialize reduce input value (tag=" + + tag.get() + + ") from " + + Utilities.formatBinaryString(valueWritable.get(), 0, + valueWritable.getSize()) + " with properties " + + valueTableDesc[tag.get()].getProperties(), e); } row.clear(); row.add(keyObject); @@ -214,7 +227,8 @@ cntr++; if (cntr == nextCntr) { long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed(); - l4j.info("ExecReducer: processing " + cntr + " rows: used memory = " + used_memory); + l4j.info("ExecReducer: processing " + cntr + + " rows: used memory = " + used_memory); nextCntr = getNextCntr(cntr); } } @@ -224,27 +238,30 @@ } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; } else { - throw new IOException (e); + throw new IOException(e); } } } private long getNextCntr(long cntr) { - // A very simple counter to keep track of number of rows processed by the reducer. It dumps + // A very simple counter to keep track of number of rows processed by the + // reducer. 
It dumps // every 1 million times, and quickly before that - if (cntr >= 1000000) + if (cntr >= 1000000) { return cntr + 1000000; - + } + return 10 * cntr; } + @Override public void close() { // No row was processed - if(oc == null) { + if (oc == null) { l4j.trace("Close called no row"); } @@ -255,18 +272,20 @@ reducer.endGroup(); } if (l4j.isInfoEnabled()) { - l4j.info("ExecReducer: processed " + cntr + " rows: used memory = " + memoryMXBean.getHeapMemoryUsage().getUsed()); + l4j.info("ExecReducer: processed " + cntr + " rows: used memory = " + + memoryMXBean.getHeapMemoryUsage().getUsed()); } - + reducer.close(abort); - reportStats rps = new reportStats (rp); + reportStats rps = new reportStats(rp); reducer.preorderMap(rps); return; } catch (Exception e) { - if(!abort) { + if (!abort) { // signal new failure to map-reduce l4j.error("Hit error while closing operators - failing tree"); - throw new RuntimeException ("Error while closing operators: " + e.getMessage(), e); + throw new RuntimeException("Error while closing operators: " + + e.getMessage(), e); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AutoProgressor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/AutoProgressor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AutoProgressor.java (working copy) @@ -26,55 +26,61 @@ import org.apache.hadoop.mapred.Reporter; /** - * AutoProgressor periodically sends updates to the job tracker so that - * it doesn't consider this task attempt dead if there is a long period of + * AutoProgressor periodically sends updates to the job tracker so that it + * doesn't consider this task attempt dead if there is a long period of * inactivity. */ public class AutoProgressor { protected Log LOG = LogFactory.getLog(this.getClass().getName()); - // Timer that reports every 5 minutes to the jobtracker. This ensures that - // even if the operator returning rows for greater than that - // duration, a progress report is sent to the tracker so that the tracker + // Timer that reports every 5 minutes to the jobtracker. This ensures that + // even if the operator returning rows for greater than that + // duration, a progress report is sent to the tracker so that the tracker // does not think that the job is dead. Timer rpTimer = null; // Name of the class to report for String logClassName = null; int notificationInterval; Reporter reporter; - + class ReporterTask extends TimerTask { - + /** * Reporter to report progress to the jobtracker. */ private Reporter rp; - + /** * Constructor. 
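The ReporterTask above is the standard java.util.Timer keep-alive pattern: fire at a fixed interval and call Reporter.progress() so the job tracker does not declare a quiet task dead. A generic sketch of that pattern, with a Runnable standing in for the Hadoop Reporter (an assumption for the sake of a self-contained example):

import java.util.Timer;
import java.util.TimerTask;

// Sketch only: periodic liveness reporting as done by AutoProgressor.
public class KeepAliveSketch {
  public static Timer start(final Runnable progress, long intervalMs) {
    Timer timer = new Timer(true); // daemon thread, will not block JVM shutdown
    timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        progress.run(); // in the real class this is reporter.progress()
      }
    }, 0, intervalMs);
    return timer;
  }
}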
*/ public ReporterTask(Reporter rp) { - if (rp != null) + if (rp != null) { this.rp = rp; + } } - + @Override public void run() { if (rp != null) { - LOG.info("ReporterTask calling reporter.progress() for " + logClassName); + LOG + .info("ReporterTask calling reporter.progress() for " + + logClassName); rp.progress(); } } } - - AutoProgressor(String logClassName, Reporter reporter, int notificationInterval) { + + AutoProgressor(String logClassName, Reporter reporter, + int notificationInterval) { this.logClassName = logClassName; this.reporter = reporter; } public void go() { - LOG.info("Running ReporterTask every " + notificationInterval + " miliseconds."); + LOG.info("Running ReporterTask every " + notificationInterval + + " miliseconds."); rpTimer = new Timer(true); - rpTimer.scheduleAtFixedRate(new ReporterTask(reporter), 0, notificationInterval); + rpTimer.scheduleAtFixedRate(new ReporterTask(reporter), 0, + notificationInterval); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java (working copy) @@ -31,7 +31,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Task; -public class ConditionalResolverSkewJoin implements ConditionalResolver, Serializable { +public class ConditionalResolverSkewJoin implements ConditionalResolver, + Serializable { private static final long serialVersionUID = 1L; public static class ConditionalResolverSkewJoinCtx implements Serializable { @@ -41,7 +42,7 @@ // this map stores mapping from "big key dir" to its corresponding mapjoin // task. 
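[Editor's note] The AutoProgressor hunk above schedules a ReporterTask on a daemon java.util.Timer so the tracker keeps seeing progress during long stretches without output rows; below is a minimal self-contained sketch of the same keep-alive pattern, with a hypothetical Reporter interface standing in for org.apache.hadoop.mapred.Reporter:

import java.util.Timer;
import java.util.TimerTask;

// Keep-alive sketch: periodically call progress() so a long-running task is not
// considered dead. The Reporter interface here is a hypothetical stand-in for
// org.apache.hadoop.mapred.Reporter.
public class ProgressKeepAlive {

  interface Reporter {
    void progress();
  }

  private final Timer timer = new Timer(true); // daemon timer, like rpTimer in the hunk

  void go(final Reporter reporter, long intervalMillis) {
    timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        reporter.progress(); // heartbeat to the tracker
      }
    }, 0, intervalMillis);
  }

  public static void main(String[] args) throws InterruptedException {
    ProgressKeepAlive keepAlive = new ProgressKeepAlive();
    keepAlive.go(() -> System.out.println("progress()"), 200);
    Thread.sleep(1000); // the daemon timer fires roughly five times, then the JVM exits
  }
}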
Map> dirToTaskMap; - + public ConditionalResolverSkewJoinCtx( Map> dirToTaskMap) { super(); @@ -57,31 +58,36 @@ this.dirToTaskMap = dirToTaskMap; } } - - public ConditionalResolverSkewJoin(){ + + public ConditionalResolverSkewJoin() { } - + @Override - public List> getTasks(HiveConf conf, Object objCtx) { - ConditionalResolverSkewJoinCtx ctx = (ConditionalResolverSkewJoinCtx)objCtx; + public List> getTasks(HiveConf conf, + Object objCtx) { + ConditionalResolverSkewJoinCtx ctx = (ConditionalResolverSkewJoinCtx) objCtx; List> resTsks = new ArrayList>(); - Map> dirToTaskMap = ctx.getDirToTaskMap(); - Iterator>> bigKeysPathsIter = dirToTaskMap.entrySet().iterator(); + Map> dirToTaskMap = ctx + .getDirToTaskMap(); + Iterator>> bigKeysPathsIter = dirToTaskMap + .entrySet().iterator(); try { - while(bigKeysPathsIter.hasNext()) { - Entry> entry = bigKeysPathsIter.next(); + while (bigKeysPathsIter.hasNext()) { + Entry> entry = bigKeysPathsIter + .next(); String path = entry.getKey(); Path dirPath = new Path(path); FileSystem inpFs = dirPath.getFileSystem(conf); FileStatus[] fstatus = inpFs.listStatus(dirPath); - if (fstatus.length > 0) + if (fstatus.length > 0) { resTsks.add(entry.getValue()); + } } - }catch (IOException e) { + } catch (IOException e) { e.printStackTrace(); } return resTsks; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/explain.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/explain.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/explain.java (working copy) @@ -24,5 +24,6 @@ @Retention(RetentionPolicy.RUNTIME) public @interface explain { String displayName() default ""; + boolean normalExplain() default true; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java (working copy) @@ -37,77 +37,90 @@ * The alias of the table. */ private String tabAlias; - + /** * Is the column a partitioned column. 
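[Editor's note] getTasks() in the ConditionalResolverSkewJoin hunk above schedules a follow-up map-join task only when its "big key" directory contains at least one file; the sketch below is a simplified, self-contained version of that selection, with java.nio.file and String task names standing in for the HDFS FileSystem API and Task objects (paths are hypothetical):

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch of the skew-join resolution step: each "big key" directory maps to a
// follow-up task, and a task is selected only if its directory is non-empty.
public class SkewJoinResolverSketch {

  static List<String> resolve(Map<String, String> dirToTask) throws IOException {
    List<String> selected = new ArrayList<String>();
    for (Map.Entry<String, String> entry : dirToTask.entrySet()) {
      Path dir = Paths.get(entry.getKey());
      if (!Files.isDirectory(dir)) {
        continue; // no skewed output was written for this key
      }
      try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
        if (stream.iterator().hasNext()) { // analogous to fstatus.length > 0 in the hunk
          selected.add(entry.getValue());
        }
      }
    }
    return selected;
  }

  public static void main(String[] args) throws IOException {
    Map<String, String> dirToTask = new LinkedHashMap<String, String>();
    dirToTask.put("/tmp/bigkeys/join0", "mapjoin-task-0"); // hypothetical paths
    dirToTask.put("/tmp/bigkeys/join1", "mapjoin-task-1");
    System.out.println(resolve(dirToTask));
  }
}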
*/ private boolean isPartitionCol; - - public exprNodeColumnDesc() {} - public exprNodeColumnDesc(TypeInfo typeInfo, String column, - String tabAlias, boolean isPartitionCol) { + + public exprNodeColumnDesc() { + } + + public exprNodeColumnDesc(TypeInfo typeInfo, String column, String tabAlias, + boolean isPartitionCol) { super(typeInfo); this.column = column; this.tabAlias = tabAlias; this.isPartitionCol = isPartitionCol; } + public exprNodeColumnDesc(Class c, String column, String tabAlias, - boolean isPartitionCol) { + boolean isPartitionCol) { super(TypeInfoFactory.getPrimitiveTypeInfoFromJavaPrimitive(c)); this.column = column; this.tabAlias = tabAlias; this.isPartitionCol = isPartitionCol; } + public String getColumn() { - return this.column; + return column; } + public void setColumn(String column) { this.column = column; } public String getTabAlias() { - return this.tabAlias; + return tabAlias; } + public void setTabAlias(String tabAlias) { this.tabAlias = tabAlias; } public boolean getIsParititonCol() { - return this.isPartitionCol; + return isPartitionCol; } + public void setIsPartitionCol(boolean isPartitionCol) { this.isPartitionCol = isPartitionCol; } + @Override public String toString() { return "Column[" + column + "]"; } - - @explain(displayName="expr") + + @explain(displayName = "expr") @Override public String getExprString() { return getColumn(); } + @Override public List getCols() { - List lst = new ArrayList(); - lst.add(column); - return lst; + List lst = new ArrayList(); + lst.add(column); + return lst; } + @Override public exprNodeDesc clone() { - return new exprNodeColumnDesc(this.typeInfo, this.column, - this.tabAlias, this.isPartitionCol); + return new exprNodeColumnDesc(typeInfo, column, tabAlias, isPartitionCol); } + @Override public boolean isSame(Object o) { - if (!(o instanceof exprNodeColumnDesc)) + if (!(o instanceof exprNodeColumnDesc)) { return false; - exprNodeColumnDesc dest = (exprNodeColumnDesc)o; - if (!column.equals(dest.getColumn())) + } + exprNodeColumnDesc dest = (exprNodeColumnDesc) o; + if (!column.equals(dest.getColumn())) { return false; - if (!typeInfo.equals(dest.getTypeInfo())) + } + if (!typeInfo.equals(dest.getTypeInfo())) { return false; - return true; + } + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java (working copy) @@ -19,36 +19,38 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; /** - * All member variables should have a setters and getters of the form - * get and set or else they won't be recreated - * properly at run time. + * All member variables should have a setters and getters of the form get and set or else they won't be recreated properly at run + * time. 
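[Editor's note] The udtfDesc javadoc above warns that plan descriptors need matching get/set pairs or they will not be recreated at run time; Hive serializes plan objects as JavaBeans (see toXML() in the mapredWork hunk later in this patch), so a property without both accessors is silently dropped. The small illustration below uses java.beans.XMLEncoder/XMLDecoder as an assumed stand-in for that mechanism; the Desc class is hypothetical:

import java.beans.XMLDecoder;
import java.beans.XMLEncoder;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

// Demonstrates why descriptors need full getter/setter pairs: bean serialization
// only restores properties it can write back through a setter.
public class BeanRoundTrip {

  public static class Desc {
    private String kept = "default";
    private String dropped = "default"; // no setter, so it cannot be restored

    public Desc() {
    }

    public String getKept() {
      return kept;
    }

    public void setKept(String kept) {
      this.kept = kept;
    }

    public String getDropped() {
      return dropped;
    }
    // setDropped(...) intentionally missing
  }

  public static void main(String[] args) {
    Desc original = new Desc();
    original.setKept("survives");
    original.dropped = "lost on round trip";

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    XMLEncoder encoder = new XMLEncoder(out);
    encoder.writeObject(original);
    encoder.close();

    XMLDecoder decoder = new XMLDecoder(new ByteArrayInputStream(out.toByteArray()));
    Desc copy = (Desc) decoder.readObject();
    decoder.close();

    System.out.println(copy.getKept());    // survives
    System.out.println(copy.getDropped()); // default
  }
}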
* */ -@explain(displayName="UDTF Operator") +@explain(displayName = "UDTF Operator") public class udtfDesc implements Serializable { private static final long serialVersionUID = 1L; private GenericUDTF genericUDTF; - - public udtfDesc() { } + + public udtfDesc() { + } + public udtfDesc(final GenericUDTF genericUDTF) { this.genericUDTF = genericUDTF; } public GenericUDTF getGenericUDTF() { - return this.genericUDTF; + return genericUDTF; } + public void setGenericUDTF(final GenericUDTF genericUDTF) { - this.genericUDTF=genericUDTF; + this.genericUDTF = genericUDTF; } - @explain(displayName="function name") + + @explain(displayName = "function name") public String getUDTFName() { - return this.genericUDTF.toString(); + return genericUDTF.toString(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ddlDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ddlDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ddlDesc.java (working copy) @@ -20,7 +20,6 @@ import java.io.Serializable; -public abstract class ddlDesc implements Serializable -{ - private static final long serialVersionUID = 1L; +public abstract class ddlDesc implements Serializable { + private static final long serialVersionUID = 1L; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/fileSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/fileSinkDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/fileSinkDesc.java (working copy) @@ -20,7 +20,7 @@ import java.io.Serializable; -@explain(displayName="File Output Operator") +@explain(displayName = "File Output Operator") public class fileSinkDesc implements Serializable { private static final long serialVersionUID = 1L; private String dirName; @@ -30,47 +30,46 @@ private String compressCodec; private String compressType; + public fileSinkDesc() { + } - public fileSinkDesc() { } - public fileSinkDesc( - final String dirName, - final tableDesc tableInfo, - final boolean compressed, int destTableId) { + public fileSinkDesc(final String dirName, final tableDesc tableInfo, + final boolean compressed, int destTableId) { this.dirName = dirName; this.tableInfo = tableInfo; this.compressed = compressed; this.destTableId = destTableId; } - - public fileSinkDesc( - final String dirName, - final tableDesc tableInfo, + + public fileSinkDesc(final String dirName, final tableDesc tableInfo, final boolean compressed) { - this.dirName = dirName; - this.tableInfo = tableInfo; - this.compressed = compressed; - this.destTableId = 0; - } - - @explain(displayName="directory", normalExplain=false) + this.dirName = dirName; + this.tableInfo = tableInfo; + this.compressed = compressed; + destTableId = 0; + } + + @explain(displayName = "directory", normalExplain = false) public String getDirName() { - return this.dirName; + return dirName; } + public void setDirName(final String dirName) { this.dirName = dirName; } - - @explain(displayName="table") + + @explain(displayName = "table") public tableDesc getTableInfo() { - return this.tableInfo; + return tableInfo; } + public void setTableInfo(final tableDesc tableInfo) { this.tableInfo = tableInfo; } - @explain(displayName="compressed") + @explain(displayName = "compressed") public boolean getCompressed() { return compressed; } @@ -78,25 +77,29 @@ public void setCompressed(boolean compressed) { this.compressed = compressed; } - - @explain(displayName="GlobalTableId") 
+ + @explain(displayName = "GlobalTableId") public int getDestTableId() { return destTableId; } - + public void setDestTableId(int destTableId) { this.destTableId = destTableId; } + public String getCompressCodec() { return compressCodec; } + public void setCompressCodec(String intermediateCompressorCodec) { - this.compressCodec = intermediateCompressorCodec; + compressCodec = intermediateCompressorCodec; } + public String getCompressType() { return compressType; } + public void setCompressType(String intermediateCompressType) { - this.compressType = intermediateCompressType; + compressType = intermediateCompressType; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java (working copy) @@ -19,8 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.Vector; -import org.apache.hadoop.hive.ql.parse.joinType; /** * Join conditions Descriptor implementation. @@ -33,58 +31,60 @@ private int type; private boolean preserved; - public joinCond() {} + public joinCond() { + } public joinCond(int left, int right, int type) { - this.left = left; + this.left = left; this.right = right; - this.type = type; + this.type = type; } public joinCond(org.apache.hadoop.hive.ql.parse.joinCond condn) { - this.left = condn.getLeft(); - this.right = condn.getRight(); - this.preserved = condn.getPreserved(); - switch ( condn.getJoinType() ) { + left = condn.getLeft(); + right = condn.getRight(); + preserved = condn.getPreserved(); + switch (condn.getJoinType()) { case INNER: - this.type = joinDesc.INNER_JOIN; + type = joinDesc.INNER_JOIN; break; case LEFTOUTER: - this.type = joinDesc.LEFT_OUTER_JOIN; + type = joinDesc.LEFT_OUTER_JOIN; break; case RIGHTOUTER: - this.type = joinDesc.RIGHT_OUTER_JOIN; + type = joinDesc.RIGHT_OUTER_JOIN; break; case FULLOUTER: - this.type = joinDesc.FULL_OUTER_JOIN; + type = joinDesc.FULL_OUTER_JOIN; break; case UNIQUE: - this.type = joinDesc.UNIQUE_JOIN; + type = joinDesc.UNIQUE_JOIN; break; case LEFTSEMI: - this.type = joinDesc.LEFT_SEMI_JOIN; + type = joinDesc.LEFT_SEMI_JOIN; break; default: assert false; } } - + /** * @return true if table is preserved, false otherwise */ public boolean getPreserved() { - return this.preserved; + return preserved; } - + /** - * @param preserved if table is preserved, false otherwise + * @param preserved + * if table is preserved, false otherwise */ public void setPreserved(final boolean preserved) { this.preserved = preserved; } - + public int getLeft() { - return this.left; + return left; } public void setLeft(final int left) { @@ -92,7 +92,7 @@ } public int getRight() { - return this.right; + return right; } public void setRight(final int right) { @@ -100,18 +100,18 @@ } public int getType() { - return this.type; + return type; } public void setType(final int type) { this.type = type; } - + @explain public String getJoinCondString() { StringBuilder sb = new StringBuilder(); - - switch(type) { + + switch (type) { case joinDesc.INNER_JOIN: sb.append("Inner Join "); break; @@ -134,11 +134,11 @@ sb.append("Unknow Join "); break; } - + sb.append(left); sb.append(" to "); sb.append(right); - + return sb.toString(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalWork.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalWork.java (working copy) @@ -21,14 +21,14 @@ import java.io.Serializable; import java.util.List; -@explain(displayName="Conditional Operator") +@explain(displayName = "Conditional Operator") public class ConditionalWork implements Serializable { private static final long serialVersionUID = 1L; List listWorks; - + public ConditionalWork() { } - + public ConditionalWork(final List listWorks) { this.listWorks = listWorks; } @@ -41,7 +41,8 @@ } /** - * @param listWorks the listWorks to set + * @param listWorks + * the listWorks to set */ public void setListWorks(List listWorks) { this.listWorks = listWorks; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (working copy) @@ -18,24 +18,24 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.Serializable; +import java.util.Set; + import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import java.io.Serializable; -import java.util.Set; - public class DDLWork implements Serializable { private static final long serialVersionUID = 1L; - private createTableDesc createTblDesc; - private createTableLikeDesc createTblLikeDesc; - private createViewDesc createVwDesc; - private dropTableDesc dropTblDesc; - private alterTableDesc alterTblDesc; - private showTablesDesc showTblsDesc; - private showFunctionsDesc showFuncsDesc; - private descFunctionDesc descFunctionDesc; - private showPartitionsDesc showPartsDesc; - private descTableDesc descTblDesc; + private createTableDesc createTblDesc; + private createTableLikeDesc createTblLikeDesc; + private createViewDesc createVwDesc; + private dropTableDesc dropTblDesc; + private alterTableDesc alterTblDesc; + private showTablesDesc showTblsDesc; + private showFunctionsDesc showFuncsDesc; + private descFunctionDesc descFunctionDesc; + private showPartitionsDesc showPartsDesc; + private descTableDesc descTblDesc; private AddPartitionDesc addPartitionDesc; private MsckDesc msckDesc; private showTableStatusDesc showTblStatusDesc; @@ -58,44 +58,54 @@ } /** - * @param alterTblDesc alter table descriptor + * @param alterTblDesc + * alter table descriptor */ - public DDLWork(Set inputs, Set outputs, alterTableDesc alterTblDesc) { + public DDLWork(Set inputs, Set outputs, + alterTableDesc alterTblDesc) { this(inputs, outputs); this.alterTblDesc = alterTblDesc; } /** - * @param createTblDesc create table descriptor + * @param createTblDesc + * create table descriptor */ - public DDLWork(Set inputs, Set outputs, createTableDesc createTblDesc) { + public DDLWork(Set inputs, Set outputs, + createTableDesc createTblDesc) { this(inputs, outputs); this.createTblDesc = createTblDesc; } /** - * @param createTblLikeDesc create table like descriptor + * @param createTblLikeDesc + * create table like descriptor */ - public DDLWork(Set inputs, Set outputs, createTableLikeDesc createTblLikeDesc) { + public DDLWork(Set inputs, Set outputs, + createTableLikeDesc createTblLikeDesc) { this(inputs, outputs); this.createTblLikeDesc = createTblLikeDesc; } /** - * @param createVwDesc create view descriptor + * @param createVwDesc + * create view descriptor */ - public DDLWork(Set inputs, Set outputs, createViewDesc createVwDesc) { + public 
DDLWork(Set inputs, Set outputs, + createViewDesc createVwDesc) { this(inputs, outputs); this.createVwDesc = createVwDesc; } /** - * @param dropTblDesc drop table descriptor + * @param dropTblDesc + * drop table descriptor */ - public DDLWork(Set inputs, Set outputs, dropTableDesc dropTblDesc) { + public DDLWork(Set inputs, Set outputs, + dropTableDesc dropTblDesc) { this(inputs, outputs); this.dropTblDesc = dropTblDesc; @@ -104,7 +114,8 @@ /** * @param descTblDesc */ - public DDLWork(Set inputs, Set outputs, descTableDesc descTblDesc) { + public DDLWork(Set inputs, Set outputs, + descTableDesc descTblDesc) { this(inputs, outputs); this.descTblDesc = descTblDesc; @@ -113,7 +124,8 @@ /** * @param showTblsDesc */ - public DDLWork(Set inputs, Set outputs, showTablesDesc showTblsDesc) { + public DDLWork(Set inputs, Set outputs, + showTablesDesc showTblsDesc) { this(inputs, outputs); this.showTblsDesc = showTblsDesc; @@ -122,7 +134,8 @@ /** * @param showFuncsDesc */ - public DDLWork(Set inputs, Set outputs, showFunctionsDesc showFuncsDesc) { + public DDLWork(Set inputs, Set outputs, + showFunctionsDesc showFuncsDesc) { this(inputs, outputs); this.showFuncsDesc = showFuncsDesc; @@ -131,41 +144,47 @@ /** * @param descFuncDesc */ - public DDLWork(Set inputs, Set outputs, descFunctionDesc descFuncDesc) { + public DDLWork(Set inputs, Set outputs, + descFunctionDesc descFuncDesc) { this(inputs, outputs); - this.descFunctionDesc = descFuncDesc; + descFunctionDesc = descFuncDesc; } /** * @param showPartsDesc */ - public DDLWork(Set inputs, Set outputs, showPartitionsDesc showPartsDesc) { + public DDLWork(Set inputs, Set outputs, + showPartitionsDesc showPartsDesc) { this(inputs, outputs); this.showPartsDesc = showPartsDesc; } /** - * @param addPartitionDesc information about the partitions - * we want to add. + * @param addPartitionDesc + * information about the partitions we want to add. 
*/ - public DDLWork(Set inputs, Set outputs, AddPartitionDesc addPartitionDesc) { + public DDLWork(Set inputs, Set outputs, + AddPartitionDesc addPartitionDesc) { this(inputs, outputs); this.addPartitionDesc = addPartitionDesc; } - public DDLWork(Set inputs, Set outputs, MsckDesc checkDesc) { + public DDLWork(Set inputs, Set outputs, + MsckDesc checkDesc) { this(inputs, outputs); - this.msckDesc = checkDesc; + msckDesc = checkDesc; } /** - * @param showTblStatusDesc show table status descriptor + * @param showTblStatusDesc + * show table status descriptor */ - public DDLWork(Set inputs, Set outputs, showTableStatusDesc showTblStatusDesc) { + public DDLWork(Set inputs, Set outputs, + showTableStatusDesc showTblStatusDesc) { this(inputs, outputs); this.showTblStatusDesc = showTblStatusDesc; @@ -174,13 +193,14 @@ /** * @return the createTblDesc */ - @explain(displayName="Create Table Operator") + @explain(displayName = "Create Table Operator") public createTableDesc getCreateTblDesc() { return createTblDesc; } /** - * @param createTblDesc the createTblDesc to set + * @param createTblDesc + * the createTblDesc to set */ public void setCreateTblDesc(createTableDesc createTblDesc) { this.createTblDesc = createTblDesc; @@ -189,29 +209,30 @@ /** * @return the createTblDesc */ - @explain(displayName="Create Table Operator") + @explain(displayName = "Create Table Operator") public createTableLikeDesc getCreateTblLikeDesc() { return createTblLikeDesc; } /** - * @param createTblLikeDesc the createTblDesc to set + * @param createTblLikeDesc + * the createTblDesc to set */ public void setCreateTblLikeDesc(createTableLikeDesc createTblLikeDesc) { this.createTblLikeDesc = createTblLikeDesc; } - /** * @return the createTblDesc */ - @explain(displayName="Create View Operator") + @explain(displayName = "Create View Operator") public createViewDesc getCreateViewDesc() { return createVwDesc; } /** - * @param createVwDesc the createViewDesc to set + * @param createVwDesc + * the createViewDesc to set */ public void setCreateViewDesc(createViewDesc createVwDesc) { this.createVwDesc = createVwDesc; @@ -220,13 +241,14 @@ /** * @return the dropTblDesc */ - @explain(displayName="Drop Table Operator") + @explain(displayName = "Drop Table Operator") public dropTableDesc getDropTblDesc() { return dropTblDesc; } /** - * @param dropTblDesc the dropTblDesc to set + * @param dropTblDesc + * the dropTblDesc to set */ public void setDropTblDesc(dropTableDesc dropTblDesc) { this.dropTblDesc = dropTblDesc; @@ -235,13 +257,14 @@ /** * @return the alterTblDesc */ - @explain(displayName="Alter Table Operator") + @explain(displayName = "Alter Table Operator") public alterTableDesc getAlterTblDesc() { return alterTblDesc; } /** - * @param alterTblDesc the alterTblDesc to set + * @param alterTblDesc + * the alterTblDesc to set */ public void setAlterTblDesc(alterTableDesc alterTblDesc) { this.alterTblDesc = alterTblDesc; @@ -250,13 +273,14 @@ /** * @return the showTblsDesc */ - @explain(displayName="Show Table Operator") + @explain(displayName = "Show Table Operator") public showTablesDesc getShowTblsDesc() { return showTblsDesc; } /** - * @param showTblsDesc the showTblsDesc to set + * @param showTblsDesc + * the showTblsDesc to set */ public void setShowTblsDesc(showTablesDesc showTblsDesc) { this.showTblsDesc = showTblsDesc; @@ -265,7 +289,7 @@ /** * @return the showFuncsDesc */ - @explain(displayName="Show Function Operator") + @explain(displayName = "Show Function Operator") public showFunctionsDesc getShowFuncsDesc() { return 
showFuncsDesc; } @@ -273,35 +297,38 @@ /** * @return the descFuncDesc */ - @explain(displayName="Show Function Operator") + @explain(displayName = "Show Function Operator") public descFunctionDesc getDescFunctionDesc() { return descFunctionDesc; } /** - * @param showFuncsDesc the showFuncsDesc to set + * @param showFuncsDesc + * the showFuncsDesc to set */ public void setShowFuncsDesc(showFunctionsDesc showFuncsDesc) { this.showFuncsDesc = showFuncsDesc; } /** - * @param descFuncDesc the showFuncsDesc to set + * @param descFuncDesc + * the showFuncsDesc to set */ public void setDescFuncDesc(descFunctionDesc descFuncDesc) { - this.descFunctionDesc = descFuncDesc; + descFunctionDesc = descFuncDesc; } /** * @return the showPartsDesc */ - @explain(displayName="Show Partitions Operator") + @explain(displayName = "Show Partitions Operator") public showPartitionsDesc getShowPartsDesc() { return showPartsDesc; } /** - * @param showPartsDesc the showPartsDesc to set + * @param showPartsDesc + * the showPartsDesc to set */ public void setShowPartsDesc(showPartitionsDesc showPartsDesc) { this.showPartsDesc = showPartsDesc; @@ -310,29 +337,29 @@ /** * @return the descTblDesc */ - @explain(displayName="Describe Table Operator") + @explain(displayName = "Describe Table Operator") public descTableDesc getDescTblDesc() { return descTblDesc; } /** - * @param descTblDesc the descTblDesc to set + * @param descTblDesc + * the descTblDesc to set */ public void setDescTblDesc(descTableDesc descTblDesc) { this.descTblDesc = descTblDesc; } /** - * @return information about the partitions - * we want to add. + * @return information about the partitions we want to add. */ public AddPartitionDesc getAddPartitionDesc() { return addPartitionDesc; } /** - * @param addPartitionDesc information about the partitions - * we want to add. + * @param addPartitionDesc + * information about the partitions we want to add. 
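[Editor's note] The @explain(displayName = ...) annotations on DDLWork's getters, together with the normalExplain flag added in the explain.java hunk earlier, determine what EXPLAIN prints for each descriptor. Below is a hedged sketch of how such annotations can be read via reflection, using a locally defined annotation and bean rather than the actual Hive ExplainTask:

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.reflect.Method;

// Sketch of annotation-driven EXPLAIN rendering: walk a descriptor's getters and
// print the annotated ones. The Explain annotation and SampleDesc bean are local
// stand-ins, not the Hive classes.
public class ExplainSketch {

  @Retention(RetentionPolicy.RUNTIME)
  @interface Explain {
    String displayName() default "";

    boolean normalExplain() default true;
  }

  public static class SampleDesc {
    @Explain(displayName = "name")
    public String getTableName() {
      return "src";
    }

    @Explain(displayName = "result file", normalExplain = false)
    public String getResFile() {
      return "/tmp/res"; // hypothetical path
    }
  }

  static void render(Object desc, boolean extended) throws Exception {
    for (Method m : desc.getClass().getMethods()) {
      Explain ann = m.getAnnotation(Explain.class);
      if (ann == null) {
        continue;
      }
      // entries marked normalExplain=false only show up in extended output
      if (ann.normalExplain() || extended) {
        System.out.println(ann.displayName() + ": " + m.invoke(desc));
      }
    }
  }

  public static void main(String[] args) throws Exception {
    render(new SampleDesc(), false); // prints only "name: src"
    render(new SampleDesc(), true);  // also prints "result file: /tmp/res"
  }
}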
*/ public void setAddPartitionDesc(AddPartitionDesc addPartitionDesc) { this.addPartitionDesc = addPartitionDesc; @@ -346,7 +373,8 @@ } /** - * @param msckDesc metastore check description + * @param msckDesc + * metastore check description */ public void setMsckDesc(MsckDesc msckDesc) { this.msckDesc = msckDesc; @@ -360,7 +388,8 @@ } /** - * @param showTblStatusDesc show table descriptor + * @param showTblStatusDesc + * show table descriptor */ public void setShowTblStatusDesc(showTableStatusDesc showTblStatusDesc) { this.showTblStatusDesc = showTblStatusDesc; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/showFunctionsDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/showFunctionsDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/showFunctionsDesc.java (working copy) @@ -19,14 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; + import org.apache.hadoop.fs.Path; -@explain(displayName="Show Functions") -public class showFunctionsDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Show Functions") +public class showFunctionsDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - String pattern; - Path resFile; + String pattern; + Path resFile; /** * table name for the result of show tables */ @@ -43,7 +43,7 @@ public String getSchema() { return schema; } - + /** * @param resFile */ @@ -53,7 +53,8 @@ } /** - * @param pattern names of tables to show + * @param pattern + * names of tables to show */ public showFunctionsDesc(Path resFile, String pattern) { this.resFile = resFile; @@ -63,13 +64,14 @@ /** * @return the pattern */ - @explain(displayName="pattern") + @explain(displayName = "pattern") public String getPattern() { return pattern; } /** - * @param pattern the pattern to set + * @param pattern + * the pattern to set */ public void setPattern(String pattern) { this.pattern = pattern; @@ -82,12 +84,14 @@ return resFile; } - @explain(displayName="result file", normalExplain=false) + @explain(displayName = "result file", normalExplain = false) public String getResFileString() { return getResFile().getName(); } + /** - * @param resFile the resFile to set + * @param resFile + * the resFile to set */ public void setResFile(Path resFile) { this.resFile = resFile; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/extractDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/extractDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/extractDesc.java (working copy) @@ -20,18 +20,23 @@ import java.io.Serializable; -@explain(displayName="Extract") +@explain(displayName = "Extract") public class extractDesc implements Serializable { private static final long serialVersionUID = 1L; private exprNodeDesc col; - public extractDesc() { } + + public extractDesc() { + } + public extractDesc(final exprNodeDesc col) { this.col = col; } + public exprNodeDesc getCol() { - return this.col; + return col; } + public void setCol(final exprNodeDesc col) { - this.col=col; + this.col = col; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java (working copy) @@ -26,62 +26,58 @@ import 
org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.Utilities; -@explain(displayName="Create Table") -public class createTableDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Create Table") +public class createTableDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - String tableName; - boolean isExternal; - List cols; - List partCols; - List bucketCols; - List sortCols; - int numBuckets; - String fieldDelim; - String fieldEscape; - String collItemDelim; - String mapKeyDelim; - String lineDelim; - String comment; - String inputFormat; - String outputFormat; - String location; - String serName; + String tableName; + boolean isExternal; + List cols; + List partCols; + List bucketCols; + List sortCols; + int numBuckets; + String fieldDelim; + String fieldEscape; + String collItemDelim; + String mapKeyDelim; + String lineDelim; + String comment; + String inputFormat; + String outputFormat; + String location; + String serName; Map mapProp; - boolean ifNotExists; - - public createTableDesc(String tableName, boolean isExternal, - List cols, List partCols, - List bucketCols, List sortCols, - int numBuckets, String fieldDelim, - String fieldEscape, - String collItemDelim, - String mapKeyDelim, String lineDelim, - String comment, String inputFormat, String outputFormat, - String location, String serName, Map mapProp, - boolean ifNotExists) { - this.tableName = tableName; - this.isExternal = isExternal; - this.bucketCols = bucketCols; - this.sortCols = sortCols; - this.collItemDelim = collItemDelim; - this.cols = cols; - this.comment = comment; - this.fieldDelim = fieldDelim; - this.fieldEscape = fieldEscape; - this.inputFormat = inputFormat; - this.outputFormat = outputFormat; - this.lineDelim = lineDelim; - this.location = location; - this.mapKeyDelim = mapKeyDelim; - this.numBuckets = numBuckets; - this.partCols = partCols; - this.serName = serName; - this.mapProp = mapProp; - this.ifNotExists = ifNotExists; + boolean ifNotExists; + + public createTableDesc(String tableName, boolean isExternal, + List cols, List partCols, + List bucketCols, List sortCols, int numBuckets, + String fieldDelim, String fieldEscape, String collItemDelim, + String mapKeyDelim, String lineDelim, String comment, String inputFormat, + String outputFormat, String location, String serName, + Map mapProp, boolean ifNotExists) { + this.tableName = tableName; + this.isExternal = isExternal; + this.bucketCols = bucketCols; + this.sortCols = sortCols; + this.collItemDelim = collItemDelim; + this.cols = cols; + this.comment = comment; + this.fieldDelim = fieldDelim; + this.fieldEscape = fieldEscape; + this.inputFormat = inputFormat; + this.outputFormat = outputFormat; + this.lineDelim = lineDelim; + this.location = location; + this.mapKeyDelim = mapKeyDelim; + this.numBuckets = numBuckets; + this.partCols = partCols; + this.serName = serName; + this.mapProp = mapProp; + this.ifNotExists = ifNotExists; } - @explain(displayName="if not exists") + @explain(displayName = "if not exists") public boolean getIfNotExists() { return ifNotExists; } @@ -90,7 +86,7 @@ this.ifNotExists = ifNotExists; } - @explain(displayName="name") + @explain(displayName = "name") public String getTableName() { return tableName; } @@ -103,11 +99,11 @@ return cols; } - @explain(displayName="columns") + @explain(displayName = "columns") public List getColsString() { return Utilities.getFieldSchemaString(getCols()); } - + public void setCols(List cols) { this.cols 
= cols; } @@ -116,7 +112,7 @@ return partCols; } - @explain(displayName="partition columns") + @explain(displayName = "partition columns") public List getPartColsString() { return Utilities.getFieldSchemaString(getPartCols()); } @@ -125,7 +121,7 @@ this.partCols = partCols; } - @explain(displayName="bucket columns") + @explain(displayName = "bucket columns") public List getBucketCols() { return bucketCols; } @@ -134,7 +130,7 @@ this.bucketCols = bucketCols; } - @explain(displayName="# buckets") + @explain(displayName = "# buckets") public int getNumBuckets() { return numBuckets; } @@ -143,7 +139,7 @@ this.numBuckets = numBuckets; } - @explain(displayName="field delimiter") + @explain(displayName = "field delimiter") public String getFieldDelim() { return fieldDelim; } @@ -152,7 +148,7 @@ this.fieldDelim = fieldDelim; } - @explain(displayName="field escape") + @explain(displayName = "field escape") public String getFieldEscape() { return fieldEscape; } @@ -161,7 +157,7 @@ this.fieldEscape = fieldEscape; } - @explain(displayName="collection delimiter") + @explain(displayName = "collection delimiter") public String getCollItemDelim() { return collItemDelim; } @@ -170,7 +166,7 @@ this.collItemDelim = collItemDelim; } - @explain(displayName="map key delimiter") + @explain(displayName = "map key delimiter") public String getMapKeyDelim() { return mapKeyDelim; } @@ -179,7 +175,7 @@ this.mapKeyDelim = mapKeyDelim; } - @explain(displayName="line delimiter") + @explain(displayName = "line delimiter") public String getLineDelim() { return lineDelim; } @@ -188,7 +184,7 @@ this.lineDelim = lineDelim; } - @explain(displayName="comment") + @explain(displayName = "comment") public String getComment() { return comment; } @@ -197,7 +193,7 @@ this.comment = comment; } - @explain(displayName="input format") + @explain(displayName = "input format") public String getInputFormat() { return inputFormat; } @@ -206,7 +202,7 @@ this.inputFormat = inputFormat; } - @explain(displayName="output format") + @explain(displayName = "output format") public String getOutputFormat() { return outputFormat; } @@ -215,7 +211,7 @@ this.outputFormat = outputFormat; } - @explain(displayName="location") + @explain(displayName = "location") public String getLocation() { return location; } @@ -224,7 +220,7 @@ this.location = location; } - @explain(displayName="isExternal") + @explain(displayName = "isExternal") public boolean isExternal() { return isExternal; } @@ -236,46 +232,49 @@ /** * @return the sortCols */ - @explain(displayName="sort columns") + @explain(displayName = "sort columns") public List getSortCols() { return sortCols; } /** - * @param sortCols the sortCols to set + * @param sortCols + * the sortCols to set */ public void setSortCols(List sortCols) { this.sortCols = sortCols; } - /** - * @return the serDeName - */ - @explain(displayName="serde name") - public String getSerName() { - return serName; - } + /** + * @return the serDeName + */ + @explain(displayName = "serde name") + public String getSerName() { + return serName; + } - /** - * @param serName the serName to set - */ - public void setSerName(String serName) { - this.serName = serName; - } + /** + * @param serName + * the serName to set + */ + public void setSerName(String serName) { + this.serName = serName; + } - /** - * @return the serDe properties - */ - @explain(displayName="serde properties") - public Map getMapProp() { - return mapProp; - } + /** + * @return the serDe properties + */ + @explain(displayName = "serde properties") + public Map getMapProp() 
{ + return mapProp; + } - /** - * @param mapProp the map properties to set - */ - public void setMapProp(Map mapProp) { - this.mapProp = mapProp; - } - + /** + * @param mapProp + * the map properties to set + */ + public void setMapProp(Map mapProp) { + this.mapProp = mapProp; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/showTablesDesc.java (working copy) @@ -19,14 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; + import org.apache.hadoop.fs.Path; -@explain(displayName="Show Tables") -public class showTablesDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Show Tables") +public class showTablesDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - String pattern; - Path resFile; + String pattern; + Path resFile; /** * table name for the result of show tables */ @@ -43,7 +43,7 @@ public String getSchema() { return schema; } - + /** * @param resFile */ @@ -53,7 +53,8 @@ } /** - * @param pattern names of tables to show + * @param pattern + * names of tables to show */ public showTablesDesc(Path resFile, String pattern) { this.resFile = resFile; @@ -63,13 +64,14 @@ /** * @return the pattern */ - @explain(displayName="pattern") + @explain(displayName = "pattern") public String getPattern() { return pattern; } /** - * @param pattern the pattern to set + * @param pattern + * the pattern to set */ public void setPattern(String pattern) { this.pattern = pattern; @@ -82,12 +84,14 @@ return resFile; } - @explain(displayName="result file", normalExplain=false) + @explain(displayName = "result file", normalExplain = false) public String getResFileString() { return getResFile().getName(); } + /** - * @param resFile the resFile to set + * @param resFile + * the resFile to set */ public void setResFile(Path resFile) { this.resFile = resFile; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/mapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/mapredWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/mapredWork.java (working copy) @@ -18,25 +18,29 @@ package org.apache.hadoop.hive.ql.plan; -import java.util.*; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; -@explain(displayName="Map Reduce") +@explain(displayName = "Map Reduce") public class mapredWork implements Serializable { private static final long serialVersionUID = 1L; private String command; // map side work - // use LinkedHashMap to make sure the iteration order is - // deterministic, to ease testing - private LinkedHashMap> pathToAliases; - - private LinkedHashMap pathToPartitionInfo; - - private LinkedHashMap> aliasToWork; - + // use LinkedHashMap to make sure the iteration order is + // deterministic, to ease testing + private LinkedHashMap> pathToAliases; + + private LinkedHashMap pathToPartitionInfo; + + private LinkedHashMap> aliasToWork; + private LinkedHashMap aliasToPartnInfo; // map<->reduce interface @@ -47,117 +51,125 @@ private List tagToValueDesc; private Operator 
reducer; - + private Integer numReduceTasks; - + private boolean needsTagging; private mapredLocalWork mapLocalWork; - public mapredWork() { - this.aliasToPartnInfo = new LinkedHashMap(); + public mapredWork() { + aliasToPartnInfo = new LinkedHashMap(); } public mapredWork( - final String command, - final LinkedHashMap> pathToAliases, - final LinkedHashMap pathToPartitionInfo, - final LinkedHashMap> aliasToWork, - final tableDesc keyDesc, - List tagToValueDesc, - final Operator reducer, - final Integer numReduceTasks, - final mapredLocalWork mapLocalWork) { - this.command = command; - this.pathToAliases = pathToAliases; - this.pathToPartitionInfo = pathToPartitionInfo; - this.aliasToWork = aliasToWork; - this.keyDesc = keyDesc; - this.tagToValueDesc = tagToValueDesc; - this.reducer = reducer; - this.numReduceTasks = numReduceTasks; - this.mapLocalWork = mapLocalWork; - this.aliasToPartnInfo = new LinkedHashMap(); + final String command, + final LinkedHashMap> pathToAliases, + final LinkedHashMap pathToPartitionInfo, + final LinkedHashMap> aliasToWork, + final tableDesc keyDesc, List tagToValueDesc, + final Operator reducer, final Integer numReduceTasks, + final mapredLocalWork mapLocalWork) { + this.command = command; + this.pathToAliases = pathToAliases; + this.pathToPartitionInfo = pathToPartitionInfo; + this.aliasToWork = aliasToWork; + this.keyDesc = keyDesc; + this.tagToValueDesc = tagToValueDesc; + this.reducer = reducer; + this.numReduceTasks = numReduceTasks; + this.mapLocalWork = mapLocalWork; + aliasToPartnInfo = new LinkedHashMap(); } public String getCommand() { - return this.command; + return command; } + public void setCommand(final String command) { this.command = command; } - @explain(displayName="Path -> Alias", normalExplain=false) - public LinkedHashMap> getPathToAliases() { - return this.pathToAliases; + @explain(displayName = "Path -> Alias", normalExplain = false) + public LinkedHashMap> getPathToAliases() { + return pathToAliases; } - public void setPathToAliases(final LinkedHashMap> pathToAliases) { + + public void setPathToAliases( + final LinkedHashMap> pathToAliases) { this.pathToAliases = pathToAliases; } - @explain(displayName="Path -> Partition", normalExplain=false) - public LinkedHashMap getPathToPartitionInfo() { - return this.pathToPartitionInfo; + @explain(displayName = "Path -> Partition", normalExplain = false) + public LinkedHashMap getPathToPartitionInfo() { + return pathToPartitionInfo; } - public void setPathToPartitionInfo(final LinkedHashMap pathToPartitionInfo) { + public void setPathToPartitionInfo( + final LinkedHashMap pathToPartitionInfo) { this.pathToPartitionInfo = pathToPartitionInfo; } - + /** * @return the aliasToPartnInfo */ public LinkedHashMap getAliasToPartnInfo() { return aliasToPartnInfo; } - + /** - * @param aliasToPartnInfo the aliasToPartnInfo to set + * @param aliasToPartnInfo + * the aliasToPartnInfo to set */ public void setAliasToPartnInfo( LinkedHashMap aliasToPartnInfo) { this.aliasToPartnInfo = aliasToPartnInfo; } - - @explain(displayName="Alias -> Map Operator Tree") + + @explain(displayName = "Alias -> Map Operator Tree") public LinkedHashMap> getAliasToWork() { - return this.aliasToWork; + return aliasToWork; } - public void setAliasToWork(final LinkedHashMap> aliasToWork) { - this.aliasToWork=aliasToWork; + + public void setAliasToWork( + final LinkedHashMap> aliasToWork) { + this.aliasToWork = aliasToWork; } - /** * @return the mapredLocalWork */ - @explain(displayName="Local Work") + @explain(displayName = "Local 
Work") public mapredLocalWork getMapLocalWork() { return mapLocalWork; } /** - * @param mapLocalWork the mapredLocalWork to set + * @param mapLocalWork + * the mapredLocalWork to set */ public void setMapLocalWork(final mapredLocalWork mapLocalWork) { this.mapLocalWork = mapLocalWork; } public tableDesc getKeyDesc() { - return this.keyDesc; + return keyDesc; } + public void setKeyDesc(final tableDesc keyDesc) { this.keyDesc = keyDesc; } + public List getTagToValueDesc() { return tagToValueDesc; } + public void setTagToValueDesc(final List tagToValueDesc) { this.tagToValueDesc = tagToValueDesc; } - @explain(displayName="Reduce Operator Tree") + @explain(displayName = "Reduce Operator Tree") public Operator getReducer() { - return this.reducer; + return reducer; } public void setReducer(final Operator reducer) { @@ -165,59 +177,62 @@ } /** - * If the number of reducers is -1, the runtime will automatically - * figure it out by input data size. + * If the number of reducers is -1, the runtime will automatically figure it + * out by input data size. * - * The number of reducers will be a positive number only in case the - * target table is bucketed into N buckets (through CREATE TABLE). - * This feature is not supported yet, so the number of reducers will - * always be -1 for now. + * The number of reducers will be a positive number only in case the target + * table is bucketed into N buckets (through CREATE TABLE). This feature is + * not supported yet, so the number of reducers will always be -1 for now. */ public Integer getNumReduceTasks() { - return this.numReduceTasks; + return numReduceTasks; } + public void setNumReduceTasks(final Integer numReduceTasks) { this.numReduceTasks = numReduceTasks; } + @SuppressWarnings("nls") - public void addMapWork(String path, String alias, Operator work, partitionDesc pd) { - ArrayList curAliases = this.pathToAliases.get(path); - if(curAliases == null) { - assert(this.pathToPartitionInfo.get(path) == null); - curAliases = new ArrayList (); - this.pathToAliases.put(path, curAliases); - this.pathToPartitionInfo.put(path, pd); + public void addMapWork(String path, String alias, Operator work, + partitionDesc pd) { + ArrayList curAliases = pathToAliases.get(path); + if (curAliases == null) { + assert (pathToPartitionInfo.get(path) == null); + curAliases = new ArrayList(); + pathToAliases.put(path, curAliases); + pathToPartitionInfo.put(path, pd); } else { - assert(this.pathToPartitionInfo.get(path) != null); + assert (pathToPartitionInfo.get(path) != null); } - for(String oneAlias: curAliases) { - if(oneAlias.equals(alias)) { - throw new RuntimeException ("Multiple aliases named: " + alias + " for path: " + path); + for (String oneAlias : curAliases) { + if (oneAlias.equals(alias)) { + throw new RuntimeException("Multiple aliases named: " + alias + + " for path: " + path); } } curAliases.add(alias); - if(this.aliasToWork.get(alias) != null) { - throw new RuntimeException ("Existing work for alias: " + alias); + if (aliasToWork.get(alias) != null) { + throw new RuntimeException("Existing work for alias: " + alias); } - this.aliasToWork.put(alias, work); + aliasToWork.put(alias, work); } @SuppressWarnings("nls") - public String isInvalid () { - if((getNumReduceTasks() >= 1) && (getReducer() == null)) { + public String isInvalid() { + if ((getNumReduceTasks() >= 1) && (getReducer() == null)) { return "Reducers > 0 but no reduce operator"; } - if((getNumReduceTasks() == 0) && (getReducer() != null)) { + if ((getNumReduceTasks() == 0) && (getReducer() != null)) 
{ return "Reducers == 0 but reduce operator specified"; } return null; } - public String toXML () { + public String toXML() { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Utilities.serializeMapRedWork(this, baos); return (baos.toString()); @@ -226,13 +241,13 @@ // non bean /** - * For each map side operator - stores the alias the operator is working on behalf - * of in the operator runtime state. This is used by reducesink operator - but could - * be useful for debugging as well. + * For each map side operator - stores the alias the operator is working on + * behalf of in the operator runtime state. This is used by reducesink + * operator - but could be useful for debugging as well. */ - private void setAliases () { - for(String oneAlias: this.aliasToWork.keySet()) { - this.aliasToWork.get(oneAlias).setAlias(oneAlias); + private void setAliases() { + for (String oneAlias : aliasToWork.keySet()) { + aliasToWork.get(oneAlias).setAlias(oneAlias); } } @@ -240,24 +255,24 @@ * Derive additional attributes to be rendered by EXPLAIN. */ public void deriveExplainAttributes() { - if (this.pathToPartitionInfo == null) { + if (pathToPartitionInfo == null) { return; } - for (Map.Entry entry - : this.pathToPartitionInfo.entrySet()) { + for (Map.Entry entry : pathToPartitionInfo + .entrySet()) { entry.getValue().deriveBaseFileName(entry.getKey()); } } - public void initialize () { + public void initialize() { setAliases(); } - @explain(displayName="Needs Tagging", normalExplain=false) + @explain(displayName = "Needs Tagging", normalExplain = false) public boolean getNeedsTagging() { - return this.needsTagging; + return needsTagging; } - + public void setNeedsTagging(boolean needsTagging) { this.needsTagging = needsTagging; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/limitDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/limitDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/limitDesc.java (working copy) @@ -20,20 +20,24 @@ import java.io.Serializable; -@explain(displayName="Limit") +@explain(displayName = "Limit") public class limitDesc implements Serializable { private static final long serialVersionUID = 1L; private int limit; - public limitDesc() { } + + public limitDesc() { + } + public limitDesc(final int limit) { this.limit = limit; } public int getLimit() { - return this.limit; + return limit; } + public void setLimit(final int limit) { - this.limit=limit; + this.limit = limit; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/selectDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/selectDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/selectDesc.java (working copy) @@ -20,72 +20,83 @@ import java.io.Serializable; -@explain(displayName="Select Operator") +@explain(displayName = "Select Operator") public class selectDesc implements Serializable { private static final long serialVersionUID = 1L; private java.util.ArrayList colList; private java.util.ArrayList outputColumnNames; private boolean selectStar; private boolean selStarNoCompute; - public selectDesc() { } + public selectDesc() { + } + public selectDesc(final boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; } - public selectDesc(final java.util.ArrayList colList, final java.util.ArrayList outputColumnNames) { + public selectDesc( + final java.util.ArrayList colList, + final java.util.ArrayList 
outputColumnNames) { this(colList, outputColumnNames, false); } - + public selectDesc( - final java.util.ArrayList colList,java.util.ArrayList outputColumnNames, final boolean selectStar) { + final java.util.ArrayList colList, + java.util.ArrayList outputColumnNames, + final boolean selectStar) { this.colList = colList; this.selectStar = selectStar; this.outputColumnNames = outputColumnNames; } public selectDesc( - final java.util.ArrayList colList, final boolean selectStar, - final boolean selStarNoCompute) { + final java.util.ArrayList colList, + final boolean selectStar, final boolean selStarNoCompute) { this.colList = colList; this.selectStar = selectStar; this.selStarNoCompute = selStarNoCompute; } - - @explain(displayName="expressions") + + @explain(displayName = "expressions") public java.util.ArrayList getColList() { - return this.colList; + return colList; } - public void setColList(final java.util.ArrayList colList) { - this.colList=colList; + + public void setColList( + final java.util.ArrayList colList) { + this.colList = colList; } - - @explain(displayName="outputColumnNames") + + @explain(displayName = "outputColumnNames") public java.util.ArrayList getOutputColumnNames() { return outputColumnNames; } + public void setOutputColumnNames( java.util.ArrayList outputColumnNames) { this.outputColumnNames = outputColumnNames; } - - @explain(displayName="SELECT * ") + + @explain(displayName = "SELECT * ") public String explainNoCompute() { - if(isSelStarNoCompute()) { + if (isSelStarNoCompute()) { return "(no compute)"; } else { return null; } } - + /** * @return the selectStar */ public boolean isSelectStar() { return selectStar; } + /** - * @param selectStar the selectStar to set + * @param selectStar + * the selectStar to set */ public void setSelectStar(boolean selectStar) { this.selectStar = selectStar; @@ -99,7 +110,8 @@ } /** - * @param selStarNoCompute the selStarNoCompute to set + * @param selStarNoCompute + * the selStarNoCompute to set */ public void setSelStarNoCompute(boolean selStarNoCompute) { this.selStarNoCompute = selStarNoCompute; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/loadFileDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/loadFileDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/loadFileDesc.java (working copy) @@ -20,68 +20,70 @@ import java.io.Serializable; -import org.apache.hadoop.hive.ql.plan.loadDesc; - public class loadFileDesc extends loadDesc implements Serializable { private static final long serialVersionUID = 1L; private String targetDir; private boolean isDfsDir; // list of columns, comma separated - private String columns; - private String columnTypes; + private String columns; + private String columnTypes; - public loadFileDesc() { } - public loadFileDesc( - final String sourceDir, - final String targetDir, - final boolean isDfsDir, - final String columns, - final String columnTypes) { + public loadFileDesc() { + } + public loadFileDesc(final String sourceDir, final String targetDir, + final boolean isDfsDir, final String columns, final String columnTypes) { + super(sourceDir); this.targetDir = targetDir; this.isDfsDir = isDfsDir; this.columns = columns; this.columnTypes = columnTypes; } - - @explain(displayName="destination") + + @explain(displayName = "destination") public String getTargetDir() { - return this.targetDir; + return targetDir; } + public void setTargetDir(final String targetDir) { - this.targetDir=targetDir; + 
this.targetDir = targetDir; } - - @explain(displayName="hdfs directory") + + @explain(displayName = "hdfs directory") public boolean getIsDfsDir() { - return this.isDfsDir; + return isDfsDir; } + public void setIsDfsDir(final boolean isDfsDir) { this.isDfsDir = isDfsDir; } - - /** - * @return the columns - */ - public String getColumns() { - return columns; - } - - /** - * @param columns the columns to set - */ - public void setColumns(String columns) { - this.columns = columns; - } + /** + * @return the columns + */ + public String getColumns() { + return columns; + } + + /** + * @param columns + * the columns to set + */ + public void setColumns(String columns) { + this.columns = columns; + } + + /** * @return the columnTypes */ public String getColumnTypes() { return columnTypes; } + /** - * @param columnTypes the columnTypes to set + * @param columnTypes + * the columnTypes to set */ public void setColumnTypes(String columnTypes) { this.columnTypes = columnTypes; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/loadDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/loadDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/loadDesc.java (working copy) @@ -24,18 +24,20 @@ private static final long serialVersionUID = 1L; private String sourceDir; - public loadDesc() { } - public loadDesc( - final String sourceDir) { + public loadDesc() { + } + public loadDesc(final String sourceDir) { + this.sourceDir = sourceDir; } - - @explain(displayName="source", normalExplain=false) + + @explain(displayName = "source", normalExplain = false) public String getSourceDir() { - return this.sourceDir; + return sourceDir; } + public void setSourceDir(final String source) { - this.sourceDir = source; + sourceDir = source; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/alterTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/alterTableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/alterTableDesc.java (working copy) @@ -26,54 +26,61 @@ import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.Utilities; -@explain(displayName="Alter Table") -public class alterTableDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Alter Table") +public class alterTableDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - public static enum alterTableTypes {RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS, ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN}; - - alterTableTypes op; - String oldName; - String newName; - List newCols; - String serdeName; - Map props; - String inputFormat; - String outputFormat; - int numberBuckets; - List bucketColumns; - List sortColumns; + public static enum alterTableTypes { + RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS, ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN + }; - String oldColName; - String newColName; - String newColType; - String newColComment; - boolean first; - String afterCol; - + alterTableTypes op; + String oldName; + String newName; + List newCols; + String serdeName; + Map props; + String inputFormat; + String outputFormat; + int numberBuckets; + List bucketColumns; + List sortColumns; + + String oldColName; + String newColName; + String newColType; + String newColComment; + boolean first; + String afterCol; + /** - * @param tblName 
table name - * @param oldColName old column name - * @param newColName new column name - * @param newComment - * @param newType + * @param tblName + * table name + * @param oldColName + * old column name + * @param newColName + * new column name + * @param newComment + * @param newType */ - public alterTableDesc(String tblName, String oldColName, String newColName, String newType, String newComment, boolean first, String afterCol) { + public alterTableDesc(String tblName, String oldColName, String newColName, + String newType, String newComment, boolean first, String afterCol) { super(); - this.oldName = tblName; + oldName = tblName; this.oldColName = oldColName; this.newColName = newColName; - this.newColType = newType; - this.newColComment = newComment; + newColType = newType; + newColComment = newComment; this.first = first; this.afterCol = afterCol; - this.op = alterTableTypes.RENAMECOLUMN; + op = alterTableTypes.RENAMECOLUMN; } /** - * @param oldName old name of the table - * @param newName new name of the table + * @param oldName + * old name of the table + * @param newName + * new name of the table */ public alterTableDesc(String oldName, String newName) { op = alterTableTypes.RENAME; @@ -82,56 +89,65 @@ } /** - * @param name name of the table - * @param newCols new columns to be added + * @param name + * name of the table + * @param newCols + * new columns to be added */ - public alterTableDesc(String name, List newCols, alterTableTypes alterType) { - this.op = alterType; - this.oldName = name; + public alterTableDesc(String name, List newCols, + alterTableTypes alterType) { + op = alterType; + oldName = name; this.newCols = newCols; } - + /** - * @param alterType type of alter op + * @param alterType + * type of alter op */ public alterTableDesc(alterTableTypes alterType) { - this.op = alterType; + op = alterType; } /** * - * @param name name of the table - * @param inputFormat new table input format - * @param outputFormat new table output format + * @param name + * name of the table + * @param inputFormat + * new table input format + * @param outputFormat + * new table output format */ - public alterTableDesc(String name, String inputFormat, String outputFormat, String serdeName) { - super(); - this.op = alterTableTypes.ADDFILEFORMAT; - this.oldName = name; - this.inputFormat = inputFormat; - this.outputFormat = outputFormat; - this.serdeName = serdeName; + public alterTableDesc(String name, String inputFormat, String outputFormat, + String serdeName) { + super(); + op = alterTableTypes.ADDFILEFORMAT; + oldName = name; + this.inputFormat = inputFormat; + this.outputFormat = outputFormat; + this.serdeName = serdeName; } - + public alterTableDesc(String tableName, int numBuckets, List bucketCols, List sortCols) { - this.oldName = tableName; - this.op = alterTableTypes.ADDCLUSTERSORTCOLUMN; - this.numberBuckets = numBuckets; - this.bucketColumns = bucketCols; - this.sortColumns = sortCols; + oldName = tableName; + op = alterTableTypes.ADDCLUSTERSORTCOLUMN; + numberBuckets = numBuckets; + bucketColumns = bucketCols; + sortColumns = sortCols; } - /** + /** * @return the old name of the table */ - @explain(displayName="old name") + @explain(displayName = "old name") public String getOldName() { return oldName; } /** - * @param oldName the oldName to set + * @param oldName + * the oldName to set */ public void setOldName(String oldName) { this.oldName = oldName; @@ -140,13 +156,14 @@ /** * @return the newName */ - @explain(displayName="new name") + @explain(displayName = "new name") 
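A minimal, hypothetical sketch of how the rename constructors above might be used; it exercises only constructors and getters visible in this patch, and the table and column names are placeholders:

import org.apache.hadoop.hive.ql.plan.alterTableDesc;

public class AlterTableDescSketch {
  public static void main(String[] args) {
    // Two-argument constructor: op is set to alterTableTypes.RENAME.
    alterTableDesc rename = new alterTableDesc("old_tbl", "new_tbl");
    System.out.println(rename.getOldName() + " -> " + rename.getNewName()
        + " (" + rename.getAlterTableTypeString() + ")"); // "... (rename)"

    // Seven-argument constructor: op is set to alterTableTypes.RENAMECOLUMN.
    alterTableDesc renameCol = new alterTableDesc("old_tbl", "c1", "c2",
        "string", "renamed in this sketch", false, null);
    System.out.println("renaming column " + renameCol.getOldColName());
  }
}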
public String getNewName() { return newName; } /** - * @param newName the newName to set + * @param newName + * the newName to set */ public void setNewName(String newName) { this.newName = newName; @@ -159,9 +176,9 @@ return op; } - @explain(displayName="type") + @explain(displayName = "type") public String getAlterTableTypeString() { - switch(op) { + switch (op) { case RENAME: return "rename"; case ADDCOLS: @@ -169,11 +186,13 @@ case REPLACECOLS: return "replace columns"; } - + return "unknown"; } + /** - * @param op the op to set + * @param op + * the op to set */ public void setOp(alterTableTypes op) { this.op = op; @@ -186,12 +205,14 @@ return newCols; } - @explain(displayName="new columns") + @explain(displayName = "new columns") public List getNewColsString() { return Utilities.getFieldSchemaString(getNewCols()); } + /** - * @param newCols the newCols to set + * @param newCols + * the newCols to set */ public void setNewCols(List newCols) { this.newCols = newCols; @@ -200,13 +221,14 @@ /** * @return the serdeName */ - @explain(displayName="deserializer library") + @explain(displayName = "deserializer library") public String getSerdeName() { return serdeName; } /** - * @param serdeName the serdeName to set + * @param serdeName + * the serdeName to set */ public void setSerdeName(String serdeName) { this.serdeName = serdeName; @@ -215,99 +237,106 @@ /** * @return the props */ - @explain(displayName="properties") + @explain(displayName = "properties") public Map getProps() { return props; } /** - * @param props the props to set + * @param props + * the props to set */ public void setProps(Map props) { this.props = props; } - + /** * @return the input format */ - @explain(displayName="input format") - public String getInputFormat() { - return inputFormat; + @explain(displayName = "input format") + public String getInputFormat() { + return inputFormat; } /** - * @param inputFormat the input format to set + * @param inputFormat + * the input format to set */ - public void setInputFormat(String inputFormat) { - this.inputFormat = inputFormat; + public void setInputFormat(String inputFormat) { + this.inputFormat = inputFormat; } /** * @return the output format */ - @explain(displayName="output format") - public String getOutputFormat() { - return outputFormat; + @explain(displayName = "output format") + public String getOutputFormat() { + return outputFormat; } /** - * @param outputFormat the output format to set + * @param outputFormat + * the output format to set */ - public void setOutputFormat(String outputFormat) { - this.outputFormat = outputFormat; + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; } - /** - * @return the number of buckets - */ - public int getNumberBuckets() { - return numberBuckets; + /** + * @return the number of buckets + */ + public int getNumberBuckets() { + return numberBuckets; } - /** - * @param numberBuckets the number of buckets to set - */ - public void setNumberBuckets(int numberBuckets) { - this.numberBuckets = numberBuckets; + /** + * @param numberBuckets + * the number of buckets to set + */ + public void setNumberBuckets(int numberBuckets) { + this.numberBuckets = numberBuckets; } - /** - * @return the bucket columns - */ - public List getBucketColumns() { - return bucketColumns; + /** + * @return the bucket columns + */ + public List getBucketColumns() { + return bucketColumns; } - /** - * @param bucketColumns the bucket columns to set - */ - public void setBucketColumns(List bucketColumns) { - 
this.bucketColumns = bucketColumns; + /** + * @param bucketColumns + * the bucket columns to set + */ + public void setBucketColumns(List bucketColumns) { + this.bucketColumns = bucketColumns; } - /** - * @return the sort columns - */ - public List getSortColumns() { - return sortColumns; + /** + * @return the sort columns + */ + public List getSortColumns() { + return sortColumns; } - /** - * @param sortColumns the sort columns to set - */ - public void setSortColumns(List sortColumns) { - this.sortColumns = sortColumns; + /** + * @param sortColumns + * the sort columns to set + */ + public void setSortColumns(List sortColumns) { + this.sortColumns = sortColumns; } -/** - * @return old column name - */ + /** + * @return old column name + */ public String getOldColName() { return oldColName; } /** - * @param oldColName the old column name + * @param oldColName + * the old column name */ public void setOldColName(String oldColName) { this.oldColName = oldColName; @@ -321,7 +350,8 @@ } /** - * @param newColName the new column name + * @param newColName + * the new column name */ public void setNewColName(String newColName) { this.newColName = newColName; @@ -335,10 +365,11 @@ } /** - * @param newType new column's type + * @param newType + * new column's type */ public void setNewColType(String newType) { - this.newColType = newType; + newColType = newType; } /** @@ -349,10 +380,11 @@ } /** - * @param newComment new column's comment + * @param newComment + * new column's comment */ public void setNewColComment(String newComment) { - this.newColComment = newComment; + newColComment = newComment; } /** @@ -363,7 +395,8 @@ } /** - * @param first set the column to position 0 + * @param first + * set the column to position 0 */ public void setFirst(boolean first) { this.first = first; @@ -377,7 +410,8 @@ } /** - * @param afterCol set the column's after position + * @param afterCol + * set the column's after position */ public void setAfterCol(String afterCol) { this.afterCol = afterCol; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeNullDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeNullDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeNullDesc.java (working copy) @@ -24,18 +24,19 @@ import org.apache.hadoop.io.NullWritable; public class exprNodeNullDesc extends exprNodeDesc implements Serializable { - + private static final long serialVersionUID = 1L; public exprNodeNullDesc() { - super(TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class)); + super(TypeInfoFactory + .getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class)); } public Object getValue() { return null; } - @explain(displayName="expr") + @explain(displayName = "expr") @Override public String getExprString() { return "null"; @@ -45,14 +46,16 @@ public exprNodeDesc clone() { return new exprNodeNullDesc(); } - + @Override public boolean isSame(Object o) { - if (!(o instanceof exprNodeNullDesc)) + if (!(o instanceof exprNodeNullDesc)) { return false; - if (!typeInfo.equals(((exprNodeNullDesc)o).getTypeInfo())) + } + if (!typeInfo.equals(((exprNodeNullDesc) o).getTypeInfo())) { return false; - - return true; + } + + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/createTableLikeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/createTableLikeDesc.java (revision 901511) +++ 
ql/src/java/org/apache/hadoop/hive/ql/plan/createTableLikeDesc.java (working copy) @@ -20,28 +20,25 @@ import java.io.Serializable; -@explain(displayName="Create Table") -public class createTableLikeDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Create Table") +public class createTableLikeDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - String tableName; - boolean isExternal; - String location; - boolean ifNotExists; - String likeTableName; - - public createTableLikeDesc(String tableName, boolean isExternal, - String location, - boolean ifNotExists, - String likeTableName) { - this.tableName = tableName; - this.isExternal = isExternal; - this.location = location; - this.ifNotExists = ifNotExists; - this.likeTableName = likeTableName; + String tableName; + boolean isExternal; + String location; + boolean ifNotExists; + String likeTableName; + + public createTableLikeDesc(String tableName, boolean isExternal, + String location, boolean ifNotExists, String likeTableName) { + this.tableName = tableName; + this.isExternal = isExternal; + this.location = location; + this.ifNotExists = ifNotExists; + this.likeTableName = likeTableName; } - @explain(displayName="if not exists") + @explain(displayName = "if not exists") public boolean getIfNotExists() { return ifNotExists; } @@ -50,7 +47,7 @@ this.ifNotExists = ifNotExists; } - @explain(displayName="name") + @explain(displayName = "name") public String getTableName() { return tableName; } @@ -59,7 +56,7 @@ this.tableName = tableName; } - @explain(displayName="location") + @explain(displayName = "location") public String getLocation() { return location; } @@ -68,7 +65,7 @@ this.location = location; } - @explain(displayName="isExternal") + @explain(displayName = "isExternal") public boolean isExternal() { return isExternal; } @@ -77,7 +74,7 @@ this.isExternal = isExternal; } - @explain(displayName="like") + @explain(displayName = "like") public String getLikeTableName() { return likeTableName; } @@ -86,5 +83,4 @@ this.likeTableName = likeTableName; } - } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/explosionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/explosionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/explosionDesc.java (working copy) @@ -16,33 +16,37 @@ * limitations under the License. 
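A short, hypothetical usage sketch for the CREATE TABLE ... LIKE descriptor above; the table names and warehouse path are illustrative placeholders, and only the constructor and getters shown in this patch are used:

import org.apache.hadoop.hive.ql.plan.createTableLikeDesc;

public class CreateTableLikeSketch {
  public static void main(String[] args) {
    // CREATE TABLE IF NOT EXISTS new_tbl LIKE src_tbl at an explicit location.
    createTableLikeDesc desc = new createTableLikeDesc("new_tbl", false,
        "/user/hive/warehouse/new_tbl", true, "src_tbl");
    System.out.println("create " + desc.getTableName() + " like "
        + desc.getLikeTableName()
        + (desc.getIfNotExists() ? " (if not exists)" : ""));
  }
}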
*/ - package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -@explain(displayName="Explosion") +@explain(displayName = "Explosion") public class explosionDesc implements Serializable { private static final long serialVersionUID = 1L; private String fieldName; private int position; - public explosionDesc() { } - public explosionDesc( - final String fieldName, - final int position) { + + public explosionDesc() { + } + + public explosionDesc(final String fieldName, final int position) { this.fieldName = fieldName; this.position = position; } + public String getFieldName() { - return this.fieldName; + return fieldName; } + public void setFieldName(final String fieldName) { - this.fieldName=fieldName; + this.fieldName = fieldName; } + public int getPosition() { - return this.position; + return position; } + public void setPosition(final int position) { - this.position=position; + this.position = position; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java (working copy) @@ -20,7 +20,7 @@ import java.util.Map; /** - * Contains the information needed to add a partition. + * Contains the information needed to add a partition. */ public class AddPartitionDesc { @@ -28,12 +28,16 @@ String dbName; String location; Map partSpec; - + /** - * @param dbName database to add to. - * @param tableName table to add to. - * @param partSpec partition specification. - * @param location partition location, relative to table location. + * @param dbName + * database to add to. + * @param tableName + * table to add to. + * @param partSpec + * partition specification. + * @param location + * partition location, relative to table location. */ public AddPartitionDesc(String dbName, String tableName, Map partSpec, String location) { @@ -52,12 +56,13 @@ } /** - * @param dbName database name + * @param dbName + * database name */ public void setDbName(String dbName) { this.dbName = dbName; } - + /** * @return the table we're going to add the partitions to. */ @@ -66,7 +71,8 @@ } /** - * @param tableName the table we're going to add the partitions to. + * @param tableName + * the table we're going to add the partitions to. 
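A hypothetical sketch of filling in AddPartitionDesc as documented above; the database, table, partition values and relative location are placeholders, and the String-to-String partition spec is an assumption about the erased generic type:

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

public class AddPartitionSketch {
  public static void main(String[] args) {
    Map<String, String> spec = new LinkedHashMap<String, String>();
    spec.put("ds", "2010-01-20"); // illustrative partition key/value

    // dbName, tableName, partition spec, then a location relative to the
    // table location, as described in the constructor javadoc above.
    AddPartitionDesc desc = new AddPartitionDesc("default", "src_part", spec,
        "ds=2010-01-20");
    desc.setLocation("ds=2010-01-20"); // setter shown above; same value here
  }
}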
*/ public void setTableName(String tableName) { this.tableName = tableName; @@ -80,7 +86,8 @@ } /** - * @param location location of partition in relation to table + * @param location + * location of partition in relation to table */ public void setLocation(String location) { this.location = location; @@ -94,7 +101,8 @@ } /** - * @param partSpec partition specification + * @param partSpec + * partition specification */ public void setPartSpec(Map partSpec) { this.partSpec = partSpec; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/descFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/descFunctionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/descFunctionDesc.java (working copy) @@ -19,16 +19,16 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; + import org.apache.hadoop.fs.Path; -@explain(displayName="Describe Function") -public class descFunctionDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Describe Function") +public class descFunctionDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - String name; - Path resFile; - boolean isExtended; - + String name; + Path resFile; + boolean isExtended; + public boolean isExtended() { return isExtended; } @@ -36,7 +36,7 @@ public void setExtended(boolean isExtended) { this.isExtended = isExtended; } - + /** * table name for the result of show tables */ @@ -53,7 +53,7 @@ public String getSchema() { return schema; } - + /** * @param resFile */ @@ -63,7 +63,8 @@ } /** - * @param name of the function to describe + * @param name + * of the function to describe */ public descFunctionDesc(Path resFile, String name, boolean isExtended) { this.isExtended = isExtended; @@ -74,13 +75,14 @@ /** * @return the name */ - @explain(displayName="name") + @explain(displayName = "name") public String getName() { return name; } /** - * @param name is the function name + * @param name + * is the function name */ public void setName(String name) { this.name = name; @@ -93,12 +95,14 @@ return resFile; } - @explain(displayName="result file", normalExplain=false) + @explain(displayName = "result file", normalExplain = false) public String getResFileString() { return getResFile().getName(); } + /** - * @param resFile the resFile to set + * @param resFile + * the resFile to set */ public void setResFile(Path resFile) { this.resFile = resFile; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java (working copy) @@ -24,38 +24,43 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -public abstract class exprNodeDesc implements Serializable, Node { +public abstract class exprNodeDesc implements Serializable, Node { private static final long serialVersionUID = 1L; TypeInfo typeInfo; - - public exprNodeDesc() {} + + public exprNodeDesc() { + } + public exprNodeDesc(TypeInfo typeInfo) { this.typeInfo = typeInfo; if (typeInfo == null) { throw new RuntimeException("typeInfo cannot be null!"); } } - + + @Override public abstract exprNodeDesc clone(); // Cant use equals because the walker depends on them being object equal - // The default graph walker processes a node after its kids have been processed. 
That comparison needs + // The default graph walker processes a node after its kids have been + // processed. That comparison needs // object equality - isSame means that the objects are semantically equal. public abstract boolean isSame(Object o); - + public TypeInfo getTypeInfo() { - return this.typeInfo; + return typeInfo; } + public void setTypeInfo(TypeInfo typeInfo) { this.typeInfo = typeInfo; } public String getExprString() { - assert(false); + assert (false); return null; } - - @explain(displayName="type") + + @explain(displayName = "type") public String getTypeString() { return typeInfo.getTypeName(); } @@ -63,12 +68,12 @@ public List getCols() { return null; } - + @Override public List getChildren() { return null; } - + @Override public String getName() { return this.getClass().getName(); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/descTableDesc.java (working copy) @@ -23,15 +23,14 @@ import org.apache.hadoop.fs.Path; -@explain(displayName="Describe Table") -public class descTableDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Describe Table") +public class descTableDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - + String tableName; HashMap partSpec; - Path resFile; - boolean isExt; + Path resFile; + boolean isExt; /** * table name for the result of describe table */ @@ -48,14 +47,15 @@ public String getSchema() { return schema; } - + /** * @param isExt * @param partSpec * @param resFile * @param tableName */ - public descTableDesc(Path resFile, String tableName, HashMap partSpec, boolean isExt) { + public descTableDesc(Path resFile, String tableName, + HashMap partSpec, boolean isExt) { this.isExt = isExt; this.partSpec = partSpec; this.resFile = resFile; @@ -70,7 +70,8 @@ } /** - * @param isExt the isExt to set + * @param isExt + * the isExt to set */ public void setExt(boolean isExt) { this.isExt = isExt; @@ -79,13 +80,14 @@ /** * @return the tableName */ - @explain(displayName="table") + @explain(displayName = "table") public String getTableName() { return tableName; } /** - * @param tableName the tableName to set + * @param tableName + * the tableName to set */ public void setTableName(String tableName) { this.tableName = tableName; @@ -94,13 +96,14 @@ /** * @return the partSpec */ - @explain(displayName="partition") + @explain(displayName = "partition") public HashMap getPartSpec() { return partSpec; } /** - * @param partSpec the partSpec to set + * @param partSpec + * the partSpec to set */ public void setPartSpecs(HashMap partSpec) { this.partSpec = partSpec; @@ -113,13 +116,14 @@ return resFile; } - @explain(displayName="result file", normalExplain=false) + @explain(displayName = "result file", normalExplain = false) public String getResFileString() { return getResFile().getName(); } - + /** - * @param resFile the resFile to set + * @param resFile + * the resFile to set */ public void setResFile(Path resFile) { this.resFile = resFile; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/loadTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/loadTableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/loadTableDesc.java (working copy) @@ -21,7 +21,8 @@ import java.io.Serializable; 
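A hypothetical sketch of building the DESCRIBE TABLE descriptor above; the result-file path is a placeholder, and passing a null partition spec (describe the whole table) is an assumption rather than something this patch states:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.descTableDesc;

public class DescTableSketch {
  public static void main(String[] args) {
    Path resFile = new Path("/tmp/desc_src_tbl.txt"); // where output is written
    // Describe the whole table: no partition spec, extended output disabled.
    descTableDesc desc = new descTableDesc(resFile, "src_tbl", null, false);
    System.out.println("describe " + desc.getTableName() + " -> "
        + desc.getResFileString());
  }
}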
import java.util.HashMap; -public class loadTableDesc extends org.apache.hadoop.hive.ql.plan.loadDesc implements Serializable { +public class loadTableDesc extends org.apache.hadoop.hive.ql.plan.loadDesc + implements Serializable { private static final long serialVersionUID = 1L; private boolean replace; private String tmpDir; @@ -30,56 +31,58 @@ private org.apache.hadoop.hive.ql.plan.tableDesc table; private HashMap partitionSpec; - public loadTableDesc() { } - public loadTableDesc( - final String sourceDir, - final String tmpDir, - final org.apache.hadoop.hive.ql.plan.tableDesc table, - final HashMap partitionSpec, - final boolean replace) { + public loadTableDesc() { + } + public loadTableDesc(final String sourceDir, final String tmpDir, + final org.apache.hadoop.hive.ql.plan.tableDesc table, + final HashMap partitionSpec, final boolean replace) { + super(sourceDir); this.tmpDir = tmpDir; this.table = table; this.partitionSpec = partitionSpec; this.replace = replace; } - public loadTableDesc( - final String sourceDir, - final String tmpDir, - final org.apache.hadoop.hive.ql.plan.tableDesc table, - final HashMap partitionSpec) { + + public loadTableDesc(final String sourceDir, final String tmpDir, + final org.apache.hadoop.hive.ql.plan.tableDesc table, + final HashMap partitionSpec) { this(sourceDir, tmpDir, table, partitionSpec, true); } - @explain(displayName="tmp directory", normalExplain=false) + @explain(displayName = "tmp directory", normalExplain = false) public String getTmpDir() { - return this.tmpDir; + return tmpDir; } + public void setTmpDir(final String tmp) { - this.tmpDir = tmp; + tmpDir = tmp; } - @explain(displayName="table") + @explain(displayName = "table") public tableDesc getTable() { - return this.table; + return table; } + public void setTable(final org.apache.hadoop.hive.ql.plan.tableDesc table) { this.table = table; } - - @explain(displayName="partition") + + @explain(displayName = "partition") public HashMap getPartitionSpec() { - return this.partitionSpec; + return partitionSpec; } + public void setPartitionSpec(final HashMap partitionSpec) { this.partitionSpec = partitionSpec; } - @explain(displayName="replace") + @explain(displayName = "replace") public boolean getReplace() { return replace; } + public void setReplace(boolean replace) { this.replace = replace; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/groupByDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/groupByDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/groupByDesc.java (working copy) @@ -18,19 +18,24 @@ package org.apache.hadoop.hive.ql.plan; -@explain(displayName="Group By Operator") +@explain(displayName = "Group By Operator") public class groupByDesc implements java.io.Serializable { - /** Group-by Mode: - * COMPLETE: complete 1-phase aggregation: iterate, terminate - * PARTIAL1: partial aggregation - first phase: iterate, terminatePartial - * PARTIAL2: partial aggregation - second phase: merge, terminatePartial - * PARTIALS: For non-distinct the same as PARTIAL2, for distinct the same as PARTIAL1 - * FINAL: partial aggregation - final phase: merge, terminate - * HASH: For non-distinct the same as PARTIAL1 but use hash-table-based aggregation - * MERGEPARTIAL: FINAL for non-distinct aggregations, COMPLETE for distinct aggregations + /** + * Group-by Mode: COMPLETE: complete 1-phase aggregation: iterate, terminate + * PARTIAL1: partial aggregation - first phase: iterate, terminatePartial 
+ * PARTIAL2: partial aggregation - second phase: merge, terminatePartial + * PARTIALS: For non-distinct the same as PARTIAL2, for distinct the same as + * PARTIAL1 FINAL: partial aggregation - final phase: merge, terminate HASH: + * For non-distinct the same as PARTIAL1 but use hash-table-based aggregation + * MERGEPARTIAL: FINAL for non-distinct aggregations, COMPLETE for distinct + * aggregations */ private static final long serialVersionUID = 1L; - public static enum Mode { COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL }; + + public static enum Mode { + COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL + }; + private Mode mode; private boolean groupKeyNotReductionKey; private boolean bucketGroup; @@ -38,36 +43,41 @@ private java.util.ArrayList keys; private java.util.ArrayList aggregators; private java.util.ArrayList outputColumnNames; - public groupByDesc() { } + + public groupByDesc() { + } + public groupByDesc( - final Mode mode, - final java.util.ArrayList outputColumnNames, - final java.util.ArrayList keys, - final java.util.ArrayList aggregators, - final boolean groupKeyNotReductionKey) { - this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey, false); + final Mode mode, + final java.util.ArrayList outputColumnNames, + final java.util.ArrayList keys, + final java.util.ArrayList aggregators, + final boolean groupKeyNotReductionKey) { + this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey, + false); } - + public groupByDesc( final Mode mode, final java.util.ArrayList outputColumnNames, final java.util.ArrayList keys, final java.util.ArrayList aggregators, - final boolean groupKeyNotReductionKey, final boolean bucketGroup) { - this.mode = mode; - this.outputColumnNames = outputColumnNames; - this.keys = keys; - this.aggregators = aggregators; - this.groupKeyNotReductionKey = groupKeyNotReductionKey; - this.bucketGroup = bucketGroup; - } - + final boolean groupKeyNotReductionKey, final boolean bucketGroup) { + this.mode = mode; + this.outputColumnNames = outputColumnNames; + this.keys = keys; + this.aggregators = aggregators; + this.groupKeyNotReductionKey = groupKeyNotReductionKey; + this.bucketGroup = bucketGroup; + } + public Mode getMode() { - return this.mode; + return mode; } - @explain(displayName="mode") + + @explain(displayName = "mode") public String getModeString() { - switch(mode) { + switch (mode) { case COMPLETE: return "complete"; case PARTIAL1: @@ -83,48 +93,57 @@ case MERGEPARTIAL: return "mergepartial"; } - + return "unknown"; } + public void setMode(final Mode mode) { this.mode = mode; } - @explain(displayName="keys") + + @explain(displayName = "keys") public java.util.ArrayList getKeys() { - return this.keys; + return keys; } + public void setKeys(final java.util.ArrayList keys) { this.keys = keys; } - - @explain(displayName="outputColumnNames") + + @explain(displayName = "outputColumnNames") public java.util.ArrayList getOutputColumnNames() { return outputColumnNames; } + public void setOutputColumnNames( java.util.ArrayList outputColumnNames) { this.outputColumnNames = outputColumnNames; } - - @explain(displayName="aggregations") + + @explain(displayName = "aggregations") public java.util.ArrayList getAggregators() { - return this.aggregators; + return aggregators; } - public void setAggregators(final java.util.ArrayList aggregators) { + + public void setAggregators( + final java.util.ArrayList aggregators) { this.aggregators = aggregators; } public boolean getGroupKeyNotReductionKey() { 
- return this.groupKeyNotReductionKey; + return groupKeyNotReductionKey; } + public void setGroupKeyNotReductionKey(final boolean groupKeyNotReductionKey) { this.groupKeyNotReductionKey = groupKeyNotReductionKey; } - @explain(displayName="bucketGroup") - public boolean getBucketGroup() { - return bucketGroup; + + @explain(displayName = "bucketGroup") + public boolean getBucketGroup() { + return bucketGroup; } - public void setBucketGroup(boolean dataSorted) { - this.bucketGroup = dataSorted; + + public void setBucketGroup(boolean dataSorted) { + bucketGroup = dataSorted; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java (working copy) @@ -22,9 +22,9 @@ public class FunctionWork implements Serializable { private static final long serialVersionUID = 1L; - private createFunctionDesc createFunctionDesc; - private dropFunctionDesc dropFunctionDesc; - + private createFunctionDesc createFunctionDesc; + private dropFunctionDesc dropFunctionDesc; + public FunctionWork(createFunctionDesc createFunctionDesc) { this.createFunctionDesc = createFunctionDesc; } @@ -36,6 +36,7 @@ public createFunctionDesc getCreateFunctionDesc() { return createFunctionDesc; } + public void setCreateFunctionDesc(createFunctionDesc createFunctionDesc) { this.createFunctionDesc = createFunctionDesc; } @@ -43,8 +44,9 @@ public dropFunctionDesc getDropFunctionDesc() { return dropFunctionDesc; } + public void setDropFunctionDesc(dropFunctionDesc dropFunctionDesc) { this.dropFunctionDesc = dropFunctionDesc; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/tableScanDesc.java (working copy) @@ -21,28 +21,29 @@ import java.io.Serializable; /** - * Table Scan Descriptor - * Currently, data is only read from a base source as part of map-reduce framework. So, nothing is stored in the - * descriptor. But, more things will be added here as table scan is invoked as part of local work. + * Table Scan Descriptor Currently, data is only read from a base source as part + * of map-reduce framework. So, nothing is stored in the descriptor. But, more + * things will be added here as table scan is invoked as part of local work. 
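A hypothetical sketch illustrating the group-by modes documented above for groupByDesc; it uses only the no-arg constructor, setMode and getModeString shown in this patch, and the printed strings are the ones returned by the switch above:

import org.apache.hadoop.hive.ql.plan.groupByDesc;

public class GroupByModeSketch {
  public static void main(String[] args) {
    groupByDesc desc = new groupByDesc();

    // COMPLETE: single-phase aggregation (iterate, terminate).
    desc.setMode(groupByDesc.Mode.COMPLETE);
    System.out.println(desc.getModeString()); // prints "complete"

    // MERGEPARTIAL: reduce-side merge of map-side partial aggregations.
    desc.setMode(groupByDesc.Mode.MERGEPARTIAL);
    System.out.println(desc.getModeString()); // prints "mergepartial"
  }
}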
**/ -@explain(displayName="TableScan") +@explain(displayName = "TableScan") public class tableScanDesc implements Serializable { private static final long serialVersionUID = 1L; - + private String alias; - + @SuppressWarnings("nls") - public tableScanDesc() { } - + public tableScanDesc() { + } + public tableScanDesc(final String alias) { this.alias = alias; } - - @explain(displayName="alias") + + @explain(displayName = "alias") public String getAlias() { return alias; } - + public void setAlias(String alias) { this.alias = alias; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/mapJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/mapJoinDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/mapJoinDesc.java (working copy) @@ -19,9 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; - -import org.apache.hadoop.hive.ql.plan.exprNodeDesc; - import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -34,30 +31,28 @@ * Map Join operator Descriptor implementation. * */ -@explain(displayName="Common Join Operator") +@explain(displayName = "Common Join Operator") public class mapJoinDesc extends joinDesc implements Serializable { private static final long serialVersionUID = 1L; private Map> keys; private tableDesc keyTblDesc; private List valueTblDescs; - + private int posBigTable; - + private Map> retainList; - - public mapJoinDesc() { } - public mapJoinDesc(final Map> keys, - final tableDesc keyTblDesc, - final Map> values, - final List valueTblDescs, - ArrayList outputColumnNames, - final int posBigTable, - final joinCond[] conds) { + public mapJoinDesc() { + } + + public mapJoinDesc(final Map> keys, + final tableDesc keyTblDesc, final Map> values, + final List valueTblDescs, ArrayList outputColumnNames, + final int posBigTable, final joinCond[] conds) { super(values, outputColumnNames, conds); - this.keys = keys; - this.keyTblDesc = keyTblDesc; + this.keys = keys; + this.keyTblDesc = keyTblDesc; this.valueTblDescs = valueTblDescs; this.posBigTable = posBigTable; initRetainExprList(); @@ -76,7 +71,7 @@ retainList.put(current.getKey(), list); } } - + public Map> getRetainList() { return retainList; } @@ -84,17 +79,18 @@ public void setRetainList(Map> retainList) { this.retainList = retainList; } - + /** * @return the keys */ - @explain(displayName="keys") + @explain(displayName = "keys") public Map> getKeys() { return keys; } /** - * @param keys the keys to set + * @param keys + * the keys to set */ public void setKeys(Map> keys) { this.keys = keys; @@ -103,13 +99,14 @@ /** * @return the position of the big table not in memory */ - @explain(displayName="Position of Big Table") + @explain(displayName = "Position of Big Table") public int getPosBigTable() { return posBigTable; } /** - * @param posBigTable the position of the big table not in memory + * @param posBigTable + * the position of the big table not in memory */ public void setPosBigTable(int posBigTable) { this.posBigTable = posBigTable; @@ -123,7 +120,8 @@ } /** - * @param keyTblDesc the keyTblDesc to set + * @param keyTblDesc + * the keyTblDesc to set */ public void setKeyTblDesc(tableDesc keyTblDesc) { this.keyTblDesc = keyTblDesc; @@ -137,7 +135,8 @@ } /** - * @param valueTblDescs the valueTblDescs to set + * @param valueTblDescs + * the valueTblDescs to set */ public void setValueTblDescs(List valueTblDescs) { this.valueTblDescs = valueTblDescs; Index: 
ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java (working copy) @@ -14,16 +14,21 @@ /** * Description of a msck command. - * @param tableName Table to check, can be null. - * @param partSpecs Partition specification, can be null. - * @param resFile Where to save the output of the command - * @param repairPartitions remove stale / add new partitions found during the check + * + * @param tableName + * Table to check, can be null. + * @param partSpecs + * Partition specification, can be null. + * @param resFile + * Where to save the output of the command + * @param repairPartitions + * remove stale / add new partitions found during the check */ - public MsckDesc(String tableName, List> partSpecs, Path resFile, - boolean repairPartitions) { + public MsckDesc(String tableName, List> partSpecs, + Path resFile, boolean repairPartitions) { super(); this.tableName = tableName; - this.partitionSpec = partSpecs; + partitionSpec = partSpecs; this.resFile = resFile; this.repairPartitions = repairPartitions; } @@ -36,7 +41,8 @@ } /** - * @param tableName the table to check + * @param tableName + * the table to check */ public void setTableName(String tableName) { this.tableName = tableName; @@ -50,7 +56,8 @@ } /** - * @param partitionSpec partitions to check. + * @param partitionSpec + * partitions to check. */ public void setPartitionSpec(List> partitionSpec) { this.partitionSpec = partitionSpec; @@ -64,7 +71,8 @@ } /** - * @param resFile file to save command output to + * @param resFile + * file to save command output to */ public void setResFile(Path resFile) { this.resFile = resFile; @@ -78,7 +86,8 @@ } /** - * @param remove stale / add new partitions found during the check + * @param remove + * stale / add new partitions found during the check */ public void setRepairPartitions(boolean repairPartitions) { this.repairPartitions = repairPartitions; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/createViewDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/createViewDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/createViewDesc.java (working copy) @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.Serializable; +import java.util.List; + import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; -import java.util.List; - -import java.io.Serializable; - -@explain(displayName="Create View") +@explain(displayName = "Create View") public class createViewDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -36,16 +35,15 @@ private String comment; private boolean ifNotExists; - public createViewDesc( - String viewName, List schema, String comment, - boolean ifNotExists) { + public createViewDesc(String viewName, List schema, + String comment, boolean ifNotExists) { this.viewName = viewName; this.schema = schema; this.comment = comment; this.ifNotExists = ifNotExists; } - @explain(displayName="name") + @explain(displayName = "name") public String getViewName() { return viewName; } @@ -54,7 +52,7 @@ this.viewName = viewName; } - @explain(displayName="original text") + @explain(displayName = "original text") public String getViewOriginalText() { return originalText; } @@ -63,7 +61,7 @@ 
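A hypothetical sketch of the MSCK descriptor documented above; the table name and result path are placeholders, and passing a null partition-spec list (check the whole table) is an assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.MsckDesc;

public class MsckSketch {
  public static void main(String[] args) {
    Path resFile = new Path("/tmp/msck_src_tbl.txt"); // command output file
    // Check src_tbl (no explicit partition specs) and repair stale or
    // missing partitions found during the check.
    MsckDesc desc = new MsckDesc("src_tbl", null, resFile, true);
    // The repair flag can also be changed after construction via the setter.
    desc.setRepairPartitions(false);
  }
}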
this.originalText = originalText; } - @explain(displayName="expanded text") + @explain(displayName = "expanded text") public String getViewExpandedText() { return expandedText; } @@ -72,11 +70,11 @@ this.expandedText = expandedText; } - @explain(displayName="columns") + @explain(displayName = "columns") public List getSchemaString() { return Utilities.getFieldSchemaString(schema); } - + public List getSchema() { return schema; } @@ -85,7 +83,7 @@ this.schema = schema; } - @explain(displayName="comment") + @explain(displayName = "comment") public String getComment() { return comment; } @@ -94,7 +92,7 @@ this.comment = comment; } - @explain(displayName="if not exists") + @explain(displayName = "if not exists") public boolean getIfNotExists() { return ifNotExists; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java (working copy) @@ -19,16 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; - -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; -import org.apache.hadoop.hive.ql.plan.exprNodeDesc; -import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.mapred.SequenceFileInputFormat; - import java.util.ArrayList; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -37,92 +28,96 @@ * Join operator Descriptor implementation. * */ -@explain(displayName="Join Operator") +@explain(displayName = "Join Operator") public class joinDesc implements Serializable { private static final long serialVersionUID = 1L; - public static final int INNER_JOIN = 0; - public static final int LEFT_OUTER_JOIN = 1; + public static final int INNER_JOIN = 0; + public static final int LEFT_OUTER_JOIN = 1; public static final int RIGHT_OUTER_JOIN = 2; - public static final int FULL_OUTER_JOIN = 3; - public static final int UNIQUE_JOIN = 4; - public static final int LEFT_SEMI_JOIN = 5; + public static final int FULL_OUTER_JOIN = 3; + public static final int UNIQUE_JOIN = 4; + public static final int LEFT_SEMI_JOIN = 5; - //used to handle skew join + // used to handle skew join private boolean handleSkewJoin = false; private int skewKeyDefinition = -1; private Map bigKeysDirMap; private Map> smallKeysDirMap; private Map skewKeysValuesTables; - + // alias to key mapping private Map> exprs; - - //used for create joinOutputObjectInspector + + // used for create joinOutputObjectInspector protected java.util.ArrayList outputColumnNames; - + // key:column output name, value:tag transient private Map reversedExprs; - + // No outer join involved protected boolean noOuterJoin; protected joinCond[] conds; - + protected Byte[] tagOrder; private tableDesc keyTableDesc; - - public joinDesc() { } - - public joinDesc(final Map> exprs, ArrayList outputColumnNames, final boolean noOuterJoin, final joinCond[] conds) { + + public joinDesc() { + } + + public joinDesc(final Map> exprs, + ArrayList outputColumnNames, final boolean noOuterJoin, + final joinCond[] conds) { this.exprs = exprs; this.outputColumnNames = outputColumnNames; this.noOuterJoin = noOuterJoin; this.conds = conds; - + tagOrder = new Byte[exprs.size()]; - for(int i = 0; i> exprs, ArrayList outputColumnNames) { + + public 
joinDesc(final Map> exprs, + ArrayList outputColumnNames) { this(exprs, outputColumnNames, true, null); } - public joinDesc(final Map> exprs, ArrayList outputColumnNames, final joinCond[] conds) { + public joinDesc(final Map> exprs, + ArrayList outputColumnNames, final joinCond[] conds) { this(exprs, outputColumnNames, false, conds); } - + public Map> getExprs() { - return this.exprs; + return exprs; } - + public Map getReversedExprs() { return reversedExprs; } public void setReversedExprs(Map reversed_Exprs) { - this.reversedExprs = reversed_Exprs; + reversedExprs = reversed_Exprs; } - - @explain(displayName="condition expressions") + + @explain(displayName = "condition expressions") public Map getExprsStringMap() { if (getExprs() == null) { return null; } - + LinkedHashMap ret = new LinkedHashMap(); - - for(Map.Entry> ent: getExprs().entrySet()) { + + for (Map.Entry> ent : getExprs().entrySet()) { StringBuilder sb = new StringBuilder(); boolean first = true; if (ent.getValue() != null) { - for(exprNodeDesc expr: ent.getValue()) { + for (exprNodeDesc expr : ent.getValue()) { if (!first) { sb.append(" "); } - + first = false; sb.append("{"); sb.append(expr.getExprString()); @@ -131,15 +126,15 @@ } ret.put(ent.getKey(), sb.toString()); } - + return ret; } - + public void setExprs(final Map> exprs) { this.exprs = exprs; } - - @explain(displayName="outputColumnNames") + + @explain(displayName = "outputColumnNames") public java.util.ArrayList getOutputColumnNames() { return outputColumnNames; } @@ -150,21 +145,21 @@ } public boolean getNoOuterJoin() { - return this.noOuterJoin; + return noOuterJoin; } public void setNoOuterJoin(final boolean noOuterJoin) { this.noOuterJoin = noOuterJoin; } - @explain(displayName="condition map") + @explain(displayName = "condition map") public List getCondsList() { if (conds == null) { return null; } ArrayList l = new ArrayList(); - for(joinCond cond: conds) { + for (joinCond cond : conds) { l.add(cond); } @@ -172,7 +167,7 @@ } public joinCond[] getConds() { - return this.conds; + return conds; } public void setConds(final joinCond[] conds) { @@ -191,19 +186,21 @@ /** * The order in which tables should be processed when joining * - * @param tagOrder Array of tags + * @param tagOrder + * Array of tags */ public void setTagOrder(Byte[] tagOrder) { this.tagOrder = tagOrder; } - @explain(displayName="handleSkewJoin") + @explain(displayName = "handleSkewJoin") public boolean getHandleSkewJoin() { return handleSkewJoin; } /** * set to handle skew join in this join op + * * @param handleSkewJoin */ public void setHandleSkewJoin(boolean handleSkewJoin) { @@ -219,6 +216,7 @@ /** * set the mapping from tbl to dir for big keys + * * @param bigKeysDirMap */ public void setBigKeysDirMap(Map bigKeysDirMap) { @@ -234,6 +232,7 @@ /** * set the mapping from tbl to dir for small keys + * * @param bigKeysDirMap */ public void setSmallKeysDirMap(Map> smallKeysDirMap) { @@ -250,6 +249,7 @@ /** * set skew key definition + * * @param skewKeyDefinition */ public void setSkewKeyDefinition(int skewKeyDefinition) { @@ -264,24 +264,27 @@ } /** - * @param skewKeysValuesTable set the table desc for storing skew keys and their corresponding value; + * @param skewKeysValuesTable + * set the table desc for storing skew keys and their corresponding + * value; */ public void setSkewKeysValuesTables(Map skewKeysValuesTables) { this.skewKeysValuesTables = skewKeysValuesTables; } - + public boolean isNoOuterJoin() { for (org.apache.hadoop.hive.ql.plan.joinCond cond : conds) { if (cond.getType() == 
joinDesc.FULL_OUTER_JOIN || (cond.getType() == joinDesc.LEFT_OUTER_JOIN) - || cond.getType() == joinDesc.RIGHT_OUTER_JOIN) + || cond.getType() == joinDesc.RIGHT_OUTER_JOIN) { return false; + } } return true; } public void setKeyTableDesc(tableDesc keyTblDesc) { - this.keyTableDesc = keyTblDesc; + keyTableDesc = keyTblDesc; } public tableDesc getKeyTableDesc() { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/schemaDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/schemaDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/schemaDesc.java (working copy) @@ -24,16 +24,19 @@ private static final long serialVersionUID = 1L; private String schema; - public schemaDesc() { } + public schemaDesc() { + } + public schemaDesc(final String schema) { this.schema = schema; } public String getSchema() { - return this.schema; + return schema; } + public void setSchema(final String schema) { - this.schema=schema; + this.schema = schema; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java (working copy) @@ -31,49 +31,48 @@ private List> rootTasks; private String astStringTree; boolean extended; - - public explainWork() { } - - public explainWork(Path resFile, - List> rootTasks, - String astStringTree, - boolean extended) { + + public explainWork() { + } + + public explainWork(Path resFile, + List> rootTasks, String astStringTree, + boolean extended) { this.resFile = resFile; this.rootTasks = rootTasks; this.astStringTree = astStringTree; this.extended = extended; } - + public Path getResFile() { return resFile; } - + public void setResFile(Path resFile) { this.resFile = resFile; } - + public List> getRootTasks() { return rootTasks; } - + public void setRootTasks(List> rootTasks) { this.rootTasks = rootTasks; } - + public String getAstStringTree() { return astStringTree; } - + public void setAstStringTree(String astStringTree) { this.astStringTree = astStringTree; } - + public boolean getExtended() { return extended; } - + public void setExtended(boolean extended) { this.extended = extended; } } - Index: ql/src/java/org/apache/hadoop/hive/ql/plan/forwardDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/forwardDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/forwardDesc.java (working copy) @@ -20,11 +20,13 @@ import java.io.Serializable; -@explain(displayName="Forward") +@explain(displayName = "Forward") public class forwardDesc implements Serializable { private static final long serialVersionUID = 1L; + @SuppressWarnings("nls") public forwardDesc() { - // throw new RuntimeException("This class does not need to be instantiated"); + // throw new + // RuntimeException("This class does not need to be instantiated"); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/scriptDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/scriptDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/scriptDesc.java (working copy) @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.ql.exec.RecordReader; import org.apache.hadoop.hive.ql.exec.RecordWriter; -@explain(displayName="Transform Operator") 
+@explain(displayName = "Transform Operator") public class scriptDesc implements Serializable { private static final long serialVersionUID = 1L; private String scriptCmd; @@ -35,63 +35,73 @@ private tableDesc scriptInputInfo; private Class outRecordReaderClass; - public scriptDesc() { } - public scriptDesc( - final String scriptCmd, - final tableDesc scriptInputInfo, - final Class inRecordWriterClass, - final tableDesc scriptOutputInfo, - final Class outRecordReaderClass) { - + public scriptDesc() { + } + + public scriptDesc(final String scriptCmd, final tableDesc scriptInputInfo, + final Class inRecordWriterClass, + final tableDesc scriptOutputInfo, + final Class outRecordReaderClass) { + this.scriptCmd = scriptCmd; this.scriptInputInfo = scriptInputInfo; this.inRecordWriterClass = inRecordWriterClass; this.scriptOutputInfo = scriptOutputInfo; this.outRecordReaderClass = outRecordReaderClass; } - - @explain(displayName="command") + + @explain(displayName = "command") public String getScriptCmd() { - return this.scriptCmd; + return scriptCmd; } + public void setScriptCmd(final String scriptCmd) { - this.scriptCmd=scriptCmd; + this.scriptCmd = scriptCmd; } - - @explain(displayName="output info") + + @explain(displayName = "output info") public tableDesc getScriptOutputInfo() { - return this.scriptOutputInfo; + return scriptOutputInfo; } + public void setScriptOutputInfo(final tableDesc scriptOutputInfo) { this.scriptOutputInfo = scriptOutputInfo; } + public tableDesc getScriptInputInfo() { return scriptInputInfo; } + public void setScriptInputInfo(tableDesc scriptInputInfo) { this.scriptInputInfo = scriptInputInfo; } + /** * @return the outRecordReaderClass */ public Class getOutRecordReaderClass() { return outRecordReaderClass; } + /** - * @param outRecordReaderClass the outRecordReaderClass to set + * @param outRecordReaderClass + * the outRecordReaderClass to set */ public void setOutRecordReaderClass( Class outRecordReaderClass) { this.outRecordReaderClass = outRecordReaderClass; } + /** * @return the inRecordWriterClass */ public Class getInRecordWriterClass() { return inRecordWriterClass; } + /** - * @param inRecordWriterClass the inRecordWriterClass to set + * @param inRecordWriterClass + * the inRecordWriterClass to set */ public void setInRecordWriterClass( Class inRecordWriterClass) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/createFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/createFunctionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/createFunctionDesc.java (working copy) @@ -20,19 +20,19 @@ import java.io.Serializable; -@explain(displayName="Create Function") +@explain(displayName = "Create Function") public class createFunctionDesc implements Serializable { private static final long serialVersionUID = 1L; - + private String functionName; private String className; - + public createFunctionDesc(String functionName, String className) { this.functionName = functionName; this.className = className; } - @explain(displayName="name") + @explain(displayName = "name") public String getFunctionName() { return functionName; } @@ -41,7 +41,7 @@ this.functionName = functionName; } - @explain(displayName="class") + @explain(displayName = "class") public String getClassName() { return className; } @@ -49,5 +49,5 @@ public void setClassName(String className) { this.className = className; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/partitionDesc.java 
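A hypothetical sketch of the CREATE FUNCTION descriptor above; the function name and UDF class are placeholders, and only the constructor and getters shown in this patch are used:

import org.apache.hadoop.hive.ql.plan.createFunctionDesc;

public class CreateFunctionSketch {
  public static void main(String[] args) {
    // e.g. CREATE TEMPORARY FUNCTION my_lower AS 'com.example.udf.MyLower'
    createFunctionDesc desc = new createFunctionDesc("my_lower",
        "com.example.udf.MyLower");
    System.out.println("create function " + desc.getFunctionName() + " as '"
        + desc.getClassName() + "'");
  }
}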
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/partitionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/partitionDesc.java (working copy) @@ -31,163 +31,184 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.mapred.InputFormat; -@explain(displayName="Partition") +@explain(displayName = "Partition") public class partitionDesc implements Serializable, Cloneable { private static final long serialVersionUID = 2L; - private tableDesc table; + private tableDesc table; private java.util.LinkedHashMap partSpec; - private java.lang.Class deserializerClass; + private java.lang.Class deserializerClass; private Class inputFileFormatClass; private Class outputFileFormatClass; private java.util.Properties properties; private String serdeClassName; private transient String baseFileName; - public partitionDesc() { } + public partitionDesc() { + } - public partitionDesc( - final tableDesc table, - final java.util.LinkedHashMap partSpec) { + public partitionDesc(final tableDesc table, + final java.util.LinkedHashMap partSpec) { this(table, partSpec, null, null, null, null, null); } - public partitionDesc( - final tableDesc table, - final java.util.LinkedHashMap partSpec, + public partitionDesc(final tableDesc table, + final java.util.LinkedHashMap partSpec, final Class serdeClass, final Class inputFileFormatClass, - final Class outputFormat, - final java.util.Properties properties, final String serdeClassName) { - this.table = table; + final Class outputFormat, final java.util.Properties properties, + final String serdeClassName) { + this.table = table; this.partSpec = partSpec; - this.deserializerClass = serdeClass; + deserializerClass = serdeClass; this.inputFileFormatClass = inputFileFormatClass; - if (outputFormat != null) - this.outputFileFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormat); + if (outputFormat != null) { + outputFileFormatClass = HiveFileFormatUtils + .getOutputFormatSubstitute(outputFormat); + } this.properties = properties; - if (properties != null) - this.serdeClassName = properties.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB); + if (properties != null) { + this.serdeClassName = properties + .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB); + } } - public partitionDesc(final org.apache.hadoop.hive.ql.metadata.Partition part) throws HiveException{ - this.table = Utilities.getTableDesc(part.getTable()); - this.partSpec = part.getSpec(); - this.deserializerClass = part.getDeserializer().getClass(); - this.inputFileFormatClass = part.getInputFormatClass(); - this.outputFileFormatClass = part.getOutputFormatClass(); - this.properties = part.getSchema(); - this.serdeClassName = properties.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB);; + public partitionDesc(final org.apache.hadoop.hive.ql.metadata.Partition part) + throws HiveException { + table = Utilities.getTableDesc(part.getTable()); + partSpec = part.getSpec(); + deserializerClass = part.getDeserializer().getClass(); + inputFileFormatClass = part.getInputFormatClass(); + outputFileFormatClass = part.getOutputFormatClass(); + properties = part.getSchema(); + serdeClassName = properties + .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB); + ; } - @explain(displayName="") + @explain(displayName = "") public tableDesc getTableDesc() { - return this.table; + return table; } + public void setTableDesc(final 
tableDesc table) { this.table = table; } - @explain(displayName="partition values") + @explain(displayName = "partition values") public java.util.LinkedHashMap getPartSpec() { - return this.partSpec; + return partSpec; } + public void setPartSpec(final java.util.LinkedHashMap partSpec) { - this.partSpec=partSpec; + this.partSpec = partSpec; } - public java.lang.Class getDeserializerClass() { - if (this.deserializerClass == null && this.table !=null) - setDeserializerClass(this.table.getDeserializerClass()); - return this.deserializerClass; + public java.lang.Class getDeserializerClass() { + if (deserializerClass == null && table != null) { + setDeserializerClass(table.getDeserializerClass()); + } + return deserializerClass; } - public void setDeserializerClass(final java.lang.Class serdeClass) { - this.deserializerClass = serdeClass; + public void setDeserializerClass( + final java.lang.Class serdeClass) { + deserializerClass = serdeClass; } public Class getInputFileFormatClass() { - if (this.inputFileFormatClass == null && this.table !=null) - setInputFileFormatClass (this.table.getInputFileFormatClass()); - return this.inputFileFormatClass; + if (inputFileFormatClass == null && table != null) { + setInputFileFormatClass(table.getInputFileFormatClass()); + } + return inputFileFormatClass; } /** * Return a deserializer object corresponding to the tableDesc */ public Deserializer getDeserializer() throws Exception { - Deserializer de = this.deserializerClass.newInstance(); + Deserializer de = deserializerClass.newInstance(); de.initialize(null, properties); return de; } - public void setInputFileFormatClass(final Class inputFileFormatClass) { - this.inputFileFormatClass=inputFileFormatClass; + public void setInputFileFormatClass( + final Class inputFileFormatClass) { + this.inputFileFormatClass = inputFileFormatClass; } public Class getOutputFileFormatClass() { - if (this.outputFileFormatClass == null && this.table !=null) - setOutputFileFormatClass( this.table.getOutputFileFormatClass()); - return this.outputFileFormatClass; + if (outputFileFormatClass == null && table != null) { + setOutputFileFormatClass(table.getOutputFileFormatClass()); + } + return outputFileFormatClass; } public void setOutputFileFormatClass(final Class outputFileFormatClass) { - this.outputFileFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass); + this.outputFileFormatClass = HiveFileFormatUtils + .getOutputFormatSubstitute(outputFileFormatClass); } - @explain(displayName="properties", normalExplain=false) + @explain(displayName = "properties", normalExplain = false) public java.util.Properties getProperties() { - if(this.table !=null) - return this.table.getProperties(); - return this.properties; + if (table != null) { + return table.getProperties(); + } + return properties; } public void setProperties(final java.util.Properties properties) { this.properties = properties; } + /** * @return the serdeClassName */ - @explain(displayName="serde") + @explain(displayName = "serde") public String getSerdeClassName() { - if(this.serdeClassName == null && this.table !=null) - setSerdeClassName(this.table.getSerdeClassName()); - return this.serdeClassName; + if (serdeClassName == null && table != null) { + setSerdeClassName(table.getSerdeClassName()); + } + return serdeClassName; } + /** - * @param serdeClassName the serde Class Name to set + * @param serdeClassName + * the serde Class Name to set */ public void setSerdeClassName(String serdeClassName) { this.serdeClassName = serdeClassName; } 
- @explain(displayName="name") + @explain(displayName = "name") public String getTableName() { - return getProperties().getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME); + return getProperties().getProperty( + org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME); } - @explain(displayName="input format") + @explain(displayName = "input format") public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); } - @explain(displayName="output format") + @explain(displayName = "output format") public String getOutputFileFormatClassName() { return getOutputFileFormatClass().getName(); } - @explain(displayName="base file name", normalExplain=false) + @explain(displayName = "base file name", normalExplain = false) public String getBaseFileName() { - return this.baseFileName; + return baseFileName; } + @Override public partitionDesc clone() { - partitionDesc ret = new partitionDesc(); + partitionDesc ret = new partitionDesc(); ret.setSerdeClassName(serdeClassName); ret.setDeserializerClass(deserializerClass); - ret.inputFileFormatClass = this.inputFileFormatClass; - ret.outputFileFormatClass = this.outputFileFormatClass; - if(this.properties != null) { + ret.inputFileFormatClass = inputFileFormatClass; + ret.outputFileFormatClass = outputFileFormatClass; + if (properties != null) { Properties newProp = new Properties(); Enumeration keysProp = properties.keys(); while (keysProp.hasMoreElements()) { @@ -196,23 +217,23 @@ } ret.setProperties(newProp); } - ret.table = (tableDesc)this.table.clone(); + ret.table = (tableDesc) table.clone(); // The partition spec is not present - if (this.partSpec != null) { + if (partSpec != null) { ret.partSpec = new java.util.LinkedHashMap(); - ret.partSpec.putAll(this.partSpec); + ret.partSpec.putAll(partSpec); } - return ret; + return ret; } /** * Attempt to derive a virtual base file name property from the - * path. If path format is unrecognized, just use the full path. - * - * @param path URI to the partition file + * path. If path format is unrecognized, just use the full path. 
+ * + * @param path + * URI to the partition file */ - void deriveBaseFileName(String path) - { + void deriveBaseFileName(String path) { if (path == null) { return; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/dropFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/dropFunctionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/dropFunctionDesc.java (working copy) @@ -20,17 +20,17 @@ import java.io.Serializable; -@explain(displayName="Drop Function") +@explain(displayName = "Drop Function") public class dropFunctionDesc implements Serializable { private static final long serialVersionUID = 1L; - + private String functionName; - + public dropFunctionDesc(String functionName) { this.functionName = functionName; } - @explain(displayName="name") + @explain(displayName = "name") public String getFunctionName() { return functionName; } @@ -38,5 +38,5 @@ public void setFunctionName(String functionName) { this.functionName = functionName; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java (working copy) @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.ql.plan; -import java.io.*; +import java.io.Serializable; +import java.util.Set; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import java.util.Set; - -@explain(displayName="Move Operator") +@explain(displayName = "Move Operator") public class moveWork implements Serializable { private static final long serialVersionUID = 1L; private loadTableDesc loadTableWork; @@ -50,36 +49,37 @@ this.outputs = outputs; } - public moveWork( - Set inputs, - Set outputs, - final loadTableDesc loadTableWork, - final loadFileDesc loadFileWork, - boolean checkFileFormat) { + public moveWork(Set inputs, Set outputs, + final loadTableDesc loadTableWork, final loadFileDesc loadFileWork, + boolean checkFileFormat) { this(inputs, outputs); this.loadTableWork = loadTableWork; this.loadFileWork = loadFileWork; this.checkFileFormat = checkFileFormat; } - @explain(displayName="tables") + + @explain(displayName = "tables") public loadTableDesc getLoadTableWork() { - return this.loadTableWork; + return loadTableWork; } + public void setLoadTableWork(final loadTableDesc loadTableWork) { this.loadTableWork = loadTableWork; } - @explain(displayName="files") + @explain(displayName = "files") public loadFileDesc getLoadFileWork() { - return this.loadFileWork; + return loadFileWork; } + public void setLoadFileWork(final loadFileDesc loadFileWork) { - this.loadFileWork=loadFileWork; + this.loadFileWork = loadFileWork; } public boolean getCheckFileFormat() { return checkFileFormat; } + public void setCheckFileFormat(boolean checkFileFormat) { this.checkFileFormat = checkFileFormat; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java (working copy) @@ -19,16 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.HashMap; import java.util.List; import java.util.Map; -@explain(displayName="Drop Table") -public class 
dropTableDesc extends ddlDesc implements Serializable -{ +@explain(displayName = "Drop Table") +public class dropTableDesc extends ddlDesc implements Serializable { private static final long serialVersionUID = 1L; - - String tableName; + + String tableName; List> partSpecs; boolean expectView; @@ -37,26 +35,27 @@ */ public dropTableDesc(String tableName, boolean expectView) { this.tableName = tableName; - this.partSpecs = null; + partSpecs = null; this.expectView = expectView; } public dropTableDesc(String tableName, List> partSpecs) { this.tableName = tableName; this.partSpecs = partSpecs; - this.expectView = false; + expectView = false; } /** * @return the tableName */ - @explain(displayName="table") + @explain(displayName = "table") public String getTableName() { return tableName; } /** - * @param tableName the tableName to set + * @param tableName + * the tableName to set */ public void setTableName(String tableName) { this.tableName = tableName; @@ -70,7 +69,8 @@ } /** - * @param partSpecs the partSpecs to set + * @param partSpecs + * the partSpecs to set */ public void setPartSpecs(List> partSpecs) { this.partSpecs = partSpecs; @@ -84,7 +84,8 @@ } /** - * @param expectView set whether to expect a view being dropped + * @param expectView + * set whether to expect a view being dropped */ public void setExpectView(boolean expectView) { this.expectView = expectView; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (working copy) @@ -18,182 +18,203 @@ package org.apache.hadoop.hive.ql.plan; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Properties; +import java.util.Vector; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; -import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; -import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; public 
class PlanUtils { - protected final static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.plan.PlanUtils"); + protected final static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.plan.PlanUtils"); - public static enum ExpressionTypes {FIELD, JEXL}; + public static enum ExpressionTypes { + FIELD, JEXL + }; @SuppressWarnings("nls") public static mapredWork getMapRedWork() { - return new mapredWork("", - new LinkedHashMap> (), - new LinkedHashMap (), - new LinkedHashMap> (), - new tableDesc(), - new ArrayList (), - null, - Integer.valueOf (1), null); + return new mapredWork("", new LinkedHashMap>(), + new LinkedHashMap(), + new LinkedHashMap>(), + new tableDesc(), new ArrayList(), null, Integer.valueOf(1), + null); } /** - * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode - * and column names (comma separated string). + * Generate the table descriptor of MetadataTypedColumnsetSerDe with the + * separatorCode and column names (comma separated string). */ - public static tableDesc getDefaultTableDesc(String separatorCode, String columns) { + public static tableDesc getDefaultTableDesc(String separatorCode, + String columns) { return getDefaultTableDesc(separatorCode, columns, false); } /** - * Generate the table descriptor of given serde with the separatorCode - * and column names (comma separated string). + * Generate the table descriptor of given serde with the separatorCode and + * column names (comma separated string). */ - public static tableDesc getTableDesc(Class serdeClass, String separatorCode, String columns) { + public static tableDesc getTableDesc( + Class serdeClass, String separatorCode, + String columns) { return getTableDesc(serdeClass, separatorCode, columns, false); } /** - * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode - * and column names (comma separated string), and whether the last column should take - * the rest of the line. + * Generate the table descriptor of MetadataTypedColumnsetSerDe with the + * separatorCode and column names (comma separated string), and whether the + * last column should take the rest of the line. */ - public static tableDesc getDefaultTableDesc(String separatorCode, String columns, - boolean lastColumnTakesRestOfTheLine) { - return getDefaultTableDesc(separatorCode, columns, null, lastColumnTakesRestOfTheLine); + public static tableDesc getDefaultTableDesc(String separatorCode, + String columns, boolean lastColumnTakesRestOfTheLine) { + return getDefaultTableDesc(separatorCode, columns, null, + lastColumnTakesRestOfTheLine); } /** * Generate the table descriptor of the serde specified with the separatorCode - * and column names (comma separated string), and whether the last column should take - * the rest of the line. + * and column names (comma separated string), and whether the last column + * should take the rest of the line. 
*/ - public static tableDesc getTableDesc(Class serdeClass, - String separatorCode, String columns, - boolean lastColumnTakesRestOfTheLine) { - return getTableDesc(serdeClass, separatorCode, columns, null, lastColumnTakesRestOfTheLine); + public static tableDesc getTableDesc( + Class serdeClass, String separatorCode, + String columns, boolean lastColumnTakesRestOfTheLine) { + return getTableDesc(serdeClass, separatorCode, columns, null, + lastColumnTakesRestOfTheLine); } /** - * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode - * and column names (comma separated string), and whether the last column should take - * the rest of the line. + * Generate the table descriptor of MetadataTypedColumnsetSerDe with the + * separatorCode and column names (comma separated string), and whether the + * last column should take the rest of the line. */ - public static tableDesc getDefaultTableDesc(String separatorCode, String columns, String columnTypes, - boolean lastColumnTakesRestOfTheLine) { - return getTableDesc(LazySimpleSerDe.class, separatorCode, columns, columnTypes, - lastColumnTakesRestOfTheLine); + public static tableDesc getDefaultTableDesc(String separatorCode, + String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine) { + return getTableDesc(LazySimpleSerDe.class, separatorCode, columns, + columnTypes, lastColumnTakesRestOfTheLine); } - public static tableDesc getTableDesc(Class serdeClass, - String separatorCode, String columns, String columnTypes, - boolean lastColumnTakesRestOfTheLine) { - return getTableDesc(serdeClass, separatorCode, columns, columnTypes, lastColumnTakesRestOfTheLine, false); + public static tableDesc getTableDesc( + Class serdeClass, String separatorCode, + String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine) { + return getTableDesc(serdeClass, separatorCode, columns, columnTypes, + lastColumnTakesRestOfTheLine, false); } - public static tableDesc getTableDesc(Class serdeClass, - String separatorCode, String columns, String columnTypes, - boolean lastColumnTakesRestOfTheLine, boolean useJSONForLazy) { + public static tableDesc getTableDesc( + Class serdeClass, String separatorCode, + String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine, + boolean useJSONForLazy) { Properties properties = Utilities.makeProperties( - Constants.SERIALIZATION_FORMAT, separatorCode, - Constants.LIST_COLUMNS, columns); + Constants.SERIALIZATION_FORMAT, separatorCode, Constants.LIST_COLUMNS, + columns); - if ( ! separatorCode.equals(Integer.toString(Utilities.ctrlaCode)) ) + if (!separatorCode.equals(Integer.toString(Utilities.ctrlaCode))) { properties.setProperty(Constants.FIELD_DELIM, separatorCode); + } - if (columnTypes != null) + if (columnTypes != null) { properties.setProperty(Constants.LIST_COLUMN_TYPES, columnTypes); + } if (lastColumnTakesRestOfTheLine) { - properties.setProperty( - Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, + properties.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true"); } - // It is not a very clean way, and should be modified later - due to compatiblity reasons, - // user sees the results as json for custom scripts and has no way for specifying that. + // It is not a very clean way, and should be modified later - due to + // compatibility reasons, + // user sees the results as json for custom scripts and has no way for + // specifying that.
// Right now, it is hard-coded in the code - if (useJSONForLazy) - properties.setProperty( - Constants.SERIALIZATION_USE_JSON_OBJECTS, - "true"); + if (useJSONForLazy) { + properties.setProperty(Constants.SERIALIZATION_USE_JSON_OBJECTS, "true"); + } - - return new tableDesc( - serdeClass, - TextInputFormat.class, - IgnoreKeyTextOutputFormat.class, - properties); + return new tableDesc(serdeClass, TextInputFormat.class, + IgnoreKeyTextOutputFormat.class, properties); } /** * Generate a table descriptor from a createTableDesc. */ - public static tableDesc getTableDesc(createTableDesc crtTblDesc, String cols, String colTypes) { + public static tableDesc getTableDesc(createTableDesc crtTblDesc, String cols, + String colTypes) { Class serdeClass = LazySimpleSerDe.class; - String separatorCode = Integer.toString(Utilities.ctrlaCode); - String columns = cols; - String columnTypes = colTypes; - boolean lastColumnTakesRestOfTheLine = false; + String separatorCode = Integer.toString(Utilities.ctrlaCode); + String columns = cols; + String columnTypes = colTypes; + boolean lastColumnTakesRestOfTheLine = false; tableDesc ret; try { - if ( crtTblDesc.getSerName() != null ) { + if (crtTblDesc.getSerName() != null) { Class c = Class.forName(crtTblDesc.getSerName()); serdeClass = c; } - if ( crtTblDesc.getFieldDelim() != null ) + if (crtTblDesc.getFieldDelim() != null) { separatorCode = crtTblDesc.getFieldDelim(); + } ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes, - lastColumnTakesRestOfTheLine, false); + lastColumnTakesRestOfTheLine, false); // set other table properties Properties properties = ret.getProperties(); - if ( crtTblDesc.getCollItemDelim() != null ) - properties.setProperty(Constants.COLLECTION_DELIM, crtTblDesc.getCollItemDelim()); + if (crtTblDesc.getCollItemDelim() != null) { + properties.setProperty(Constants.COLLECTION_DELIM, crtTblDesc + .getCollItemDelim()); + } - if ( crtTblDesc.getMapKeyDelim() != null ) - properties.setProperty(Constants.MAPKEY_DELIM, crtTblDesc.getMapKeyDelim()); + if (crtTblDesc.getMapKeyDelim() != null) { + properties.setProperty(Constants.MAPKEY_DELIM, crtTblDesc + .getMapKeyDelim()); + } - if ( crtTblDesc.getFieldEscape() != null ) - properties.setProperty(Constants.ESCAPE_CHAR, crtTblDesc.getFieldEscape()); + if (crtTblDesc.getFieldEscape() != null) { + properties.setProperty(Constants.ESCAPE_CHAR, crtTblDesc + .getFieldEscape()); + } - if ( crtTblDesc.getLineDelim() != null ) + if (crtTblDesc.getLineDelim() != null) { properties.setProperty(Constants.LINE_DELIM, crtTblDesc.getLineDelim()); + } - // replace the default input & output file format with those found in crtTblDesc + // replace the default input & output file format with those found in + // crtTblDesc Class c1 = Class.forName(crtTblDesc.getInputFormat()); Class c2 = Class.forName(crtTblDesc.getOutputFormat()); - Class in_class = c1; + Class in_class = c1; Class out_class = c2; ret.setInputFileFormatClass(in_class); @@ -206,111 +227,95 @@ } /** - * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode. - * MetaDataTypedColumnsetSerDe is used because LazySimpleSerDe does not support a table - * with a single column "col" with type "array". + * Generate the table descriptor of MetadataTypedColumnsetSerDe with the + * separatorCode. MetaDataTypedColumnsetSerDe is used because LazySimpleSerDe + * does not support a table with a single column "col" with type + * "array". 
*/ public static tableDesc getDefaultTableDesc(String separatorCode) { - return new tableDesc( - MetadataTypedColumnsetSerDe.class, - TextInputFormat.class, - IgnoreKeyTextOutputFormat.class, - Utilities.makeProperties( - org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, separatorCode)); + return new tableDesc(MetadataTypedColumnsetSerDe.class, + TextInputFormat.class, IgnoreKeyTextOutputFormat.class, Utilities + .makeProperties( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, + separatorCode)); } /** * Generate the table descriptor for reduce key. */ - public static tableDesc getReduceKeyTableDesc(List fieldSchemas, String order) { - return new tableDesc( - BinarySortableSerDe.class, - SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, - Utilities.makeProperties( - Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), - Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas), - Constants.SERIALIZATION_SORT_ORDER, - order - )); + public static tableDesc getReduceKeyTableDesc(List fieldSchemas, + String order) { + return new tableDesc(BinarySortableSerDe.class, + SequenceFileInputFormat.class, SequenceFileOutputFormat.class, + Utilities.makeProperties(Constants.LIST_COLUMNS, MetaStoreUtils + .getColumnNamesFromFieldSchema(fieldSchemas), + Constants.LIST_COLUMN_TYPES, MetaStoreUtils + .getColumnTypesFromFieldSchema(fieldSchemas), + Constants.SERIALIZATION_SORT_ORDER, order)); } /** * Generate the table descriptor for Map-side join key. */ public static tableDesc getMapJoinKeyTableDesc(List fieldSchemas) { - return new tableDesc( - LazyBinarySerDe.class, - SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, - Utilities.makeProperties( - "columns", MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), - "columns.types", MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas), - Constants.ESCAPE_CHAR, "\\" - )); + return new tableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class, + SequenceFileOutputFormat.class, Utilities.makeProperties("columns", + MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), + "columns.types", MetaStoreUtils + .getColumnTypesFromFieldSchema(fieldSchemas), + Constants.ESCAPE_CHAR, "\\")); } /** * Generate the table descriptor for Map-side join key. */ - public static tableDesc getMapJoinValueTableDesc(List fieldSchemas) { - return new tableDesc( - LazyBinarySerDe.class, - SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, - Utilities.makeProperties( - "columns", MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), - "columns.types", MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas), - Constants.ESCAPE_CHAR, "\\" - )); + public static tableDesc getMapJoinValueTableDesc( + List fieldSchemas) { + return new tableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class, + SequenceFileOutputFormat.class, Utilities.makeProperties("columns", + MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), + "columns.types", MetaStoreUtils + .getColumnTypesFromFieldSchema(fieldSchemas), + Constants.ESCAPE_CHAR, "\\")); } /** * Generate the table descriptor for intermediate files. 
*/ - public static tableDesc getIntermediateFileTableDesc(List fieldSchemas) { - return new tableDesc( - LazyBinarySerDe.class, - SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, - Utilities.makeProperties( - Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), - Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas), - Constants.ESCAPE_CHAR, - "\\" - )); + public static tableDesc getIntermediateFileTableDesc( + List fieldSchemas) { + return new tableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class, + SequenceFileOutputFormat.class, Utilities.makeProperties( + Constants.LIST_COLUMNS, MetaStoreUtils + .getColumnNamesFromFieldSchema(fieldSchemas), + Constants.LIST_COLUMN_TYPES, MetaStoreUtils + .getColumnTypesFromFieldSchema(fieldSchemas), + Constants.ESCAPE_CHAR, "\\")); } /** * Generate the table descriptor for intermediate files. */ public static tableDesc getReduceValueTableDesc(List fieldSchemas) { - return new tableDesc( - LazyBinarySerDe.class, - SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, - Utilities.makeProperties( - Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), - Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas), - Constants.ESCAPE_CHAR, - "\\" - )); + return new tableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class, + SequenceFileOutputFormat.class, Utilities.makeProperties( + Constants.LIST_COLUMNS, MetaStoreUtils + .getColumnNamesFromFieldSchema(fieldSchemas), + Constants.LIST_COLUMN_TYPES, MetaStoreUtils + .getColumnTypesFromFieldSchema(fieldSchemas), + Constants.ESCAPE_CHAR, "\\")); } /** * Convert the ColumnList to FieldSchema list. */ - public static List getFieldSchemasFromColumnList(List cols, List outputColumnNames, int start, + public static List getFieldSchemasFromColumnList( + List cols, List outputColumnNames, int start, String fieldPrefix) { List schemas = new ArrayList(cols.size()); - for (int i=0; i getFieldSchemasFromColumnList(List cols, - String fieldPrefix) { + public static List getFieldSchemasFromColumnList( + List cols, String fieldPrefix) { List schemas = new ArrayList(cols.size()); - for (int i=0; i getFieldSchemasFromRowSchema(RowSchema row, String fieldPrefix) { + public static List getFieldSchemasFromRowSchema(RowSchema row, + String fieldPrefix) { Vector c = row.getSignature(); return getFieldSchemasFromColumnInfo(c, fieldPrefix); } @@ -338,23 +345,26 @@ /** * Convert the ColumnInfo to FieldSchema. */ - public static List getFieldSchemasFromColumnInfo(Vector cols, String fieldPrefix) { - if ((cols == null) || (cols.size() == 0)) + public static List getFieldSchemasFromColumnInfo( + Vector cols, String fieldPrefix) { + if ((cols == null) || (cols.size() == 0)) { return new ArrayList(); + } List schemas = new ArrayList(cols.size()); - for (int i=0; i sortFieldSchemas(List schema) { - Collections.sort(schema, new Comparator(){ + Collections.sort(schema, new Comparator() { @Override public int compare(FieldSchema o1, FieldSchema o2) { @@ -367,89 +377,101 @@ /** * Create the reduce sink descriptor. - * @param keyCols The columns to be stored in the key - * @param valueCols The columns to be stored in the value - * @param outputColumnNames The output columns names - * @param tag The tag for this reducesink - * @param partitionCols The columns for partitioning. 
- * @param numReducers The number of reducers, set to -1 for automatic inference - * based on input data size. + * + * @param keyCols + * The columns to be stored in the key + * @param valueCols + * The columns to be stored in the value + * @param outputColumnNames + * The output columns names + * @param tag + * The tag for this reducesink + * @param partitionCols + * The columns for partitioning. + * @param numReducers + * The number of reducers, set to -1 for automatic inference based on + * input data size. * @return The reduceSinkDesc object. */ - public static reduceSinkDesc getReduceSinkDesc(ArrayList keyCols, - ArrayList valueCols, - List outputColumnNames, - boolean includeKeyCols, - int tag, - ArrayList partitionCols, - String order, - int numReducers) { + public static reduceSinkDesc getReduceSinkDesc( + ArrayList keyCols, ArrayList valueCols, + List outputColumnNames, boolean includeKeyCols, int tag, + ArrayList partitionCols, String order, int numReducers) { tableDesc keyTable = null; tableDesc valueTable = null; ArrayList outputKeyCols = new ArrayList(); ArrayList outputValCols = new ArrayList(); if (includeKeyCols) { - keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList( - keyCols, outputColumnNames, 0, ""), order); + keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList(keyCols, + outputColumnNames, 0, ""), order); outputKeyCols.addAll(outputColumnNames.subList(0, keyCols.size())); valueTable = getReduceValueTableDesc(getFieldSchemasFromColumnList( valueCols, outputColumnNames, keyCols.size(), "")); - outputValCols.addAll(outputColumnNames.subList(keyCols.size(), outputColumnNames.size())); + outputValCols.addAll(outputColumnNames.subList(keyCols.size(), + outputColumnNames.size())); } else { - keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList( - keyCols, "reducesinkkey"), order); + keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList(keyCols, + "reducesinkkey"), order); for (int i = 0; i < keyCols.size(); i++) { - outputKeyCols.add("reducesinkkey"+i); + outputKeyCols.add("reducesinkkey" + i); } valueTable = getReduceValueTableDesc(getFieldSchemasFromColumnList( valueCols, outputColumnNames, 0, "")); outputValCols.addAll(outputColumnNames); } - return new reduceSinkDesc(keyCols, valueCols, outputKeyCols, outputValCols, tag, partitionCols, numReducers, - keyTable, - // Revert to DynamicSerDe: getBinaryTableDesc(getFieldSchemasFromColumnList(valueCols, "reducesinkvalue"))); + return new reduceSinkDesc(keyCols, valueCols, outputKeyCols, outputValCols, + tag, partitionCols, numReducers, keyTable, + // Revert to DynamicSerDe: + // getBinaryTableDesc(getFieldSchemasFromColumnList(valueCols, + // "reducesinkvalue"))); valueTable); } /** * Create the reduce sink descriptor. - * @param keyCols The columns to be stored in the key - * @param valueCols The columns to be stored in the value - * @param outputColumnNames The output columns names - * @param tag The tag for this reducesink - * @param numPartitionFields The first numPartitionFields of keyCols will be partition columns. - * If numPartitionFields=-1, then partition randomly. - * @param numReducers The number of reducers, set to -1 for automatic inference - * based on input data size. 
+ * + * @param keyCols + * The columns to be stored in the key + * @param valueCols + * The columns to be stored in the value + * @param outputColumnNames + * The output columns names + * @param tag + * The tag for this reducesink + * @param numPartitionFields + * The first numPartitionFields of keyCols will be partition columns. + * If numPartitionFields=-1, then partition randomly. + * @param numReducers + * The number of reducers, set to -1 for automatic inference based on + * input data size. * @return The reduceSinkDesc object. */ public static reduceSinkDesc getReduceSinkDesc( ArrayList keyCols, ArrayList valueCols, - List outputColumnNames, boolean includeKey, int tag, int numPartitionFields, - int numReducers) { + List outputColumnNames, boolean includeKey, int tag, + int numPartitionFields, int numReducers) { ArrayList partitionCols = null; if (numPartitionFields >= keyCols.size()) { partitionCols = keyCols; } else if (numPartitionFields >= 0) { partitionCols = new ArrayList(numPartitionFields); - for (int i=0; i(1); - partitionCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand")); + partitionCols.add(TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("rand")); } StringBuilder order = new StringBuilder(); - for (int i=0; i tabBucketCols; - // Input files can be pruned private boolean inputPruning; public sampleDesc() { } - public sampleDesc(int numerator, int denominator, List tabBucketCols, boolean inputPruning) { + public sampleDesc(int numerator, int denominator, + List tabBucketCols, boolean inputPruning) { this.numerator = numerator; this.denominator = denominator; - this.tabBucketCols = tabBucketCols; this.inputPruning = inputPruning; } public int getNumerator() { - return this.numerator; + return numerator; } public int getDenominator() { - return this.denominator; + return denominator; } public boolean getInputPruning() { @@ -68,41 +65,49 @@ private boolean isSamplingPred; private transient sampleDesc sampleDescr; - public filterDesc() { } + public filterDesc() { + } + public filterDesc( - final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate, boolean isSamplingPred) { + final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate, + boolean isSamplingPred) { this.predicate = predicate; this.isSamplingPred = isSamplingPred; - this.sampleDescr = null; + sampleDescr = null; } public filterDesc( - final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate, boolean isSamplingPred, final sampleDesc sampleDescr) { + final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate, + boolean isSamplingPred, final sampleDesc sampleDescr) { this.predicate = predicate; this.isSamplingPred = isSamplingPred; this.sampleDescr = sampleDescr; } - @explain(displayName="predicate") + @explain(displayName = "predicate") public org.apache.hadoop.hive.ql.plan.exprNodeDesc getPredicate() { - return this.predicate; + return predicate; } - public void setPredicate(final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate) { + + public void setPredicate( + final org.apache.hadoop.hive.ql.plan.exprNodeDesc predicate) { this.predicate = predicate; } - @explain(displayName="isSamplingPred", normalExplain=false) + @explain(displayName = "isSamplingPred", normalExplain = false) public boolean getIsSamplingPred() { - return this.isSamplingPred; + return isSamplingPred; } + public void setIsSamplingPred(final boolean isSamplingPred) { this.isSamplingPred = isSamplingPred; } - @explain(displayName="sampleDesc", normalExplain=false) + @explain(displayName = 
"sampleDesc", normalExplain = false) public sampleDesc getSampleDescr() { - return this.sampleDescr; + return sampleDescr; } + public void setSampleDescr(final sampleDesc sampleDescr) { this.sampleDescr = sampleDescr; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolver.java (working copy) @@ -25,15 +25,18 @@ import org.apache.hadoop.hive.ql.exec.Task; /** - * Conditional task resolution interface. This is invoked at run time to get the task to invoke. - * Developers can plug in their own resolvers + * Conditional task resolution interface. This is invoked at run time to get the + * task to invoke. Developers can plug in their own resolvers */ public interface ConditionalResolver { - /** - * All conditional resolvers implement this interface - * @param conf configuration - * @param ctx opaque context - * @return position of the task - */ - public List> getTasks(HiveConf conf, Object ctx); + /** + * All conditional resolvers implement this interface + * + * @param conf + * configuration + * @param ctx + * opaque context + * @return position of the task + */ + public List> getTasks(HiveConf conf, Object ctx); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/aggregationDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/aggregationDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/aggregationDesc.java (working copy) @@ -18,75 +18,80 @@ package org.apache.hadoop.hive.ql.plan; -import java.io.Serializable; - -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.util.ReflectionUtils; public class aggregationDesc implements java.io.Serializable { private static final long serialVersionUID = 1L; private String genericUDAFName; - + /** * In case genericUDAFEvaluator is Serializable, we will serialize the object. * - * In case genericUDAFEvaluator does not implement Serializable, Java will remember the - * class of genericUDAFEvaluator and creates a new instance when deserialized. This is - * exactly what we want. + * In case genericUDAFEvaluator does not implement Serializable, Java will + * remember the class of genericUDAFEvaluator and creates a new instance when + * deserialized. This is exactly what we want. 
*/ private GenericUDAFEvaluator genericUDAFEvaluator; private java.util.ArrayList parameters; private boolean distinct; private GenericUDAFEvaluator.Mode mode; - public aggregationDesc() {} - public aggregationDesc( - final String genericUDAFName, - final GenericUDAFEvaluator genericUDAFEvaluator, - final java.util.ArrayList parameters, - final boolean distinct, - final GenericUDAFEvaluator.Mode mode) { + + public aggregationDesc() { + } + + public aggregationDesc(final String genericUDAFName, + final GenericUDAFEvaluator genericUDAFEvaluator, + final java.util.ArrayList parameters, + final boolean distinct, final GenericUDAFEvaluator.Mode mode) { this.genericUDAFName = genericUDAFName; this.genericUDAFEvaluator = genericUDAFEvaluator; this.parameters = parameters; this.distinct = distinct; this.mode = mode; } + public void setGenericUDAFName(final String genericUDAFName) { this.genericUDAFName = genericUDAFName; } + public String getGenericUDAFName() { return genericUDAFName; } - public void setGenericUDAFEvaluator(final GenericUDAFEvaluator genericUDAFEvaluator) { + + public void setGenericUDAFEvaluator( + final GenericUDAFEvaluator genericUDAFEvaluator) { this.genericUDAFEvaluator = genericUDAFEvaluator; } + public GenericUDAFEvaluator getGenericUDAFEvaluator() { return genericUDAFEvaluator; } + public java.util.ArrayList getParameters() { - return this.parameters; + return parameters; } + public void setParameters(final java.util.ArrayList parameters) { - this.parameters=parameters; + this.parameters = parameters; } + public boolean getDistinct() { - return this.distinct; + return distinct; } + public void setDistinct(final boolean distinct) { this.distinct = distinct; } + public void setMode(final GenericUDAFEvaluator.Mode mode) { this.mode = mode; } - + public GenericUDAFEvaluator.Mode getMode() { return mode; } - - @explain(displayName="expr") + + @explain(displayName = "expr") public String getExprString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDAFName); @@ -95,7 +100,7 @@ sb.append("DISTINCT "); } boolean first = true; - for(exprNodeDesc exp: parameters) { + for (exprNodeDesc exp : parameters) { if (first) { first = false; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/mapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/mapredLocalWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/mapredLocalWork.java (working copy) @@ -18,47 +18,52 @@ package org.apache.hadoop.hive.ql.plan; -import java.util.*; -import java.io.*; +import java.io.Serializable; +import java.util.LinkedHashMap; import org.apache.hadoop.hive.ql.exec.Operator; -@explain(displayName="Map Reduce Local Work") +@explain(displayName = "Map Reduce Local Work") public class mapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; private LinkedHashMap> aliasToWork; private LinkedHashMap aliasToFetchWork; - public mapredLocalWork() { } + public mapredLocalWork() { + } - public mapredLocalWork(final LinkedHashMap> aliasToWork, - final LinkedHashMap aliasToFetchWork) { + public mapredLocalWork( + final LinkedHashMap> aliasToWork, + final LinkedHashMap aliasToFetchWork) { this.aliasToWork = aliasToWork; this.aliasToFetchWork = aliasToFetchWork; } - @explain(displayName="Alias -> Map Local Operator Tree") + @explain(displayName = "Alias -> Map Local Operator Tree") public LinkedHashMap> getAliasToWork() { return aliasToWork; } - public void 
setAliasToWork(final LinkedHashMap> aliasToWork) { + public void setAliasToWork( + final LinkedHashMap> aliasToWork) { this.aliasToWork = aliasToWork; } /** * @return the aliasToFetchWork */ - @explain(displayName="Alias -> Map Local Tables") + @explain(displayName = "Alias -> Map Local Tables") public LinkedHashMap getAliasToFetchWork() { return aliasToFetchWork; } /** - * @param aliasToFetchWork the aliasToFetchWork to set + * @param aliasToFetchWork + * the aliasToFetchWork to set */ - public void setAliasToFetchWork(final LinkedHashMap aliasToFetchWork) { + public void setAliasToFetchWork( + final LinkedHashMap aliasToFetchWork) { this.aliasToFetchWork = aliasToFetchWork; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/tableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/tableDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/tableDesc.java (working copy) @@ -22,10 +22,10 @@ import java.util.Enumeration; import java.util.Properties; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.mapred.InputFormat; public class tableDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; @@ -34,82 +34,101 @@ private Class outputFileFormatClass; private java.util.Properties properties; private String serdeClassName; - public tableDesc() { } - public tableDesc( - final Class serdeClass, + + public tableDesc() { + } + + public tableDesc(final Class serdeClass, final Class inputFileFormatClass, - final Class class1, - final java.util.Properties properties) { - this.deserializerClass = serdeClass; + final Class class1, final java.util.Properties properties) { + deserializerClass = serdeClass; this.inputFileFormatClass = inputFileFormatClass; - this.outputFileFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1); + outputFileFormatClass = HiveFileFormatUtils + .getOutputFormatSubstitute(class1); this.properties = properties; - this.serdeClassName = properties.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB);; + serdeClassName = properties + .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_LIB); + ; } - + public Class getDeserializerClass() { - return this.deserializerClass; + return deserializerClass; } - public void setDeserializerClass(final Class serdeClass) { - this.deserializerClass = serdeClass; + + public void setDeserializerClass( + final Class serdeClass) { + deserializerClass = serdeClass; } + public Class getInputFileFormatClass() { - return this.inputFileFormatClass; + return inputFileFormatClass; } + /** * Return a deserializer object corresponding to the tableDesc */ public Deserializer getDeserializer() throws Exception { - Deserializer de = this.deserializerClass.newInstance(); + Deserializer de = deserializerClass.newInstance(); de.initialize(null, properties); return de; } - public void setInputFileFormatClass(final Class inputFileFormatClass) { - this.inputFileFormatClass=inputFileFormatClass; + + public void setInputFileFormatClass( + final Class inputFileFormatClass) { + this.inputFileFormatClass = inputFileFormatClass; } + public Class getOutputFileFormatClass() { - return this.outputFileFormatClass; + return outputFileFormatClass; } + public void setOutputFileFormatClass(final Class outputFileFormatClass) { - 
this.outputFileFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass); + this.outputFileFormatClass = HiveFileFormatUtils + .getOutputFormatSubstitute(outputFileFormatClass); } - - @explain(displayName="properties", normalExplain=false) + + @explain(displayName = "properties", normalExplain = false) public java.util.Properties getProperties() { - return this.properties; + return properties; } + public void setProperties(final java.util.Properties properties) { this.properties = properties; } + /** * @return the serdeClassName */ - @explain(displayName="serde") + @explain(displayName = "serde") public String getSerdeClassName() { - return this.serdeClassName; + return serdeClassName; } + /** - * @param serdeClassName the serde Class Name to set + * @param serdeClassName + * the serde Class Name to set */ public void setSerdeClassName(String serdeClassName) { this.serdeClassName = serdeClassName; } - - @explain(displayName="name") + + @explain(displayName = "name") public String getTableName() { - return this.properties.getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME); + return properties + .getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME); } - - @explain(displayName="input format") + + @explain(displayName = "input format") public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); } - - @explain(displayName="output format") + + @explain(displayName = "output format") public String getOutputFileFormatClassName() { return getOutputFileFormatClass().getName(); } - + + @Override public Object clone() { tableDesc ret = new tableDesc(); ret.setSerdeClassName(serdeClassName); @@ -117,7 +136,7 @@ ret.setInputFileFormatClass(inputFileFormatClass); ret.setOutputFileFormatClass(outputFileFormatClass); Properties newProp = new Properties(); - Enumeration keysProp = properties.keys(); + Enumeration keysProp = properties.keys(); while (keysProp.hasMoreElements()) { Object key = keysProp.nextElement(); newProp.put(key, properties.get(key)); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java (working copy) @@ -16,93 +16,106 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.plan; - -import java.io.Serializable; +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.ql.exec.Utilities; - -public class exprNodeFieldDesc extends exprNodeDesc implements Serializable { - private static final long serialVersionUID = 1L; - exprNodeDesc desc; +public class exprNodeFieldDesc extends exprNodeDesc implements Serializable { + private static final long serialVersionUID = 1L; + exprNodeDesc desc; String fieldName; - - // Used to support a.b where a is a list of struct that contains a field called b. - // a.b will return an array that contains field b of all elements of array a. + + // Used to support a.b where a is a list of struct that contains a field + // called b. + // a.b will return an array that contains field b of all elements of array a. 
Boolean isList; - - public exprNodeFieldDesc() {} - public exprNodeFieldDesc(TypeInfo typeInfo, exprNodeDesc desc, String fieldName, Boolean isList) { - super(typeInfo); - this.desc = desc; + + public exprNodeFieldDesc() { + } + + public exprNodeFieldDesc(TypeInfo typeInfo, exprNodeDesc desc, + String fieldName, Boolean isList) { + super(typeInfo); + this.desc = desc; this.fieldName = fieldName; - this.isList = isList; - } - + this.isList = isList; + } + @Override public List getChildren() { List children = new ArrayList(2); children.add(desc); return children; } - - public exprNodeDesc getDesc() { - return this.desc; - } - public void setDesc(exprNodeDesc desc) { - this.desc = desc; - } - public String getFieldName() { - return this.fieldName; - } - public void setFieldName(String fieldName) { - this.fieldName = fieldName; + + public exprNodeDesc getDesc() { + return desc; } + + public void setDesc(exprNodeDesc desc) { + this.desc = desc; + } + + public String getFieldName() { + return fieldName; + } + + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + public Boolean getIsList() { return isList; } + public void setIsList(Boolean isList) { this.isList = isList; } - - @Override - public String toString() { - return this.desc.toString() + "." + this.fieldName; - } - - @explain(displayName="expr") + @Override + public String toString() { + return desc.toString() + "." + fieldName; + } + + @explain(displayName = "expr") + @Override public String getExprString() { - return this.desc.getExprString() + "." + this.fieldName; + return desc.getExprString() + "." + fieldName; } + @Override public List getCols() { List colList = new ArrayList(); - if (desc != null) - colList = Utilities.mergeUniqElems(colList, desc.getCols()); + if (desc != null) { + colList = Utilities.mergeUniqElems(colList, desc.getCols()); + } return colList; } + @Override public exprNodeDesc clone() { - return new exprNodeFieldDesc(this.typeInfo, this.desc, this.fieldName, this.isList); + return new exprNodeFieldDesc(typeInfo, desc, fieldName, isList); } @Override public boolean isSame(Object o) { - if (!(o instanceof exprNodeFieldDesc)) + if (!(o instanceof exprNodeFieldDesc)) { return false; - exprNodeFieldDesc dest = (exprNodeFieldDesc)o; - if (!typeInfo.equals(dest.getTypeInfo())) + } + exprNodeFieldDesc dest = (exprNodeFieldDesc) o; + if (!typeInfo.equals(dest.getTypeInfo())) { return false; - if (!fieldName.equals(dest.getFieldName()) || - !isList.equals(dest.getIsList()) || - !desc.isSame(dest.getDesc())) + } + if (!fieldName.equals(dest.getFieldName()) + || !isList.equals(dest.getIsList()) || !desc.isSame(dest.getDesc())) { return false; - - return true; + } + + return true; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java (working copy) @@ -21,17 +21,17 @@ import java.io.Serializable; /** - * unionDesc is a empty class currently. - * However, union has more than one input (as compared with forward), and therefore, we need a separate class. + * unionDesc is an empty class currently. However, union has more than one input + * (as compared with forward), and therefore, we need a separate class.
**/ -@explain(displayName="Union") +@explain(displayName = "Union") public class unionDesc implements Serializable { private static final long serialVersionUID = 1L; transient private int numInputs; - + @SuppressWarnings("nls") - public unionDesc() { + public unionDesc() { numInputs = 2; } @@ -43,7 +43,8 @@ } /** - * @param numInputs the numInputs to set + * @param numInputs + * the numInputs to set */ public void setNumInputs(int numInputs) { this.numInputs = numInputs; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (working copy) @@ -30,30 +30,33 @@ import org.apache.hadoop.hive.ql.exec.Task; /** - * Conditional task resolution interface. This is invoked at run time to get the task to invoke. - * Developers can plug in their own resolvers + * Conditional task resolution interface. This is invoked at run time to get the + * task to invoke. Developers can plug in their own resolvers */ -public class ConditionalResolverMergeFiles implements ConditionalResolver, Serializable { +public class ConditionalResolverMergeFiles implements ConditionalResolver, + Serializable { private static final long serialVersionUID = 1L; - public ConditionalResolverMergeFiles() { + + public ConditionalResolverMergeFiles() { } - + public static class ConditionalResolverMergeFilesCtx implements Serializable { private static final long serialVersionUID = 1L; List> listTasks; private String dir; - - public ConditionalResolverMergeFilesCtx() { + + public ConditionalResolverMergeFilesCtx() { } - + /** * @param dir */ - public ConditionalResolverMergeFilesCtx(List> listTasks, String dir) { + public ConditionalResolverMergeFilesCtx( + List> listTasks, String dir) { this.listTasks = listTasks; this.dir = dir; } - + /** * @return the dir */ @@ -62,12 +65,13 @@ } /** - * @param dir the dir to set + * @param dir + * the dir to set */ public void setDir(String dir) { this.dir = dir; } - + /** * @return the listTasks */ @@ -76,43 +80,47 @@ } /** - * @param listTasks the listTasks to set + * @param listTasks + * the listTasks to set */ public void setListTasks(List> listTasks) { this.listTasks = listTasks; } } - - public List> getTasks(HiveConf conf, Object objCtx) { - ConditionalResolverMergeFilesCtx ctx = (ConditionalResolverMergeFilesCtx)objCtx; + + public List> getTasks(HiveConf conf, + Object objCtx) { + ConditionalResolverMergeFilesCtx ctx = (ConditionalResolverMergeFilesCtx) objCtx; String dirName = ctx.getDir(); - + List> resTsks = new ArrayList>(); // check if a map-reduce job is needed to merge the files // If the current size is smaller than the target, merge long trgtSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESSIZE); - long avgConditionSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE); - trgtSize = trgtSize > avgConditionSize ? trgtSize : avgConditionSize; - + long avgConditionSize = conf + .getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE); + trgtSize = trgtSize > avgConditionSize ? 
trgtSize : avgConditionSize; + try { // If the input file does not exist, replace it by a empty file Path dirPath = new Path(dirName); FileSystem inpFs = dirPath.getFileSystem(conf); - + if (inpFs.exists(dirPath)) { FileStatus[] fStats = inpFs.listStatus(dirPath); long totalSz = 0; - for (FileStatus fStat : fStats) + for (FileStatus fStat : fStats) { totalSz += fStat.getLen(); - + } + long currAvgSz = totalSz / fStats.length; if ((currAvgSz < avgConditionSize) && (fStats.length > 1)) { // also set the number of reducers Task tsk = ctx.getListTasks().get(1); - mapredWork work = (mapredWork)tsk.getWork(); - + mapredWork work = (mapredWork) tsk.getWork(); + int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS); - int reducers = (int)((totalSz + trgtSize - 1) / trgtSize); + int reducers = (int) ((totalSz + trgtSize - 1) / trgtSize); reducers = Math.max(1, reducers); reducers = Math.min(maxReducers, reducers); work.setNumReduceTasks(reducers); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/reduceSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/reduceSinkDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/reduceSinkDesc.java (working copy) @@ -20,20 +20,20 @@ import java.io.Serializable; -@explain(displayName="Reduce Output Operator") +@explain(displayName = "Reduce Output Operator") public class reduceSinkDesc implements Serializable { private static final long serialVersionUID = 1L; /** - * Key columns are passed to reducer in the "key". + * Key columns are passed to reducer in the "key". */ private java.util.ArrayList keyCols; private java.util.ArrayList outputKeyColumnNames; /** - * Value columns are passed to reducer in the "value". + * Value columns are passed to reducer in the "value". */ private java.util.ArrayList valueCols; private java.util.ArrayList outputValueColumnNames; - /** + /** * Describe how to serialize the key. */ private tableDesc keySerializeInfo; @@ -41,37 +41,34 @@ * Describe how to serialize the value. */ private tableDesc valueSerializeInfo; - + /** * The tag for this reducesink descriptor. */ private int tag; - + /** * The partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language). * Partition columns decide the reducer that the current row goes to. * Partition columns are not passed to reducer. 
*/ private java.util.ArrayList partitionCols; - + private int numReducers; - public reduceSinkDesc() { } + public reduceSinkDesc() { + } - public reduceSinkDesc - (java.util.ArrayList keyCols, - java.util.ArrayList valueCols, - java.util.ArrayList outputKeyColumnNames, - java.util.ArrayList outputValueolumnNames, - int tag, - java.util.ArrayList partitionCols, - int numReducers, - final tableDesc keySerializeInfo, - final tableDesc valueSerializeInfo) { + public reduceSinkDesc(java.util.ArrayList keyCols, + java.util.ArrayList valueCols, + java.util.ArrayList outputKeyColumnNames, + java.util.ArrayList outputValueolumnNames, int tag, + java.util.ArrayList partitionCols, int numReducers, + final tableDesc keySerializeInfo, final tableDesc valueSerializeInfo) { this.keyCols = keyCols; this.valueCols = valueCols; this.outputKeyColumnNames = outputKeyColumnNames; - this.outputValueColumnNames = outputValueolumnNames; + outputValueColumnNames = outputValueolumnNames; this.tag = tag; this.numReducers = numReducers; this.partitionCols = partitionCols; @@ -97,49 +94,53 @@ this.outputValueColumnNames = outputValueColumnNames; } - @explain(displayName="key expressions") + @explain(displayName = "key expressions") public java.util.ArrayList getKeyCols() { - return this.keyCols; + return keyCols; } - public void setKeyCols - (final java.util.ArrayList keyCols) { - this.keyCols=keyCols; + + public void setKeyCols(final java.util.ArrayList keyCols) { + this.keyCols = keyCols; } - @explain(displayName="value expressions") + @explain(displayName = "value expressions") public java.util.ArrayList getValueCols() { - return this.valueCols; + return valueCols; } - public void setValueCols - (final java.util.ArrayList valueCols) { - this.valueCols=valueCols; + + public void setValueCols(final java.util.ArrayList valueCols) { + this.valueCols = valueCols; } - - @explain(displayName="Map-reduce partition columns") + + @explain(displayName = "Map-reduce partition columns") public java.util.ArrayList getPartitionCols() { - return this.partitionCols; + return partitionCols; } - public void setPartitionCols(final java.util.ArrayList partitionCols) { + + public void setPartitionCols( + final java.util.ArrayList partitionCols) { this.partitionCols = partitionCols; } - - @explain(displayName="tag") + + @explain(displayName = "tag") public int getTag() { - return this.tag; + return tag; } + public void setTag(int tag) { this.tag = tag; } /** - * Returns the number of reducers for the map-reduce job. - * -1 means to decide the number of reducers at runtime. This enables Hive to estimate - * the number of reducers based on the map-reduce input data size, which is only - * available right before we start the map-reduce job. + * Returns the number of reducers for the map-reduce job. -1 means to decide + * the number of reducers at runtime. This enables Hive to estimate the number + * of reducers based on the map-reduce input data size, which is only + * available right before we start the map-reduce job. */ public int getNumReducers() { - return this.numReducers; + return numReducers; } + public void setNumReducers(int numReducers) { this.numReducers = numReducers; } @@ -162,14 +163,15 @@ /** * Returns the sort order of the key columns. - * @return null, which means ascending order for all key columns, - * or a String of the same length as key columns, that consists of only - * "+" (ascending order) and "-" (descending order). 
+ * + * @return null, which means ascending order for all key columns, or a String + * of the same length as key columns, that consists of only "+" + * (ascending order) and "-" (descending order). */ - @explain(displayName="sort order") + @explain(displayName = "sort order") public String getOrder() { return keySerializeInfo.getProperties().getProperty( org.apache.hadoop.hive.serde.Constants.SERIALIZATION_SORT_ORDER); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/showTableStatusDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/showTableStatusDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/showTableStatusDesc.java (working copy) @@ -30,7 +30,7 @@ Path resFile; String dbName; HashMap partSpec; - + /** * table name for the result of show tables */ @@ -60,9 +60,12 @@ /** * @param resFile - * @param dbName data base name - * @param pattern names of tables to show - * @param part partition specification + * @param dbName + * data base name + * @param pattern + * names of tables to show + * @param part + * partition specification */ public showTableStatusDesc(Path resFile, String dbName, String pattern, HashMap partSpec) { @@ -107,7 +110,7 @@ public void setResFile(Path resFile) { this.resFile = resFile; } - + /** * @return the database name */ @@ -123,17 +126,18 @@ public void setDbName(String dbName) { this.dbName = dbName; } - + /** * @return the partSpec */ - @explain(displayName="partition") + @explain(displayName = "partition") public HashMap getPartSpec() { return partSpec; } /** - * @param partSpec the partSpec to set + * @param partSpec + * the partSpec to set */ public void setPartSpecs(HashMap partSpec) { this.partSpec = partSpec; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/fetchWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/fetchWork.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/fetchWork.java (working copy) @@ -23,16 +23,15 @@ import java.util.List; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.plan.tableDesc; -@explain(displayName="Fetch Operator") +@explain(displayName = "Fetch Operator") public class fetchWork implements Serializable { private static final long serialVersionUID = 1L; - private String tblDir; + private String tblDir; private tableDesc tblDesc; - private List partDir; + private List partDir; private List partDesc; private int limit; @@ -42,28 +41,29 @@ */ private String serializationNullFormat = "NULL"; - public fetchWork() { } + public fetchWork() { + } public fetchWork(String tblDir, tableDesc tblDesc) { this(tblDir, tblDesc, -1); } - public fetchWork(String tblDir, tableDesc tblDesc, int limit) { - this.tblDir = tblDir; - this.tblDesc = tblDesc; - this.limit = limit; - } + public fetchWork(String tblDir, tableDesc tblDesc, int limit) { + this.tblDir = tblDir; + this.tblDesc = tblDesc; + this.limit = limit; + } - public fetchWork(List partDir, List partDesc) { - this(partDir, partDesc, -1); - } - - public fetchWork(List partDir, List partDesc, int limit) { - this.partDir = partDir; - this.partDesc = partDesc; - this.limit = limit; - } + public fetchWork(List partDir, List partDesc) { + this(partDir, partDesc, -1); + } + public fetchWork(List partDir, List partDesc, int limit) { + this.partDir = partDir; + this.partDesc = partDesc; + this.limit = limit; + } + public String getSerializationNullFormat() { return 
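The getOrder() javadoc above packs its contract into one sentence: a null return means every key column sorts ascending, otherwise the string holds one '+' or '-' per key column. A hypothetical decoder spelling that out (illustration only, not part of this patch):

public class SortOrderSketch {
  // Decode the sort-order string documented on reduceSinkDesc.getOrder():
  // null -> all ascending; otherwise position i is '+' (ascending) or '-' (descending).
  static boolean[] decode(String order, int numKeyCols) {
    boolean[] ascending = new boolean[numKeyCols];
    java.util.Arrays.fill(ascending, true);
    if (order != null) {
      for (int i = 0; i < numKeyCols; i++) {
        ascending[i] = (order.charAt(i) == '+');
      }
    }
    return ascending;
  }

  public static void main(String[] args) {
    System.out.println(java.util.Arrays.toString(decode("+-", 2))); // [true, false]
    System.out.println(java.util.Arrays.toString(decode(null, 2))); // [true, true]
  }
}
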
serializationNullFormat; } @@ -71,123 +71,135 @@ public void setSerializationNullFormat(String format) { serializationNullFormat = format; } - - /** - * @return the tblDir - */ - public String getTblDir() { - return tblDir; - } - /** + /** * @return the tblDir */ + public String getTblDir() { + return tblDir; + } + + /** + * @return the tblDir + */ public Path getTblDirPath() { return new Path(tblDir); } - /** - * @param tblDir the tblDir to set - */ - public void setTblDir(String tblDir) { - this.tblDir = tblDir; - } + /** + * @param tblDir + * the tblDir to set + */ + public void setTblDir(String tblDir) { + this.tblDir = tblDir; + } - /** - * @return the tblDesc - */ - public tableDesc getTblDesc() { - return tblDesc; - } + /** + * @return the tblDesc + */ + public tableDesc getTblDesc() { + return tblDesc; + } - /** - * @param tblDesc the tblDesc to set - */ - public void setTblDesc(tableDesc tblDesc) { - this.tblDesc = tblDesc; - } + /** + * @param tblDesc + * the tblDesc to set + */ + public void setTblDesc(tableDesc tblDesc) { + this.tblDesc = tblDesc; + } - /** - * @return the partDir - */ - public List getPartDir() { - return partDir; - } + /** + * @return the partDir + */ + public List getPartDir() { + return partDir; + } + public List getPartDirPath() { + return fetchWork.convertStringToPathArray(partDir); + } - public List getPartDirPath() { - return fetchWork.convertStringToPathArray(partDir); - } - - public static List convertPathToStringArray(List paths) { - if (paths == null) - return null; - - List pathsStr = new ArrayList(); - for (Path path : paths) - pathsStr.add(path.toString()); - - return pathsStr; - } - - public static List convertStringToPathArray(List paths) { - if (paths == null) - return null; - - List pathsStr = new ArrayList(); - for (String path : paths) - pathsStr.add(new Path(path)); - - return pathsStr; + public static List convertPathToStringArray(List paths) { + if (paths == null) { + return null; + } + + List pathsStr = new ArrayList(); + for (Path path : paths) { + pathsStr.add(path.toString()); + } + + return pathsStr; } - /** - * @param partDir the partDir to set - */ - public void setPartDir(List partDir) { - this.partDir = partDir; - } + public static List convertStringToPathArray(List paths) { + if (paths == null) { + return null; + } - /** - * @return the partDesc - */ - public List getPartDesc() { - return partDesc; - } + List pathsStr = new ArrayList(); + for (String path : paths) { + pathsStr.add(new Path(path)); + } - /** - * @param partDesc the partDesc to set - */ - public void setPartDesc(List partDesc) { - this.partDesc = partDesc; - } + return pathsStr; + } - /** - * @return the limit - */ - @explain(displayName="limit") - public int getLimit() { - return limit; - } + /** + * @param partDir + * the partDir to set + */ + public void setPartDir(List partDir) { + this.partDir = partDir; + } - /** - * @param limit the limit to set - */ - public void setLimit(int limit) { - this.limit = limit; - } - - public String toString() { - if (tblDir != null) - return new String ("table = " + tblDir); - - if (partDir == null) - return "null fetchwork"; - - String ret = new String("partition = "); - for (String part : partDir) - ret = ret.concat(part); - + /** + * @return the partDesc + */ + public List getPartDesc() { + return partDesc; + } + + /** + * @param partDesc + * the partDesc to set + */ + public void setPartDesc(List partDesc) { + this.partDesc = partDesc; + } + + /** + * @return the limit + */ + @explain(displayName = "limit") + public int 
getLimit() { + return limit; + } + + /** + * @param limit + * the limit to set + */ + public void setLimit(int limit) { + this.limit = limit; + } + + @Override + public String toString() { + if (tblDir != null) { + return new String("table = " + tblDir); + } + + if (partDir == null) { + return "null fetchwork"; + } + + String ret = new String("partition = "); + for (String part : partDir) { + ret = ret.concat(part); + } + return ret; } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java (working copy) @@ -22,84 +22,94 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Describes a GenericFunc node. */ -public class exprNodeGenericFuncDesc extends exprNodeDesc implements Serializable { +public class exprNodeGenericFuncDesc extends exprNodeDesc implements + Serializable { private static final long serialVersionUID = 1L; - + /** * In case genericUDF is Serializable, we will serialize the object. * * In case genericUDF does not implement Serializable, Java will remember the - * class of genericUDF and creates a new instance when deserialized. This is + * class of genericUDF and creates a new instance when deserialized. This is * exactly what we want. 
*/ private GenericUDF genericUDF; - private List childExprs; - - public exprNodeGenericFuncDesc() {} - public exprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF, - List children) { + private List childExprs; + + public exprNodeGenericFuncDesc() { + } + + public exprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF, + List children) { super(typeInfo); - assert(genericUDF != null); + assert (genericUDF != null); this.genericUDF = genericUDF; - this.childExprs = children; + childExprs = children; } - + public GenericUDF getGenericUDF() { return genericUDF; } - + public void setGenericUDF(GenericUDF genericUDF) { this.genericUDF = genericUDF; } - + public List getChildExprs() { - return this.childExprs; + return childExprs; } + public void setChildExprs(List children) { - this.childExprs = children; + childExprs = children; } + @Override public List getChildren() { return childExprs; } + + @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDF.getClass().toString()); sb.append("("); - for(int i=0; i0) sb.append(", "); + for (int i = 0; i < childExprs.size(); i++) { + if (i > 0) { + sb.append(", "); + } sb.append(childExprs.get(i).toString()); } sb.append("("); sb.append(")"); return sb.toString(); } - - @explain(displayName="expr") + + @explain(displayName = "expr") @Override public String getExprString() { // Get the children expr strings String[] childrenExprStrings = new String[childExprs.size()]; - for (int i=0; i getCols() { List colList = new ArrayList(); if (childExprs != null) { @@ -113,63 +123,70 @@ return colList; } - + @Override public exprNodeDesc clone() { List cloneCh = new ArrayList(childExprs.size()); - for(exprNodeDesc ch : childExprs) { + for (exprNodeDesc ch : childExprs) { cloneCh.add(ch.clone()); } - exprNodeGenericFuncDesc clone = new exprNodeGenericFuncDesc(this.typeInfo, + exprNodeGenericFuncDesc clone = new exprNodeGenericFuncDesc(typeInfo, FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh); return clone; } - + /** - * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the children - * parameters. + * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the + * children parameters. 
+ * * @throws UDFArgumentException */ - public static exprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, + public static exprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, List children) throws UDFArgumentException { ObjectInspector[] childrenOIs = new ObjectInspector[children.size()]; - for(int i=0; i outputInternalColNames; - public lateralViewJoinDesc() { + + public lateralViewJoinDesc() { } + public lateralViewJoinDesc(ArrayList outputInternalColNames) { this.outputInternalColNames = outputInternalColNames; } - public void setOutputInternalColNames( - ArrayList outputInternalColNames) { + + public void setOutputInternalColNames(ArrayList outputInternalColNames) { this.outputInternalColNames = outputInternalColNames; } - @explain(displayName="outputColumnNames") + + @explain(displayName = "outputColumnNames") public ArrayList getOutputInternalColNames() { - return this.outputInternalColNames; + return outputInternalColNames; } } Index: ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (working copy) @@ -47,29 +47,25 @@ import org.apache.thrift.protocol.TJSONProtocol; import org.apache.thrift.transport.TMemoryBuffer; - public class QueryPlan implements Serializable { private static final long serialVersionUID = 1L; - + static final private Log LOG = LogFactory.getLog(QueryPlan.class.getName()); - private String queryString; - private BaseSemanticAnalyzer plan; - private String queryId; - private org.apache.hadoop.hive.ql.plan.api.Query query; - private Map> counters; - private Set done; - private Set started; - - private boolean add; + private final String queryString; + private final BaseSemanticAnalyzer plan; + private final String queryId; + private final org.apache.hadoop.hive.ql.plan.api.Query query; + private final Map> counters; + private final Set done; + private final Set started; - public QueryPlan(String queryString, BaseSemanticAnalyzer plan) { this.queryString = queryString; this.plan = plan; - this.queryId = makeQueryId(); + queryId = makeQueryId(); query = new org.apache.hadoop.hive.ql.plan.api.Query(); - query.setQueryId(this.queryId); + query.setQueryId(queryId); query.putToQueryAttributes("queryString", this.queryString); counters = new HashMap>(); done = new HashSet(); @@ -92,27 +88,31 @@ GregorianCalendar gc = new GregorianCalendar(); String userid = System.getProperty("user.name"); - return userid + "_" + - String.format("%1$4d%2$02d%3$02d%4$02d%5$02d%5$02d", gc.get(Calendar.YEAR), - gc.get(Calendar.MONTH) + 1, - gc.get(Calendar.DAY_OF_MONTH), - gc.get(Calendar.HOUR_OF_DAY), - gc.get(Calendar.MINUTE), gc.get(Calendar.SECOND)); + return userid + + "_" + + String.format("%1$4d%2$02d%3$02d%4$02d%5$02d%5$02d", gc + .get(Calendar.YEAR), gc.get(Calendar.MONTH) + 1, gc + .get(Calendar.DAY_OF_MONTH), gc.get(Calendar.HOUR_OF_DAY), gc + .get(Calendar.MINUTE), gc.get(Calendar.SECOND)); } /** * generate the operator graph and operator list for the given task based on * the operators corresponding to that task - * @param task api.Task which needs its operator graph populated - * @param topOps the set of top operators from which the operator graph for the task - * is hanging + * + * @param task + * api.Task which needs its operator graph populated + * @param topOps + * the set of top operators from which the operator graph for the + * task is hanging */ - private 
void populateOperatorGraph(org.apache.hadoop.hive.ql.plan.api.Task task, + private void populateOperatorGraph( + org.apache.hadoop.hive.ql.plan.api.Task task, Collection> topOps) { - + task.setOperatorGraph(new org.apache.hadoop.hive.ql.plan.api.Graph()); task.getOperatorGraph().setNodeType(NodeType.OPERATOR); - + Queue> opsToVisit = new LinkedList>(); Set> opsVisited = new HashSet>(); opsToVisit.addAll(topOps); @@ -129,7 +129,7 @@ org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency(); entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE); entry.setNode(op.getOperatorId()); - for (Operator childOp: op.getChildOperators()) { + for (Operator childOp : op.getChildOperators()) { entry.addToChildren(childOp.getOperatorId()); if (!opsVisited.contains(childOp)) { opsToVisit.add(childOp); @@ -139,7 +139,7 @@ } } } - + /** * Populate api.QueryPlan from exec structures. This includes constructing the * dependency graphs of stages and operators. @@ -161,16 +161,17 @@ stage.setStageId(task.getId()); stage.setStageType(task.getType()); query.addToStageList(stage); - + if (task instanceof ExecDriver) { // populate map task - ExecDriver mrTask = (ExecDriver)task; + ExecDriver mrTask = (ExecDriver) task; org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task(); mapTask.setTaskId(stage.getStageId() + "_MAP"); mapTask.setTaskType(TaskType.MAP); stage.addToTaskList(mapTask); - populateOperatorGraph(mapTask, mrTask.getWork().getAliasToWork().values()); - + populateOperatorGraph(mapTask, mrTask.getWork().getAliasToWork() + .values()); + // populate reduce task if (mrTask.hasReduce()) { org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task(); @@ -181,8 +182,7 @@ reducerTopOps.add(mrTask.getWork().getReducer()); populateOperatorGraph(reduceTask, reducerTopOps); } - } - else { + } else { org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task(); otherTask.setTaskId(stage.getStageId() + "_OTHER"); otherTask.setTaskType(TaskType.OTHER); @@ -192,15 +192,15 @@ org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency(); listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE); listEntry.setNode(task.getId()); - ConditionalTask t = (ConditionalTask)task; - - for (Task listTask: t.getListTasks()) { + ConditionalTask t = (ConditionalTask) task; + + for (Task listTask : t.getListTasks()) { if (t.getChildTasks() != null) { org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency(); childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE); childEntry.setNode(listTask.getId()); // done processing the task - for (Task childTask: t.getChildTasks()) { + for (Task childTask : t.getChildTasks()) { childEntry.addToChildren(childTask.getId()); if (!tasksVisited.contains(childTask)) { tasksToVisit.add(childTask); @@ -208,20 +208,19 @@ } query.getStageGraph().addToAdjacencyList(childEntry); } - + listEntry.addToChildren(listTask.getId()); if (!tasksVisited.contains(listTask)) { tasksToVisit.add(listTask); } } query.getStageGraph().addToAdjacencyList(listEntry); - } - else if (task.getChildTasks() != null) { + } else if (task.getChildTasks() != null) { org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency(); entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE); entry.setNode(task.getId()); // done processing the task - for (Task 
childTask: task.getChildTasks()) { + for (Task childTask : task.getChildTasks()) { entry.addToChildren(childTask.getId()); if (!tasksVisited.contains(childTask)) { tasksToVisit.add(childTask); @@ -233,37 +232,41 @@ } /** - * From the counters extracted via extractCounters(), update the counters - * in the query plan + * From the counters extracted via extractCounters(), update the counters in + * the query plan */ private void updateCountersInQueryPlan() { query.setStarted(started.contains(query.getQueryId())); query.setDone(done.contains(query.getQueryId())); - if (query.getStageList() != null) - for (org.apache.hadoop.hive.ql.plan.api.Stage stage: query.getStageList()) { - stage.setStarted(started.contains(stage.getStageId())); - stage.setStageCounters(counters.get(stage.getStageId())); - stage.setDone(done.contains(stage.getStageId())); - for (org.apache.hadoop.hive.ql.plan.api.Task task: stage.getTaskList()) { - task.setTaskCounters(counters.get(task.getTaskId())); - if (task.getTaskType() == TaskType.OTHER) { - task.setStarted(started.contains(stage.getStageId())); - task.setDone(done.contains(stage.getStageId())); - } else { - task.setStarted(started.contains(task.getTaskId())); - task.setDone(done.contains(task.getTaskId())); - for (org.apache.hadoop.hive.ql.plan.api.Operator op: task.getOperatorList()) { - // if the task has started, all operators within the task have started - op.setStarted(started.contains(task.getTaskId())); - op.setOperatorCounters(counters.get(op.getOperatorId())); - // if the task is done, all operators are done as well - op.setDone(done.contains(task.getTaskId())); + if (query.getStageList() != null) { + for (org.apache.hadoop.hive.ql.plan.api.Stage stage : query + .getStageList()) { + stage.setStarted(started.contains(stage.getStageId())); + stage.setStageCounters(counters.get(stage.getStageId())); + stage.setDone(done.contains(stage.getStageId())); + for (org.apache.hadoop.hive.ql.plan.api.Task task : stage.getTaskList()) { + task.setTaskCounters(counters.get(task.getTaskId())); + if (task.getTaskType() == TaskType.OTHER) { + task.setStarted(started.contains(stage.getStageId())); + task.setDone(done.contains(stage.getStageId())); + } else { + task.setStarted(started.contains(task.getTaskId())); + task.setDone(done.contains(task.getTaskId())); + for (org.apache.hadoop.hive.ql.plan.api.Operator op : task + .getOperatorList()) { + // if the task has started, all operators within the task have + // started + op.setStarted(started.contains(task.getTaskId())); + op.setOperatorCounters(counters.get(op.getOperatorId())); + // if the task is done, all operators are done as well + op.setDone(done.contains(task.getTaskId())); + } } } } } } - + /** * extract all the counters from tasks and operators */ @@ -276,7 +279,7 @@ tasksVisited.add(task); // add children to tasksToVisit if (task.getChildTasks() != null) { - for (Task childTask: task.getChildTasks()) { + for (Task childTask : task.getChildTasks()) { if (!tasksVisited.contains(childTask)) { tasksToVisit.add(childTask); } @@ -298,8 +301,9 @@ done.add(task.getId()); } if (task instanceof ExecDriver) { - ExecDriver mrTask = (ExecDriver)task; - extractOperatorCounters(mrTask.getWork().getAliasToWork().values(), task.getId() + "_MAP"); + ExecDriver mrTask = (ExecDriver) task; + extractOperatorCounters(mrTask.getWork().getAliasToWork().values(), + task.getId() + "_MAP"); if (mrTask.mapStarted()) { started.add(task.getId() + "_MAP"); } @@ -317,10 +321,9 @@ done.add(task.getId() + "_REDUCE"); } } - } - else if (task 
instanceof ConditionalTask) { - ConditionalTask cTask = (ConditionalTask)task; - for (Task listTask: cTask.getListTasks()) { + } else if (task instanceof ConditionalTask) { + ConditionalTask cTask = (ConditionalTask) task; + for (Task listTask : cTask.getListTasks()) { if (!tasksVisited.contains(listTask)) { tasksToVisit.add(listTask); } @@ -329,7 +332,8 @@ } } - private void extractOperatorCounters(Collection> topOps, String taskId) { + private void extractOperatorCounters( + Collection> topOps, String taskId) { Queue> opsToVisit = new LinkedList>(); Set> opsVisited = new HashSet>(); opsToVisit.addAll(topOps); @@ -341,7 +345,7 @@ done.add(op.getOperatorId()); } if (op.getChildOperators() != null) { - for (Operator childOp: op.getChildOperators()) { + for (Operator childOp : op.getChildOperators()) { if (!opsVisited.contains(childOp)) { opsToVisit.add(childOp); } @@ -351,7 +355,8 @@ } - public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException { + public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() + throws IOException { if (query.getStageGraph() == null) { populateQueryPlan(); } @@ -372,7 +377,7 @@ } public String getJSONKeyValue(Object key, Object value) { - return "\"" + key + "\":" + getJSONValue(value) + ","; + return "\"" + key + "\":" + getJSONValue(value) + ","; } @SuppressWarnings("unchecked") @@ -382,11 +387,11 @@ } StringBuilder sb = new StringBuilder(); sb.append("["); - for (Object entry: list) { + for (Object entry : list) { sb.append(getJSONValue(entry)); sb.append(","); } - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("]"); return sb.toString(); } @@ -398,11 +403,11 @@ } StringBuilder sb = new StringBuilder(); sb.append("{"); - for (Object entry: map.entrySet()) { - Map.Entry e = (Map.Entry)entry; + for (Object entry : map.entrySet()) { + Map.Entry e = (Map.Entry) entry; sb.append(getJSONKeyValue(e.getKey(), e.getValue())); } - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -418,17 +423,19 @@ // adjacency list List adjList = new ArrayList(); if (graph.getAdjacencyList() != null) { - for (org.apache.hadoop.hive.ql.plan.api.Adjacency adj: graph.getAdjacencyList()) { + for (org.apache.hadoop.hive.ql.plan.api.Adjacency adj : graph + .getAdjacencyList()) { adjList.add(getJSONAdjacency(adj)); } } sb.append(getJSONKeyValue("adjacencyList", getJSONList(adjList))); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } - private String getJSONAdjacency(org.apache.hadoop.hive.ql.plan.api.Adjacency adj) { + private String getJSONAdjacency( + org.apache.hadoop.hive.ql.plan.api.Adjacency adj) { if (adj == null) { return "null"; } @@ -437,7 +444,7 @@ sb.append(getJSONKeyValue("node", adj.getNode())); sb.append(getJSONKeyValue("children", getJSONList(adj.getChildren()))); sb.append(getJSONKeyValue("adjacencyType", adj.getAdjacencyType())); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -447,11 +454,13 @@ sb.append("{"); sb.append(getJSONKeyValue("operatorId", op.getOperatorId())); sb.append(getJSONKeyValue("operatorType", op.getOperatorType())); - sb.append(getJSONKeyValue("operatorAttributes", getJSONMap(op.getOperatorAttributes()))); - sb.append(getJSONKeyValue("operatorCounters", getJSONMap(op.getOperatorCounters()))); + sb.append(getJSONKeyValue("operatorAttributes", getJSONMap(op + .getOperatorAttributes()))); + 
sb.append(getJSONKeyValue("operatorCounters", getJSONMap(op + .getOperatorCounters()))); sb.append(getJSONKeyValue("done", op.isDone())); sb.append(getJSONKeyValue("started", op.isStarted())); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -461,20 +470,24 @@ sb.append("{"); sb.append(getJSONKeyValue("taskId", task.getTaskId())); sb.append(getJSONKeyValue("taskType", task.getTaskType())); - sb.append(getJSONKeyValue("taskAttributes", getJSONMap(task.getTaskAttributes()))); - sb.append(getJSONKeyValue("taskCounters", getJSONMap(task.getTaskCounters()))); - sb.append(getJSONKeyValue("operatorGraph", getJSONGraph(task.getOperatorGraph()))); + sb.append(getJSONKeyValue("taskAttributes", getJSONMap(task + .getTaskAttributes()))); + sb.append(getJSONKeyValue("taskCounters", + getJSONMap(task.getTaskCounters()))); + sb.append(getJSONKeyValue("operatorGraph", getJSONGraph(task + .getOperatorGraph()))); // operator list List opList = new ArrayList(); if (task.getOperatorList() != null) { - for (org.apache.hadoop.hive.ql.plan.api.Operator op: task.getOperatorList()) { + for (org.apache.hadoop.hive.ql.plan.api.Operator op : task + .getOperatorList()) { opList.add(getJSONOperator(op)); } } sb.append(getJSONKeyValue("operatorList", getJSONList(opList))); sb.append(getJSONKeyValue("done", task.isDone())); sb.append(getJSONKeyValue("started", task.isStarted())); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -484,18 +497,20 @@ sb.append("{"); sb.append(getJSONKeyValue("stageId", stage.getStageId())); sb.append(getJSONKeyValue("stageType", stage.getStageType())); - sb.append(getJSONKeyValue("stageAttributes", getJSONMap(stage.getStageAttributes()))); - sb.append(getJSONKeyValue("stageCounters", getJSONMap(stage.getStageCounters()))); + sb.append(getJSONKeyValue("stageAttributes", getJSONMap(stage + .getStageAttributes()))); + sb.append(getJSONKeyValue("stageCounters", getJSONMap(stage + .getStageCounters()))); List taskList = new ArrayList(); if (stage.getTaskList() != null) { - for (org.apache.hadoop.hive.ql.plan.api.Task task: stage.getTaskList()) { + for (org.apache.hadoop.hive.ql.plan.api.Task task : stage.getTaskList()) { taskList.add(getJSONTask(task)); } } sb.append(getJSONKeyValue("taskList", getJSONList(taskList))); sb.append(getJSONKeyValue("done", stage.isDone())); sb.append(getJSONKeyValue("started", stage.isStarted())); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -505,20 +520,25 @@ sb.append("{"); sb.append(getJSONKeyValue("queryId", query.getQueryId())); sb.append(getJSONKeyValue("queryType", query.getQueryType())); - sb.append(getJSONKeyValue("queryAttributes", getJSONMap(query.getQueryAttributes()))); - sb.append(getJSONKeyValue("queryCounters", getJSONMap(query.getQueryCounters()))); - sb.append(getJSONKeyValue("stageGraph", getJSONGraph(query.getStageGraph()))); + sb.append(getJSONKeyValue("queryAttributes", getJSONMap(query + .getQueryAttributes()))); + sb.append(getJSONKeyValue("queryCounters", getJSONMap(query + .getQueryCounters()))); + sb + .append(getJSONKeyValue("stageGraph", getJSONGraph(query + .getStageGraph()))); // stageList List stageList = new ArrayList(); if (query.getStageList() != null) { - for (org.apache.hadoop.hive.ql.plan.api.Stage stage: query.getStageList()) { + for (org.apache.hadoop.hive.ql.plan.api.Stage stage : query + .getStageList()) { 
stageList.add(getJSONStage(stage)); } } sb.append(getJSONKeyValue("stageList", getJSONList(stageList))); sb.append(getJSONKeyValue("done", query.isDone())); sb.append(getJSONKeyValue("started", query.isStarted())); - sb.deleteCharAt(sb.length()-1); + sb.deleteCharAt(sb.length() - 1); sb.append("}"); return sb.toString(); } @@ -527,16 +547,15 @@ public String toString() { try { return getJSONQuery(getQueryPlan()); - } - catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); return e.toString(); } - } + } public String toThriftJSONString() throws IOException { org.apache.hadoop.hive.ql.plan.api.Query q = getQueryPlan(); - TMemoryBuffer tmb = new TMemoryBuffer(q.toString().length()*5); + TMemoryBuffer tmb = new TMemoryBuffer(q.toString().length() * 5); TJSONProtocol oprot = new TJSONProtocol(tmb); try { q.write(oprot); @@ -550,7 +569,7 @@ public String toBinaryString() throws IOException { org.apache.hadoop.hive.ql.plan.api.Query q = getQueryPlan(); - TMemoryBuffer tmb = new TMemoryBuffer(q.toString().length()*5); + TMemoryBuffer tmb = new TMemoryBuffer(q.toString().length() * 5); TBinaryProtocol oprot = new TBinaryProtocol(tmb); try { q.write(oprot); @@ -562,8 +581,8 @@ byte[] buf = new byte[tmb.length()]; tmb.read(buf, 0, tmb.length()); return new String(buf); - //return getQueryPlan().toString(); - + // return getQueryPlan().toString(); + } public void setStarted() { Index: ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (working copy) @@ -16,7 +16,6 @@ * limitations under the License. */ - package org.apache.hadoop.hive.ql.tools; import java.io.IOException; @@ -29,9 +28,9 @@ import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -42,13 +41,12 @@ /** * - * This class prints out the lineage info. - * It takes sql as input and prints lineage info. - * Currently this prints only input and output tables for a given sql. - * Later we can expand to add join tables etc. - * + * This class prints out the lineage info. It takes sql as input and prints + * lineage info. Currently this prints only input and output tables for a given + * sql. Later we can expand to add join tables etc. + * */ -public class LineageInfo implements NodeProcessor { +public class LineageInfo implements NodeProcessor { /** * Stores input tables in sql @@ -57,11 +55,11 @@ /** * Stores output tables in sql */ - TreeSet OutputTableList= new TreeSet(); + TreeSet OutputTableList = new TreeSet(); /** * - * @return java.util.TreeSet + * @return java.util.TreeSet */ public TreeSet getInputTableList() { return inputTableList; @@ -77,18 +75,18 @@ /** * Implements the process method for the NodeProcessor interface. */ - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
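The QueryPlan JSON helpers reformatted above (getJSONKeyValue, getJSONList, getJSONMap) build output by appending comma-terminated pairs and deleting the final comma rather than using a JSON library. A reduced sketch of that pattern with hypothetical keys (illustration only, not part of this patch; unlike getJSONValue, it quotes every value):

import java.util.LinkedHashMap;
import java.util.Map;

public class JsonConcatSketch {
  static String keyValue(Object key, Object value) {
    return "\"" + key + "\":\"" + value + "\",";   // every pair ends with a comma
  }

  static String jsonMap(Map<?, ?> map) {
    StringBuilder sb = new StringBuilder("{");
    for (Map.Entry<?, ?> e : map.entrySet()) {
      sb.append(keyValue(e.getKey(), e.getValue()));
    }
    sb.deleteCharAt(sb.length() - 1);              // drops the trailing comma ('{' if the map is empty)
    sb.append("}");
    return sb.toString();
  }

  public static void main(String[] args) {
    Map<String, Object> m = new LinkedHashMap<String, Object>();
    m.put("stageId", "Stage-1");
    m.put("done", false);
    System.out.println(jsonMap(m)); // {"stageId":"Stage-1","done":"false"}
  }
}
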
nodeOutputs) - throws SemanticException { - ASTNode pt = (ASTNode)nd; + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ASTNode pt = (ASTNode) nd; switch (pt.getToken().getType()) { case HiveParser.TOK_TAB: - OutputTableList.add(pt.getChild(0).getText()) ; + OutputTableList.add(pt.getChild(0).getText()); break; case HiveParser.TOK_TABREF: - String table_name = ((ASTNode)pt.getChild(0)).getText(); + String table_name = ((ASTNode) pt.getChild(0)).getText(); inputTableList.add(table_name); break; } @@ -96,14 +94,16 @@ } /** - * parses given query and gets the lineage info. + * parses given query and gets the lineage info. + * * @param query * @throws ParseException */ - public void getLineageInfo(String query) throws ParseException, SemanticException { + public void getLineageInfo(String query) throws ParseException, + SemanticException { /* - * Get the AST tree + * Get the AST tree */ ParseDriver pd = new ParseDriver(); ASTNode tree = pd.parse(query); @@ -118,11 +118,13 @@ inputTableList.clear(); OutputTableList.clear(); - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher // generates the plan from the operator tree Map rules = new LinkedHashMap(); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(this, rules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -133,7 +135,7 @@ } public static void main(String[] args) throws IOException, ParseException, - SemanticException { + SemanticException { String query = args[0]; Index: ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistoryViewer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistoryViewer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistoryViewer.java (working copy) @@ -22,8 +22,8 @@ import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.history.HiveHistory.QueryInfo; -import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.history.HiveHistory.RecordTypes; import org.apache.hadoop.hive.ql.history.HiveHistory.TaskInfo; @@ -35,10 +35,10 @@ String sessionId; // Job Hash Map - private HashMap jobInfoMap = new HashMap(); + private final HashMap jobInfoMap = new HashMap(); // Task Hash Map - private HashMap taskInfoMap = new HashMap(); + private final HashMap taskInfoMap = new HashMap(); public HiveHistoryViewer(String path) { historyFile = path; @@ -81,7 +81,8 @@ if (recType == RecordTypes.SessionStart) { sessionId = values.get(Keys.SESSION_ID.name()); - } else if (recType == RecordTypes.QueryStart || recType == RecordTypes.QueryEnd) { + } else if (recType == RecordTypes.QueryStart + || recType == RecordTypes.QueryEnd) { String key = values.get(Keys.QUERY_ID.name()); QueryInfo ji; if (jobInfoMap.containsKey(key)) { Index: ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java (revision 901511) 
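LineageInfo's public surface shown above is easiest to read from the caller's side: parse one statement, then ask for the input and output table sets, which is what its own main() does. A hypothetical driver (illustration only, not part of this patch):

import org.apache.hadoop.hive.ql.tools.LineageInfo;

public class LineageInfoExample {
  public static void main(String[] args) throws Exception {
    // Walk one statement and print the tables the AST walker collected.
    LineageInfo lep = new LineageInfo();
    lep.getLineageInfo("INSERT OVERWRITE TABLE dest SELECT s.key, s.value FROM src s");

    System.out.println("input tables:  " + lep.getInputTableList());  // [src]
    System.out.println("output tables: " + lep.getOutputTableList()); // [dest]
  }
}
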
+++ ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java (working copy) @@ -19,8 +19,8 @@ package org.apache.hadoop.hive.ql.history; import java.io.BufferedReader; +import java.io.File; import java.io.FileInputStream; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; @@ -43,7 +43,6 @@ import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Counters.Group; - public class HiveHistory { PrintWriter histStream; // History File stream @@ -54,14 +53,13 @@ private LogHelper console; - private Map idToTableMap = null; - + private Map idToTableMap = null; + // Job Hash Map - private HashMap queryInfoMap = new HashMap(); + private final HashMap queryInfoMap = new HashMap(); // Task Hash Map - private HashMap taskInfoMap = new HashMap(); - + private final HashMap taskInfoMap = new HashMap(); private static final String DELIMITER = " "; @@ -70,8 +68,7 @@ }; public static enum Keys { - SESSION_ID, QUERY_ID, TASK_ID, QUERY_RET_CODE, QUERY_NUM_TASKS, QUERY_STRING, TIME, - TASK_RET_CODE, TASK_NAME, TASK_HADOOP_ID, TASK_HADOOP_PROGRESS, TASK_COUNTERS, TASK_NUM_REDUCERS, ROWS_INSERTED + SESSION_ID, QUERY_ID, TASK_ID, QUERY_RET_CODE, QUERY_NUM_TASKS, QUERY_STRING, TIME, TASK_RET_CODE, TASK_NAME, TASK_HADOOP_ID, TASK_HADOOP_PROGRESS, TASK_COUNTERS, TASK_NUM_REDUCERS, ROWS_INSERTED }; private static final String KEY = "(\\w+)"; @@ -80,10 +77,10 @@ private static final Pattern pattern = Pattern.compile(KEY + "=" + "\"" + VALUE + "\""); - - private static final Pattern rowCountPattern = Pattern.compile(ROW_COUNT_PATTERN); - + private static final Pattern rowCountPattern = Pattern + .compile(ROW_COUNT_PATTERN); + // temp buffer for parsed dataa private static Map parseBuffer = new HashMap(); @@ -112,7 +109,7 @@ StringBuffer buf = new StringBuffer(); while ((line = reader.readLine()) != null) { buf.append(line); - //if it does not end with " then it is line continuation + // if it does not end with " then it is line continuation if (!line.trim().endsWith("\"")) { continue; } @@ -183,23 +180,22 @@ console = new LogHelper(LOG); String conf_file_loc = ss.getConf().getVar( HiveConf.ConfVars.HIVEHISTORYFILELOC); - if ((conf_file_loc == null) || conf_file_loc.length() == 0) - { + if ((conf_file_loc == null) || conf_file_loc.length() == 0) { console.printError("No history file location given"); return; } - - //Create directory + + // Create directory File f = new File(conf_file_loc); - if (!f.exists()){ - if (!f.mkdir()){ - console.printError("Unable to create log directory "+conf_file_loc ); + if (!f.exists()) { + if (!f.mkdir()) { + console.printError("Unable to create log directory " + conf_file_loc); return; } } Random randGen = new Random(); - histFileName = conf_file_loc + "/hive_job_log_" + ss.getSessionId() - +"_" + Math.abs(randGen.nextInt()) + ".txt"; + histFileName = conf_file_loc + "/hive_job_log_" + ss.getSessionId() + "_" + + Math.abs(randGen.nextInt()) + ".txt"; console.printInfo("Hive history file=" + histFileName); histStream = new PrintWriter(histFileName); @@ -207,7 +203,8 @@ hm.put(Keys.SESSION_ID.name(), ss.getSessionId()); log(RecordTypes.SessionStart, hm); } catch (FileNotFoundException e) { - console.printError("FAILED: Failed to open Query Log : " +histFileName+ " "+ e.getMessage(), "\n" + console.printError("FAILED: Failed to open Query Log : " + histFileName + + " " + e.getMessage(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } @@ -228,14 +225,15 @@ */ void 
log(RecordTypes rt, Map keyValMap) { - if (histStream == null) + if (histStream == null) { return; + } StringBuffer sb = new StringBuffer(); sb.append(rt.name()); for (Map.Entry ent : keyValMap.entrySet()) { - + sb.append(DELIMITER); String key = ent.getKey(); String val = ent.getValue(); @@ -255,16 +253,15 @@ */ public void startQuery(String cmd, String id) { SessionState ss = SessionState.get(); - if (ss == null) + if (ss == null) { return; + } QueryInfo ji = new QueryInfo(); ji.hm.put(Keys.QUERY_ID.name(), id); ji.hm.put(Keys.QUERY_STRING.name(), cmd); - + queryInfoMap.put(id, ji); - - log(RecordTypes.QueryStart, ji.hm); @@ -279,8 +276,9 @@ */ public void setQueryProperty(String queryId, Keys propName, String propValue) { QueryInfo ji = queryInfoMap.get(queryId); - if (ji == null) + if (ji == null) { return; + } ji.hm.put(propName.name(), propValue); } @@ -295,8 +293,9 @@ String propValue) { String id = queryId + ":" + taskId; TaskInfo ti = taskInfoMap.get(id); - if (ti == null) + if (ti == null) { return; + } ti.hm.put(propName.name(), propValue); } @@ -311,8 +310,9 @@ QueryInfo ji = queryInfoMap.get(queryId); StringBuilder sb1 = new StringBuilder(""); TaskInfo ti = taskInfoMap.get(id); - if (ti == null) + if (ti == null) { return; + } StringBuilder sb = new StringBuilder(""); try { @@ -330,39 +330,39 @@ sb.append(':'); sb.append(counter.getCounter()); String tab = getRowCountTableName(counter.getDisplayName()); - if (tab != null){ - if (sb1.length() > 0) + if (tab != null) { + if (sb1.length() > 0) { sb1.append(","); + } sb1.append(tab); sb1.append('~'); sb1.append(counter.getCounter()); ji.rowCountMap.put(tab, counter.getCounter()); - - + } } } - - } catch (Exception e) { e.printStackTrace(); } - if (sb1.length()>0) - { + if (sb1.length() > 0) { taskInfoMap.get(id).hm.put(Keys.ROWS_INSERTED.name(), sb1.toString()); - queryInfoMap.get(queryId).hm.put(Keys.ROWS_INSERTED.name(), sb1.toString()); + queryInfoMap.get(queryId).hm.put(Keys.ROWS_INSERTED.name(), sb1 + .toString()); } - if (sb.length() > 0) + if (sb.length() > 0) { taskInfoMap.get(id).hm.put(Keys.TASK_COUNTERS.name(), sb.toString()); + } } - public void printRowCount(String queryId){ + public void printRowCount(String queryId) { QueryInfo ji = queryInfoMap.get(queryId); - for (String tab: ji.rowCountMap.keySet()){ - console.printInfo(ji.rowCountMap.get(tab)+" Rows loaded to "+ tab); + for (String tab : ji.rowCountMap.keySet()) { + console.printInfo(ji.rowCountMap.get(tab) + " Rows loaded to " + tab); } } + /** * Called at the end of Job. A Job is sql query. 
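Each record emitted by log() above is the record type followed by space-separated KEY="value" pairs, and the KEY/VALUE regex shown earlier reads them back; the ROWS_INSERTED value packs table~count pairs. A round-trip sketch (illustration only, not part of this patch; the value pattern below is an assumption, since HiveHistory defines its own VALUE constant):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HistoryRecordSketch {
  // Assumed value pattern: anything up to the next double quote.
  private static final Pattern KV = Pattern.compile("(\\w+)=\"([^\"]*)\"");

  public static void main(String[] args) {
    String line = "TaskEnd TASK_ID=\"Stage-1\" TASK_RET_CODE=\"0\" ROWS_INSERTED=\"dest~500\"";

    String recordType = line.substring(0, line.indexOf(' '));
    Map<String, String> values = new LinkedHashMap<String, String>();
    Matcher m = KV.matcher(line);
    while (m.find()) {
      values.put(m.group(1), m.group(2));
    }
    System.out.println(recordType + " " + values);
    // TaskEnd {TASK_ID=Stage-1, TASK_RET_CODE=0, ROWS_INSERTED=dest~500}
  }
}
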
* @@ -371,8 +371,9 @@ public void endQuery(String queryId) { QueryInfo ji = queryInfoMap.get(queryId); - if (ji == null) + if (ji == null) { return; + } log(RecordTypes.QueryEnd, ji.hm); } @@ -385,8 +386,9 @@ public void startTask(String queryId, Task task, String taskName) { SessionState ss = SessionState.get(); - if (ss == null) + if (ss == null) { return; + } TaskInfo ti = new TaskInfo(); ti.hm.put(Keys.QUERY_ID.name(), ss.getQueryId()); @@ -409,8 +411,9 @@ String id = queryId + ":" + task.getId(); TaskInfo ti = taskInfoMap.get(id); - if (ti == null) + if (ti == null) { return; + } log(RecordTypes.TaskEnd, ti.hm); } @@ -422,8 +425,9 @@ public void progressTask(String queryId, Task task) { String id = queryId + ":" + task.getId(); TaskInfo ti = taskInfoMap.get(id); - if (ti == null) + if (ti == null) { return; + } log(RecordTypes.TaskProgress, ti.hm); } @@ -432,6 +436,7 @@ * write out counters */ static Map ctrmap = null; + public void logPlanProgress(QueryPlan plan) throws IOException { if (ctrmap == null) { ctrmap = new HashMap(); @@ -439,25 +444,29 @@ ctrmap.put("plan", plan.toString()); log(RecordTypes.Counters, ctrmap); } - + /** * Set the table to id map + * * @param map */ - public void setIdToTableMap(Map map){ + public void setIdToTableMap(Map map) { idToTableMap = map; } /** - * Returns table name for the counter name + * Returns table name for the counter name + * * @param name * @return tableName */ - String getRowCountTableName(String name){ - if (idToTableMap == null) return null; + String getRowCountTableName(String name) { + if (idToTableMap == null) { + return null; + } Matcher m = rowCountPattern.matcher(name); - if (m.find()){ + if (m.find()) { String tuple = m.group(1); return idToTableMap.get(tuple); } @@ -465,5 +474,4 @@ } - } Index: ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java (working copy) @@ -26,20 +26,22 @@ public class AddResourceProcessor implements CommandProcessor { - public static final Log LOG = LogFactory.getLog(AddResourceProcessor.class.getName()); + public static final Log LOG = LogFactory.getLog(AddResourceProcessor.class + .getName()); public static final LogHelper console = new LogHelper(LOG); public void init() { } - + public int run(String command) { SessionState ss = SessionState.get(); String[] tokens = command.split("\\s+"); SessionState.ResourceType t; - if (tokens.length < 2 || (t = SessionState.find_resource_type(tokens[0])) == null) { - console.printError("Usage: add [" + - StringUtils.join(SessionState.ResourceType.values(),"|") + - "] []*"); + if (tokens.length < 2 + || (t = SessionState.find_resource_type(tokens[0])) == null) { + console.printError("Usage: add [" + + StringUtils.join(SessionState.ResourceType.values(), "|") + + "] []*"); return 1; } for (int i = 1; i < tokens.length; i++) { Index: ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessorFactory.java (working copy) @@ -18,16 +18,16 @@ package org.apache.hadoop.hive.ql.processors; +import org.apache.commons.lang.StringUtils; +import 
org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.commons.lang.StringUtils; public class CommandProcessorFactory { - + public static CommandProcessor get(String cmd) { String cmdl = cmd.toLowerCase(); - if(cmdl.equals("set")) { + if (cmdl.equals("set")) { return new SetProcessor(); } else if (cmdl.equals("dfs")) { SessionState ss = SessionState.get(); Index: ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java (working copy) @@ -24,35 +24,36 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; - public class DeleteResourceProcessor implements CommandProcessor { - public static final Log LOG = LogFactory.getLog(DeleteResourceProcessor.class.getName()); + public static final Log LOG = LogFactory.getLog(DeleteResourceProcessor.class + .getName()); public static final LogHelper console = new LogHelper(LOG); public void init() { } - + public int run(String command) { SessionState ss = SessionState.get(); String[] tokens = command.split("\\s+"); SessionState.ResourceType t; - if(tokens.length < 1 || (t = SessionState.find_resource_type(tokens[0])) == null) { - console.printError("Usage: delete [" + - StringUtils.join(SessionState.ResourceType.values(),"|") + - "] []*"); + if (tokens.length < 1 + || (t = SessionState.find_resource_type(tokens[0])) == null) { + console.printError("Usage: delete [" + + StringUtils.join(SessionState.ResourceType.values(), "|") + + "] []*"); return 1; } if (tokens.length >= 2) { - for(int i = 1; i < tokens.length; i++) { + for (int i = 1; i < tokens.length; i++) { ss.delete_resource(t, tokens[i]); } } else { ss.delete_resource(t); } - + return 0; } } Index: ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (working copy) @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.processors; -import java.util.*; +import java.util.Properties; import org.apache.hadoop.hive.ql.session.SessionState; @@ -27,64 +27,67 @@ private static String prefix = "set: "; public static boolean getBoolean(String value) { - if(value.equals("on") || value.equals("true")) + if (value.equals("on") || value.equals("true")) { return true; - if(value.equals("off") || value.equals("false")) + } + if (value.equals("off") || value.equals("false")) { return false; - throw new IllegalArgumentException(prefix + "'" + value + "' is not a boolean"); + } + throw new IllegalArgumentException(prefix + "'" + value + + "' is not a boolean"); } private void dumpOptions(Properties p) { SessionState ss = SessionState.get(); ss.out.println("silent=" + (ss.getIsSilent() ? 
"on" : "off")); - for(Object one: p.keySet()) { - String oneProp = (String)one; + for (Object one : p.keySet()) { + String oneProp = (String) one; String oneValue = p.getProperty(oneProp); - ss.out.println(oneProp+"="+oneValue); + ss.out.println(oneProp + "=" + oneValue); } } private void dumpOption(Properties p, String s) { SessionState ss = SessionState.get(); - - if(p.getProperty(s) != null) { - ss.out.println(s+"="+p.getProperty(s)); + + if (p.getProperty(s) != null) { + ss.out.println(s + "=" + p.getProperty(s)); } else { - ss.out.println(s+" is undefined"); + ss.out.println(s + " is undefined"); } } public void init() { } - + public int run(String command) { SessionState ss = SessionState.get(); String nwcmd = command.trim(); - if(nwcmd.equals("")) { + if (nwcmd.equals("")) { dumpOptions(ss.getConf().getChangedProperties()); return 0; } - if(nwcmd.equals("-v")) { + if (nwcmd.equals("-v")) { dumpOptions(ss.getConf().getAllProperties()); return 0; } - String[] part = new String [2]; + String[] part = new String[2]; int eqIndex = nwcmd.indexOf('='); - if(eqIndex == -1) { + if (eqIndex == -1) { // no equality sign - print the property out dumpOption(ss.getConf().getAllProperties(), nwcmd); return (0); - } else if (eqIndex == nwcmd.length()-1) { - part[0] = nwcmd.substring(0, nwcmd.length()-1); + } else if (eqIndex == nwcmd.length() - 1) { + part[0] = nwcmd.substring(0, nwcmd.length() - 1); part[1] = ""; } else { part[0] = nwcmd.substring(0, eqIndex).trim(); - part[1] = nwcmd.substring(eqIndex+1).trim(); + part[1] = nwcmd.substring(eqIndex + 1).trim(); } try { Index: ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java (working copy) @@ -32,7 +32,7 @@ public static final Log LOG = LogFactory.getLog(DfsProcessor.class.getName()); public static final LogHelper console = new LogHelper(LOG); - private FsShell dfs; + private final FsShell dfs; public DfsProcessor(Configuration conf) { dfs = new FsShell(conf); @@ -40,7 +40,7 @@ public void init() { } - + public int run(String command) { String[] tokens = command.split("\\s+"); @@ -48,12 +48,12 @@ SessionState ss = SessionState.get(); PrintStream oldOut = System.out; - if(ss != null && ss.out != null) { + if (ss != null && ss.out != null) { System.setOut(ss.out); } int ret = dfs.run(tokens); - if(ret != 0) { + if (ret != 0) { console.printError("Command failed with exit code = " + ret); } @@ -61,8 +61,9 @@ return (ret); } catch (Exception e) { - console.printError("Exception raised from DFSShell.run " + e.getLocalizedMessage(), - org.apache.hadoop.util.StringUtils.stringifyException(e)); + console.printError("Exception raised from DFSShell.run " + + e.getLocalizedMessage(), org.apache.hadoop.util.StringUtils + .stringifyException(e)); return 1; } } Index: ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/processors/CommandProcessor.java (working copy) @@ -20,5 +20,6 @@ public interface CommandProcessor { public void init(); + public int run(String command); } Index: ql/src/java/org/apache/hadoop/hive/ql/Context.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/Context.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/Context.java (working copy) @@ -18,32 +18,27 @@ package org.apache.hadoop.hive.ql; +import java.io.DataInput; import java.io.File; -import java.io.DataInput; +import java.io.FileNotFoundException; import java.io.IOException; -import java.io.FileNotFoundException; import java.net.URI; -import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.Random; -import java.util.ArrayList; import org.antlr.runtime.TokenRewriteStream; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.hive.common.FileUtils; - /** - * Context for Semantic Analyzers. - * Usage: - * not reusable - construct a new one for each query - * should call clear() at end of use to remove temporary folders + * Context for Semantic Analyzers. Usage: not reusable - construct a new one for + * each query should call clear() at end of use to remove temporary folders */ public class Context { private Path resFile; @@ -51,22 +46,22 @@ private FileSystem resFs; static final private Log LOG = LogFactory.getLog("hive.ql.Context"); private Path[] resDirPaths; - private int resDirFilesNum; + private int resDirFilesNum; boolean initialized; private String scratchPath; private Path MRScratchDir; private Path localScratchDir; - private ArrayList allScratchDirs = new ArrayList (); + private final ArrayList allScratchDirs = new ArrayList(); private HiveConf conf; - Random rand = new Random (); + Random rand = new Random(); protected int randomid = Math.abs(rand.nextInt()); protected int pathid = 10000; protected boolean explain = false; private TokenRewriteStream tokenRewriteStream; - public Context() { + public Context() { } - + public Context(HiveConf conf) { this.conf = conf; Path tmpPath = new Path(conf.getVar(HiveConf.ConfVars.SCRATCHDIR)); @@ -75,7 +70,9 @@ /** * Set the context on whether the current query is an explain query - * @param value true if the query is an explain query, false if not + * + * @param value + * true if the query is an explain query, false if not */ public void setExplain(boolean value) { explain = value; @@ -83,6 +80,7 @@ /** * Find out whether the current query is an explain query + * * @return true if the query is an explain query, false if not */ public boolean getExplain() { @@ -95,7 +93,7 @@ private void makeLocalScratchDir() throws IOException { while (true) { localScratchDir = new Path(System.getProperty("java.io.tmpdir") - + File.separator + Math.abs(rand.nextInt())); + + File.separator + Math.abs(rand.nextInt())); FileSystem fs = FileSystem.getLocal(conf); if (fs.mkdirs(localScratchDir)) { localScratchDir = fs.makeQualified(localScratchDir); @@ -106,15 +104,15 @@ } /** - * Make a tmp directory for MR intermediate data - * If URI/Scheme are not supplied - those implied by the default filesystem - * will be used (which will typically correspond to hdfs instance on hadoop cluster) + * Make a tmp directory for MR intermediate data If URI/Scheme are not + * supplied - those implied by the default filesystem will be used (which will + * typically 
correspond to hdfs instance on hadoop cluster) */ private void makeMRScratchDir() throws IOException { - while(true) { - MRScratchDir = FileUtils.makeQualified - (new Path(conf.getVar(HiveConf.ConfVars.SCRATCHDIR), - Integer.toString(Math.abs(rand.nextInt()))), conf); + while (true) { + MRScratchDir = FileUtils.makeQualified(new Path(conf + .getVar(HiveConf.ConfVars.SCRATCHDIR), Integer.toString(Math.abs(rand + .nextInt()))), conf); if (explain) { allScratchDirs.add(MRScratchDir); @@ -128,20 +126,20 @@ } } } - + /** - * Make a tmp directory on specified URI - * Currently will use the same path as implied by SCRATCHDIR config variable + * Make a tmp directory on specified URI Currently will use the same path as + * implied by SCRATCHDIR config variable */ private Path makeExternalScratchDir(URI extURI) throws IOException { - while(true) { - String extPath = scratchPath + File.separator + - Integer.toString(Math.abs(rand.nextInt())); + while (true) { + String extPath = scratchPath + File.separator + + Integer.toString(Math.abs(rand.nextInt())); Path extScratchDir = new Path(extURI.getScheme(), extURI.getAuthority(), - extPath); + extPath); if (explain) { - allScratchDirs.add(extScratchDir); + allScratchDirs.add(extScratchDir); return extScratchDir; } @@ -154,26 +152,25 @@ } /** - * Get a tmp directory on specified URI - * Will check if this has already been made - * (either via MR or Local FileSystem or some other external URI + * Get a tmp directory on specified URI Will check if this has already been + * made (either via MR or Local FileSystem or some other external URI */ private String getExternalScratchDir(URI extURI) { try { // first check if we already made a scratch dir on this URI - for (Path p: allScratchDirs) { + for (Path p : allScratchDirs) { URI pURI = p.toUri(); - if (strEquals(pURI.getScheme(), extURI.getScheme()) && - strEquals(pURI.getAuthority(), extURI.getAuthority())) { + if (strEquals(pURI.getScheme(), extURI.getScheme()) + && strEquals(pURI.getAuthority(), extURI.getAuthority())) { return p.toString(); } } return makeExternalScratchDir(extURI).toString(); } catch (IOException e) { - throw new RuntimeException (e); + throw new RuntimeException(e); } } - + /** * Create a map-reduce scratch directory on demand and return it */ @@ -182,10 +179,10 @@ try { makeMRScratchDir(); } catch (IOException e) { - throw new RuntimeException (e); + throw new RuntimeException(e); } catch (IllegalArgumentException e) { - throw new RuntimeException("Error while making MR scratch " + - "directory - check filesystem config (" + e.getCause() + ")", e); + throw new RuntimeException("Error while making MR scratch " + + "directory - check filesystem config (" + e.getCause() + ")", e); } } return MRScratchDir.toString(); @@ -201,8 +198,8 @@ } catch (IOException e) { throw new RuntimeException(e); } catch (IllegalArgumentException e) { - throw new RuntimeException("Error while making local scratch " + - "directory - check filesystem config (" + e.getCause() + ")", e); + throw new RuntimeException("Error while making local scratch " + + "directory - check filesystem config (" + e.getCause() + ")", e); } } return localScratchDir.toString(); @@ -214,17 +211,21 @@ private void removeScratchDir() { if (explain) { try { - if (localScratchDir != null) + if (localScratchDir != null) { FileSystem.getLocal(conf).delete(localScratchDir, true); + } } catch (Exception e) { - LOG.warn("Error Removing Scratch: " + StringUtils.stringifyException(e)); + LOG + .warn("Error Removing Scratch: " + + 
StringUtils.stringifyException(e)); } } else { - for (Path p: allScratchDirs) { + for (Path p : allScratchDirs) { try { p.getFileSystem(conf).delete(p, true); } catch (Exception e) { - LOG.warn("Error Removing Scratch: " + StringUtils.stringifyException(e)); + LOG.warn("Error Removing Scratch: " + + StringUtils.stringifyException(e)); } } } @@ -238,12 +239,13 @@ private String nextPath(String base) { return base + File.separator + Integer.toString(pathid++); } - + /** - * check if path is tmp path. the assumption is that all uri's relative - * to scratchdir are temporary - * @return true if a uri is a temporary uri for map-reduce intermediate - * data, false otherwise + * check if path is tmp path. the assumption is that all uri's relative to + * scratchdir are temporary + * + * @return true if a uri is a temporary uri for map-reduce intermediate data, + * false otherwise */ public boolean isMRTmpFileURI(String uriStr) { return (uriStr.indexOf(scratchPath) != -1); @@ -251,28 +253,28 @@ /** * Get a path to store map-reduce intermediate data in + * * @return next available path for map-red intermediate data */ public String getMRTmpFileURI() { return nextPath(getMRScratchDir()); } - /** * Get a tmp path on local host to store intermediate data + * * @return next available tmp path on local fs */ public String getLocalTmpFileURI() { return nextPath(getLocalScratchDir()); } - /** * Get a path to store tmp data destined for external URI - * @param extURI external URI to which the tmp data has to be - * eventually moved - * @return next available tmp path on the file system corresponding - * extURI + * + * @param extURI + * external URI to which the tmp data has to be eventually moved + * @return next available tmp path on the file system corresponding extURI */ public String getExternalTmpFileURI(URI extURI) { return nextPath(getExternalScratchDir(extURI)); @@ -286,7 +288,8 @@ } /** - * @param resFile the resFile to set + * @param resFile + * the resFile to set */ public void setResFile(Path resFile) { this.resFile = resFile; @@ -303,7 +306,8 @@ } /** - * @param resDir the resDir to set + * @param resDir + * the resDir to set */ public void setResDir(Path resDir) { this.resDir = resDir; @@ -311,13 +315,11 @@ resDirFilesNum = 0; resDirPaths = null; - } - + } + public void clear() throws IOException { - if (resDir != null) - { - try - { + if (resDir != null) { + try { FileSystem fs = resDir.getFileSystem(conf); fs.delete(resDir, true); } catch (IOException e) { @@ -325,12 +327,10 @@ } } - if (resFile != null) - { - try - { + if (resFile != null) { + try { FileSystem fs = resFile.getFileSystem(conf); - fs.delete(resFile, false); + fs.delete(resFile, false); } catch (IOException e) { LOG.info("Context clear error: " + StringUtils.stringifyException(e)); } @@ -339,30 +339,34 @@ } public DataInput getStream() { - try - { + try { if (!initialized) { initialized = true; - if ((resFile == null) && (resDir == null)) return null; - + if ((resFile == null) && (resDir == null)) { + return null; + } + if (resFile != null) { - return (DataInput)resFile.getFileSystem(conf).open(resFile); + return resFile.getFileSystem(conf).open(resFile); } - + resFs = resDir.getFileSystem(conf); FileStatus status = resFs.getFileStatus(resDir); assert status.isDir(); FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*")); resDirPaths = new Path[resDirFS.length]; int pos = 0; - for (FileStatus resFS: resDirFS) - if (!resFS.isDir()) + for (FileStatus resFS : resDirFS) { + if (!resFS.isDir()) { resDirPaths[pos++] = 
resFS.getPath(); - if (pos == 0) return null; - - return (DataInput)resFs.open(resDirPaths[resDirFilesNum++]); - } - else { + } + } + if (pos == 0) { + return null; + } + + return resFs.open(resDirPaths[resDirFilesNum++]); + } else { return getNextStream(); } } catch (FileNotFoundException e) { @@ -375,11 +379,11 @@ } private DataInput getNextStream() { - try - { - if (resDir != null && resDirFilesNum < resDirPaths.length && - (resDirPaths[resDirFilesNum] != null)) - return (DataInput)resFs.open(resDirPaths[resDirFilesNum++]); + try { + if (resDir != null && resDirFilesNum < resDirPaths.length + && (resDirPaths[resDirFilesNum] != null)) { + return resFs.open(resDirPaths[resDirFilesNum++]); + } } catch (FileNotFoundException e) { LOG.info("getNextStream error: " + StringUtils.stringifyException(e)); return null; @@ -387,7 +391,7 @@ LOG.info("getNextStream error: " + StringUtils.stringifyException(e)); return null; } - + return null; } @@ -400,25 +404,25 @@ /** * Set the token rewrite stream being used to parse the current top-level SQL - * statement. Note that this should not be used for other parsing - * activities; for example, when we encounter a reference to a view, we - * switch to a new stream for parsing the stored view definition from the - * catalog, but we don't clobber the top-level stream in the context. - * - * @param tokenRewriteStream the stream being used + * statement. Note that this should not be used for other parsing + * activities; for example, when we encounter a reference to a view, we switch + * to a new stream for parsing the stored view definition from the catalog, + * but we don't clobber the top-level stream in the context. + * + * @param tokenRewriteStream + * the stream being used */ public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) { - assert(this.tokenRewriteStream == null); + assert (this.tokenRewriteStream == null); this.tokenRewriteStream = tokenRewriteStream; } /** - * @return the token rewrite stream being used to parse the current - * top-level SQL statement, or null if it isn't available - * (e.g. for parser tests) + * @return the token rewrite stream being used to parse the current top-level + * SQL statement, or null if it isn't available (e.g. for parser + * tests) */ public TokenRewriteStream getTokenRewriteStream() { return tokenRewriteStream; } } - Index: ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java (working copy) @@ -25,28 +25,36 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Rule interface for Nodes - * Used in Node dispatching to dispatch process/visitor functions for Nodes + * Rule interface for Nodes Used in Node dispatching to dispatch process/visitor + * functions for Nodes */ public class RuleRegExp implements Rule { - - private String ruleName; - private Pattern pattern; + private final String ruleName; + private final Pattern pattern; + /** - * The rule specified by the regular expression. Note that, the regular expression is specified in terms of Node - * name. For eg: TS.*RS -> means TableScan Node followed by anything any number of times followed by ReduceSink - * @param ruleName name of the rule - * @param regExp regular expression for the rule + * The rule specified by the regular expression. Note that, the regular + * expression is specified in terms of Node name. 
For eg: TS.*RS -> means + * TableScan Node followed by anything any number of times followed by + * ReduceSink + * + * @param ruleName + * name of the rule + * @param regExp + * regular expression for the rule **/ public RuleRegExp(String ruleName, String regExp) { this.ruleName = ruleName; - pattern = Pattern.compile(regExp); + pattern = Pattern.compile(regExp); } /** - * This function returns the cost of the rule for the specified stack. Lower the cost, the better the rule is matched - * @param stack Node stack encountered so far + * This function returns the cost of the rule for the specified stack. Lower + * the cost, the better the rule is matched + * + * @param stack + * Node stack encountered so far * @return cost of the function * @throws SemanticException */ @@ -56,8 +64,9 @@ for (int pos = numElems - 1; pos >= 0; pos--) { name = stack.get(pos).getName() + "%" + name; Matcher m = pattern.matcher(name); - if (m.matches()) + if (m.matches()) { return m.group().length(); + } } return -1; Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java (working copy) @@ -24,36 +24,44 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Dispatches calls to relevant method in processor. The user registers various rules with the dispatcher, and - * the processor corresponding to closest matching rule is fired. + * Dispatches calls to relevant method in processor. The user registers various + * rules with the dispatcher, and the processor corresponding to closest + * matching rule is fired. */ public class DefaultRuleDispatcher implements Dispatcher { - - private Map procRules; - private NodeProcessorCtx procCtx; - private NodeProcessor defaultProc; + private final Map procRules; + private final NodeProcessorCtx procCtx; + private final NodeProcessor defaultProc; + /** * constructor - * @param defaultProc default processor to be fired if no rule matches - * @param rules operator processor that handles actual processing of the node - * @param procCtx operator processor context, which is opaque to the dispatcher + * + * @param defaultProc + * default processor to be fired if no rule matches + * @param rules + * operator processor that handles actual processing of the node + * @param procCtx + * operator processor context, which is opaque to the dispatcher */ - public DefaultRuleDispatcher(NodeProcessor defaultProc, - Map rules, NodeProcessorCtx procCtx) { + public DefaultRuleDispatcher(NodeProcessor defaultProc, + Map rules, NodeProcessorCtx procCtx) { this.defaultProc = defaultProc; - this.procRules = rules; - this.procCtx = procCtx; + procRules = rules; + this.procCtx = procCtx; } /** * dispatcher function - * @param nd operator to process - * @param ndStack the operators encountered so far + * + * @param nd + * operator to process + * @param ndStack + * the operators encountered so far * @throws SemanticException */ - public Object dispatch(Node nd, Stack ndStack, Object... nodeOutputs) - throws SemanticException { + public Object dispatch(Node nd, Stack ndStack, Object... 
nodeOutputs) + throws SemanticException { // find the firing rule // find the rule from the stack specified @@ -69,17 +77,18 @@ NodeProcessor proc; - if (rule == null) + if (rule == null) { proc = defaultProc; - else + } else { proc = procRules.get(rule); + } // Do nothing in case proc is null if (proc != null) { // Call the process function return proc.process(nd, ndStack, procCtx, nodeOutputs); + } else { + return null; } - else - return null; } } Index: ql/src/java/org/apache/hadoop/hive/ql/lib/Node.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/Node.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/Node.java (working copy) @@ -25,14 +25,15 @@ * These are implemented by the node of the graph that needs to be walked. */ public interface Node { - + /** - * Gets the vector of children nodes. This is used in the graph walker algorithms. + * Gets the vector of children nodes. This is used in the graph walker + * algorithms. * * @return Vector */ public List getChildren(); - + /** * Gets the name of the node. This is used in the rule dispatchers. * Index: ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java (working copy) @@ -17,25 +17,28 @@ */ package org.apache.hadoop.hive.ql.lib; - import java.util.Stack; import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Base class for processing operators which is no-op. The specific processors can register their own context with - * the dispatcher. + * Base class for processing operators which is no-op. The specific processors + * can register their own context with the dispatcher. */ public interface NodeProcessor { - + /** * generic process for all ops that don't have specific implementations - * @param nd operator to process - * @param procCtx operator processor context - * @param nodeOutputs A variable argument list of outputs from other nodes in the walk + * + * @param nd + * operator to process + * @param procCtx + * operator processor context + * @param nodeOutputs + * A variable argument list of outputs from other nodes in the walk * @return Object to be returned by the process call * @throws SemanticException */ - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) - throws SemanticException; + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException; } Index: ql/src/java/org/apache/hadoop/hive/ql/lib/Dispatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/Dispatcher.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/Dispatcher.java (working copy) @@ -23,20 +23,24 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Dispatcher interface for Operators - * Used in operator graph walking to dispatch process/visitor functions for operators + * Dispatcher interface for Operators Used in operator graph walking to dispatch + * process/visitor functions for operators */ public interface Dispatcher { /** * Dispatcher function. - * @param nd operator to process. - * @param stack operator stack to process. 
- * @param nodeOutputs The argument list of outputs from processing other nodes that are - * passed to this dispatcher from the walker. + * + * @param nd + * operator to process. + * @param stack + * operator stack to process. + * @param nodeOutputs + * The argument list of outputs from processing other nodes that are + * passed to this dispatcher from the walker. * @return Object The return object from the processing call. * @throws SemanticException */ - public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) - throws SemanticException; + public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) + throws SemanticException; } Index: ql/src/java/org/apache/hadoop/hive/ql/lib/GraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/GraphWalker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/GraphWalker.java (working copy) @@ -31,12 +31,14 @@ /** * starting point for walking. * - * @param startNodes list of starting operators - * @param nodeOutput If this parameter is not null, the call to the function returns the - * map from node to objects returned by the processors. + * @param startNodes + * list of starting operators + * @param nodeOutput + * If this parameter is not null, the call to the function returns + * the map from node to objects returned by the processors. * @throws SemanticException */ - public void startWalking(Collection startNodes, HashMap nodeOutput) - throws SemanticException; + public void startWalking(Collection startNodes, + HashMap nodeOutput) throws SemanticException; } \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/lib/Rule.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/Rule.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/Rule.java (working copy) @@ -19,16 +19,18 @@ package org.apache.hadoop.hive.ql.lib; import java.util.Stack; + import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Rule interface for Operators - * Used in operator dispatching to dispatch process/visitor functions for operators + * Rule interface for Operators Used in operator dispatching to dispatch + * process/visitor functions for operators */ public interface Rule { /** - * @return the cost of the rule - the lower the cost, the better the rule matches + * @return the cost of the rule - the lower the cost, the better the rule + * matches * @throws SemanticException */ public int cost(Stack stack) throws SemanticException; Index: ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java (working copy) @@ -18,31 +18,24 @@ package org.apache.hadoop.hive.ql.lib; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.Stack; -import java.lang.Object; - import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * base class for operator graph walker - * this class takes list of starting ops and walks them one by one. + * base class for operator graph walker this class takes list of starting ops + * and walks them one by one. 
*/ public class PreOrderWalker extends DefaultGraphWalker { - /* - * Since the operator tree is a DAG, nodes with mutliple parents will be visited more than once. - * This can be made configurable. + /* + * Since the operator tree is a DAG, nodes with mutliple parents will be + * visited more than once. This can be made configurable. */ /** * Constructor - * @param disp dispatcher to call for each op encountered + * + * @param disp + * dispatcher to call for each op encountered */ public PreOrderWalker(Dispatcher disp) { super(disp); @@ -50,17 +43,22 @@ /** * walk the current operator and its descendants - * @param nd current operator in the graph + * + * @param nd + * current operator in the graph * @throws SemanticException */ + @Override public void walk(Node nd) throws SemanticException { opStack.push(nd); dispatch(nd, opStack); - + // move all the children to the front of queue - if (nd.getChildren() != null) - for (Node n : nd.getChildren()) + if (nd.getChildren() != null) { + for (Node n : nd.getChildren()) { walk(n); + } + } opStack.pop(); } Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (working copy) @@ -29,26 +29,29 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * base class for operator graph walker - * this class takes list of starting ops and walks them one by one. it maintains list of walked - * operators (dispatchedList) and a list of operators that are discovered but not yet dispatched + * base class for operator graph walker this class takes list of starting ops + * and walks them one by one. 
it maintains list of walked operators + * (dispatchedList) and a list of operators that are discovered but not yet + * dispatched */ public class DefaultGraphWalker implements GraphWalker { protected Stack opStack; - private List toWalk = new ArrayList(); - private Set seenList = new HashSet(); - private HashMap retMap = new HashMap(); - private Dispatcher dispatcher; + private final List toWalk = new ArrayList(); + private final Set seenList = new HashSet(); + private final HashMap retMap = new HashMap(); + private final Dispatcher dispatcher; /** * Constructor - * @param disp dispatcher to call for each op encountered + * + * @param disp + * dispatcher to call for each op encountered */ public DefaultGraphWalker(Dispatcher disp) { - this.dispatcher = disp; + dispatcher = disp; opStack = new Stack(); - } + } /** * @return the toWalk @@ -66,8 +69,11 @@ /** * Dispatch the current operator - * @param nd node being walked - * @param ndStack stack of nodes encountered + * + * @param nd + * node being walked + * @param ndStack + * stack of nodes encountered * @throws SemanticException */ public void dispatch(Node nd, Stack ndStack) throws SemanticException { @@ -75,22 +81,24 @@ if (nd.getChildren() != null) { nodeOutputs = new Object[nd.getChildren().size()]; int i = 0; - for(Node child: nd.getChildren()) { + for (Node child : nd.getChildren()) { nodeOutputs[i++] = retMap.get(child); } } - + Object retVal = dispatcher.dispatch(nd, ndStack, nodeOutputs); retMap.put(nd, retVal); } /** * starting point for walking + * * @throws SemanticException */ - public void startWalking(Collection startNodes, HashMap nodeOutput) throws SemanticException { + public void startWalking(Collection startNodes, + HashMap nodeOutput) throws SemanticException { toWalk.addAll(startNodes); - while(toWalk.size() > 0) { + while (toWalk.size() > 0) { Node nd = toWalk.remove(0); walk(nd); if (nodeOutput != null) { @@ -101,19 +109,23 @@ /** * walk the current operator and its descendants - * @param nd current operator in the graph + * + * @param nd + * current operator in the graph * @throws SemanticException */ public void walk(Node nd) throws SemanticException { - if (opStack.empty() || nd != opStack.peek()) + if (opStack.empty() || nd != opStack.peek()) { opStack.push(nd); + } - if((nd.getChildren() == null) + if ((nd.getChildren() == null) || getDispatchedList().containsAll(nd.getChildren())) { // all children are done or no need to walk the children - if(getDispatchedList().contains(nd)) + if (getDispatchedList().contains(nd)) { // sanity check assert false; + } dispatch(nd, opStack); opStack.pop(); return; Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveKey.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveKey.java (working copy) @@ -21,46 +21,52 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; -/** HiveKey is a simple wrapper on Text which allows us to set the hashCode easily. - * hashCode is used for hadoop partitioner. - */ +/** + * HiveKey is a simple wrapper on Text which allows us to set the hashCode + * easily. hashCode is used for hadoop partitioner. 
+ */ public class HiveKey extends BytesWritable { private static final int LENGTH_BYTES = 4; - + boolean hashCodeValid; + public HiveKey() { - hashCodeValid = false; + hashCodeValid = false; } - - protected int myHashCode; + + protected int myHashCode; + public void setHashCode(int myHashCode) { - this.hashCodeValid = true; + hashCodeValid = true; this.myHashCode = myHashCode; } + + @Override public int hashCode() { if (!hashCodeValid) { - throw new RuntimeException("Cannot get hashCode() from deserialized " + HiveKey.class); + throw new RuntimeException("Cannot get hashCode() from deserialized " + + HiveKey.class); } return myHashCode; } - /** A Comparator optimized for HiveKey. */ + /** A Comparator optimized for HiveKey. */ public static class Comparator extends WritableComparator { public Comparator() { super(HiveKey.class); } - + /** * Compare the buffers in serialized form. */ - public int compare(byte[] b1, int s1, int l1, - byte[] b2, int s2, int l2) { - return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, - b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES); + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES, b2, s2 + + LENGTH_BYTES, l2 - LENGTH_BYTES); } } - + static { WritableComparator.define(HiveKey.class, new Comparator()); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (working copy) @@ -37,7 +37,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.CodecPool; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; @@ -221,7 +220,8 @@ * @param colValLenBuffer * each cell's length of this column's in this split */ - void setColumnLenInfo(int columnValueLen, NonSyncDataOutputBuffer colValLenBuffer, + void setColumnLenInfo(int columnValueLen, + NonSyncDataOutputBuffer colValLenBuffer, int columnUncompressedValueLen, int columnIndex) { eachColumnValueLen[columnIndex] = columnValueLen; eachColumnUncompressedValueLen[columnIndex] = columnUncompressedValueLen; @@ -239,10 +239,11 @@ eachColumnValueLen[i] = WritableUtils.readVInt(in); eachColumnUncompressedValueLen[i] = WritableUtils.readVInt(in); int bufLen = WritableUtils.readVInt(in); - if (allCellValLenBuffer[i] == null) + if (allCellValLenBuffer[i] == null) { allCellValLenBuffer[i] = new NonSyncDataOutputBuffer(); - else + } else { allCellValLenBuffer[i].reset(); + } allCellValLenBuffer[i].write(in, bufLen); } } @@ -292,12 +293,12 @@ * */ static class ValueBuffer implements Writable { - + class LazyDecompressionCallbackImpl implements LazyDecompressionCallback { - + int index = -1; int colIndex = -1; - + public LazyDecompressionCallbackImpl(int index, int colIndex) { super(); this.index = index; @@ -306,27 +307,30 @@ @Override public byte[] decompress() throws IOException { - - if (decompressedFlag[index] || codec == null) + + if (decompressedFlag[index] || codec == null) { return loadedColumnsValueBuffer[index].getData(); - + } + NonSyncDataOutputBuffer compressedData = loadedColumnsValueBuffer[index]; NonSyncDataOutputBuffer decompressedData = new NonSyncDataOutputBuffer(); 
decompressBuffer.reset(); DataInputStream valueIn = new DataInputStream(deflatFilter); deflatFilter.resetState(); - decompressBuffer.reset(compressedData.getData(), keyBuffer.eachColumnValueLen[colIndex]); - decompressedData.write(valueIn, keyBuffer.eachColumnUncompressedValueLen[colIndex]); + decompressBuffer.reset(compressedData.getData(), + keyBuffer.eachColumnValueLen[colIndex]); + decompressedData.write(valueIn, + keyBuffer.eachColumnUncompressedValueLen[colIndex]); loadedColumnsValueBuffer[index] = decompressedData; decompressedFlag[index] = true; return decompressedData.getData(); } } - + // used to load columns' value into memory private NonSyncDataOutputBuffer[] loadedColumnsValueBuffer = null; private boolean[] decompressedFlag = null; - private LazyDecompressionCallbackImpl[] lazyDecompressCallbackObjs = null; + private LazyDecompressionCallbackImpl[] lazyDecompressCallbackObjs = null; boolean inited = false; @@ -362,19 +366,24 @@ skippedColIDs = skippedCols; } else { skippedColIDs = new boolean[columnNumber]; - for (int i = 0; i < skippedColIDs.length; i++) + for (int i = 0; i < skippedColIDs.length; i++) { skippedColIDs[i] = false; + } } int skipped = 0; if (skippedColIDs != null) { - for (boolean currentSkip : skippedColIDs) - if (currentSkip) + for (boolean currentSkip : skippedColIDs) { + if (currentSkip) { skipped++; + } + } } - loadedColumnsValueBuffer = new NonSyncDataOutputBuffer[columnNumber - skipped]; + loadedColumnsValueBuffer = new NonSyncDataOutputBuffer[columnNumber + - skipped]; decompressedFlag = new boolean[columnNumber - skipped]; - lazyDecompressCallbackObjs = new LazyDecompressionCallbackImpl[columnNumber - skipped]; + lazyDecompressCallbackObjs = new LazyDecompressionCallbackImpl[columnNumber + - skipped]; this.codec = codec; if (codec != null) { valDecompressor = CodecPool.getDecompressor(codec); @@ -383,12 +392,14 @@ } for (int k = 0, readIndex = 0; k < columnNumber; k++) { - if (skippedColIDs[k]) + if (skippedColIDs[k]) { continue; + } loadedColumnsValueBuffer[readIndex] = new NonSyncDataOutputBuffer(); - if(codec != null) { + if (codec != null) { decompressedFlag[readIndex] = false; - lazyDecompressCallbackObjs[readIndex] = new LazyDecompressionCallbackImpl(readIndex, k); + lazyDecompressCallbackObjs[readIndex] = new LazyDecompressionCallbackImpl( + readIndex, k); } else { decompressedFlag[readIndex] = true; } @@ -396,7 +407,8 @@ } } - public void setColumnValueBuffer(NonSyncDataOutputBuffer valBuffer, int addIndex) { + public void setColumnValueBuffer(NonSyncDataOutputBuffer valBuffer, + int addIndex) { loadedColumnsValueBuffer[addIndex] = valBuffer; } @@ -420,8 +432,9 @@ NonSyncDataOutputBuffer valBuf = loadedColumnsValueBuffer[addIndex]; valBuf.reset(); valBuf.write(in, vaRowsLen); - if(codec != null) + if (codec != null) { decompressedFlag[addIndex] = false; + } addIndex++; } @@ -432,8 +445,7 @@ @Override public void write(DataOutput out) throws IOException { - for (int i = 0; i < loadedColumnsValueBuffer.length; i++) { - NonSyncDataOutputBuffer currentBuf = loadedColumnsValueBuffer[i]; + for (NonSyncDataOutputBuffer currentBuf : loadedColumnsValueBuffer) { out.write(currentBuf.getData(), 0, currentBuf.getLength()); } } @@ -443,8 +455,8 @@ } public void close() { - for (int i = 0; i < loadedColumnsValueBuffer.length; i++) { - IOUtils.closeStream(loadedColumnsValueBuffer[i]); + for (NonSyncDataOutputBuffer element : loadedColumnsValueBuffer) { + IOUtils.closeStream(element); } if (codec != null) { IOUtils.closeStream(decompressBuffer); @@ -496,7 
+508,7 @@ NonSyncDataOutputBuffer[] compressionBuffer; CompressionOutputStream[] deflateFilter = null; DataOutputStream[] deflateOut = null; - private ColumnBuffer[] columnBuffers; + private final ColumnBuffer[] columnBuffers; NonSyncDataOutputBuffer keyCompressionBuffer; CompressionOutputStream keyDeflateFilter; @@ -505,7 +517,7 @@ private int columnNumber = 0; - private int[] columnValuePlainLength; + private final int[] columnValuePlainLength; KeyBuffer key = null; ValueBuffer value = null; @@ -528,7 +540,7 @@ */ int runLength = 0; int prevValueLength = -1; - + ColumnBuffer() throws IOException { columnValBuffer = new NonSyncDataOutputBuffer(); valLenBuffer = new NonSyncDataOutputBuffer(); @@ -537,17 +549,17 @@ public void append(BytesRefWritable data) throws IOException { data.writeDataTo(columnValBuffer); int currentLen = data.getLength(); - - if( prevValueLength < 0) { + + if (prevValueLength < 0) { startNewGroup(currentLen); return; } - - if(currentLen != prevValueLength) { + + if (currentLen != prevValueLength) { flushGroup(); startNewGroup(currentLen); } else { - runLength ++; + runLength++; } } @@ -563,12 +575,13 @@ prevValueLength = -1; runLength = 0; } - + public void flushGroup() throws IOException { if (prevValueLength >= 0) { WritableUtils.writeVLong(valLenBuffer, prevValueLength); - if (runLength > 0) + if (runLength > 0) { WritableUtils.writeVLong(valLenBuffer, ~runLength); + } runLength = -1; prevValueLength = -1; } @@ -645,8 +658,9 @@ RECORD_INTERVAL = conf.getInt(RECORD_INTERVAL_CONF_STR, RECORD_INTERVAL); columnNumber = conf.getInt(COLUMN_NUMBER_CONF_STR, 0); - if (metadata == null) + if (metadata == null) { metadata = new Metadata(); + } metadata.set(new Text(COLUMN_NUMBER_METADATA_STR), new Text("" + columnNumber)); @@ -766,9 +780,10 @@ */ public void append(Writable val) throws IOException { - if (!(val instanceof BytesRefArrayWritable)) + if (!(val instanceof BytesRefArrayWritable)) { throw new UnsupportedOperationException( "Currently the writer can only accept BytesRefArrayWritable"); + } BytesRefArrayWritable columns = (BytesRefArrayWritable) val; int size = columns.size(); @@ -802,7 +817,7 @@ for (int columnIndex = 0; columnIndex < columnNumber; columnIndex++) { ColumnBuffer currentBuf = columnBuffers[columnIndex]; currentBuf.flushGroup(); - + NonSyncDataOutputBuffer columnValue = currentBuf.columnValBuffer; if (isCompressed()) { @@ -829,8 +844,9 @@ } int keyLength = key.getSize(); - if (keyLength < 0) + if (keyLength < 0) { throw new IOException("negative length keys not allowed: " + key); + } // Write the record out checkAndWriteSync(); // sync @@ -865,8 +881,9 @@ } public synchronized void close() throws IOException { - if (bufferedRecords > 0) + if (bufferedRecords > 0) { flushRecords(); + } clearColumnBuffers(); if (isCompressed()) { @@ -898,25 +915,25 @@ */ public static class Reader { - private Path file; - private FSDataInputStream in; + private final Path file; + private final FSDataInputStream in; private byte version; private CompressionCodec codec = null; private Metadata metadata = null; - private byte[] sync = new byte[SYNC_HASH_SIZE]; - private byte[] syncCheck = new byte[SYNC_HASH_SIZE]; + private final byte[] sync = new byte[SYNC_HASH_SIZE]; + private final byte[] syncCheck = new byte[SYNC_HASH_SIZE]; private boolean syncSeen; - private long end; + private final long end; private int currentKeyLength; private int currentRecordLength; - private Configuration conf; + private final Configuration conf; - private ValueBuffer currentValue; + private 
final ValueBuffer currentValue; private boolean[] skippedColIDs = null; @@ -931,14 +948,14 @@ private int passedRowsNum = 0; private int[] columnRowReadIndex = null; - private NonSyncDataInputBuffer[] colValLenBufferReadIn; - private int[] columnRunLength; - private int[] columnPrvLength; + private final NonSyncDataInputBuffer[] colValLenBufferReadIn; + private final int[] columnRunLength; + private final int[] columnPrvLength; private boolean decompress = false; private Decompressor keyDecompressor; NonSyncDataOutputBuffer keyDecompressedData = new NonSyncDataOutputBuffer(); - + int[] prjColIDs = null; // selected column IDs /** Create a new RCFile reader. */ @@ -966,15 +983,17 @@ columnNumber = Integer.parseInt(metadata.get( new Text(COLUMN_NUMBER_METADATA_STR)).toString()); - java.util.ArrayList notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf); + java.util.ArrayList notSkipIDs = ColumnProjectionUtils + .getReadColumnIDs(conf); skippedColIDs = new boolean[columnNumber]; if (notSkipIDs.size() > 0) { for (int i = 0; i < skippedColIDs.length; i++) { skippedColIDs[i] = true; } for (int read : notSkipIDs) { - if (read < columnNumber) + if (read < columnNumber) { skippedColIDs[read] = false; + } } } else { // TODO: if no column name is specified e.g, in select count(1) from tt; @@ -987,15 +1006,16 @@ loadColumnNum = columnNumber; if (skippedColIDs != null && skippedColIDs.length > 0) { - for (int i = 0; i < skippedColIDs.length; i++) { - if (skippedColIDs[i]) + for (boolean skippedColID : skippedColIDs) { + if (skippedColID) { loadColumnNum -= 1; + } } } // get list of selected column IDs prjColIDs = new int[loadColumnNum]; - for ( int i = 0, j = 0; i < columnNumber; ++i ) { + for (int i = 0, j = 0; i < columnNumber; ++i) { if (!skippedColIDs[i]) { prjColIDs[j++] = i; } @@ -1007,8 +1027,9 @@ columnRowReadIndex = new int[columnNumber]; for (int i = 0; i < columnNumber; i++) { columnRowReadIndex[i] = 0; - if (!skippedColIDs[i]) + if (!skippedColIDs[i]) { colValLenBufferReadIn[i] = new NonSyncDataInputBuffer(); + } columnRunLength[i] = 0; columnPrvLength[i] = -1; } @@ -1022,20 +1043,23 @@ in.readFully(versionBlock); if ((versionBlock[0] != VERSION[0]) || (versionBlock[1] != VERSION[1]) - || (versionBlock[2] != VERSION[2])) + || (versionBlock[2] != VERSION[2])) { throw new IOException(file + " not a RCFile"); + } // Set 'version' version = versionBlock[3]; - if (version > VERSION[3]) + if (version > VERSION[3]) { throw new VersionMismatchException(VERSION[3], version); + } try { Class keyCls = conf.getClassByName(Text.readString(in)); Class valCls = conf.getClassByName(Text.readString(in)); if (!keyCls.equals(KeyBuffer.class) - || !valCls.equals(ValueBuffer.class)) + || !valCls.equals(ValueBuffer.class)) { throw new IOException(file + " not a RCFile"); + } } catch (ClassNotFoundException e) { throw new IOException(file + " not a RCFile", e); } @@ -1110,8 +1134,9 @@ for (int i = 0; in.getPos() < end; i++) { int j = 0; for (; j < syncLen; j++) { - if (sync[j] != syncCheck[(i + j) % syncLen]) + if (sync[j] != syncCheck[(i + j) % syncLen]) { break; + } } if (j == syncLen) { in.seek(in.getPos() - SYNC_SIZE); // position before @@ -1160,8 +1185,9 @@ // a // sync entry in.readFully(syncCheck); // read syncCheck - if (!Arrays.equals(sync, syncCheck)) // check it + if (!Arrays.equals(sync, syncCheck)) { throw new IOException("File is corrupt!"); + } syncSeen = true; if (in.getPos() >= end) { return -1; @@ -1174,8 +1200,9 @@ } private void seekToNextKeyBuffer() throws IOException { - if (!keyInit) 
+ if (!keyInit) { return; + } if (!currentValue.inited) { in.skip(currentRecordLength - currentKeyLength); } @@ -1220,21 +1247,22 @@ readRowsIndexInBuffer = 0; recordsNumInValBuffer = currentKey.numberRows; - for (int j = 0; j < prjColIDs.length; j++) { - int i = prjColIDs[j]; + for (int prjColID : prjColIDs) { + int i = prjColID; colValLenBufferReadIn[i].reset(currentKey.allCellValLenBuffer[i] .getData(), currentKey.allCellValLenBuffer[i].getLength()); columnRowReadIndex[i] = 0; columnRunLength[i] = 0; columnPrvLength[i] = -1; } - + return currentKeyLength; } protected void currentValueBuffer() throws IOException { - if (!keyInit) + if (!keyInit) { nextKeyBuffer(); + } currentValue.keyBuffer = currentKey; currentValue.clearColumnBuffer(); currentValue.readFields(in); @@ -1245,7 +1273,7 @@ // use this buffer to hold column's cells value length for usages in // getColumn(), instead of using colValLenBufferReadIn directly. - private NonSyncDataInputBuffer fetchColumnTempBuf = new NonSyncDataInputBuffer(); + private final NonSyncDataInputBuffer fetchColumnTempBuf = new NonSyncDataInputBuffer(); /** * Fetch all data in the buffer for a given column. This is useful for @@ -1271,8 +1299,9 @@ rest.resetValid(recordsNumInValBuffer); - if (!currentValue.inited) + if (!currentValue.inited) { currentValueBuffer(); + } int columnNextRowStart = 0; fetchColumnTempBuf.reset(currentKey.allCellValLenBuffer[columnID] @@ -1281,10 +1310,13 @@ int length = getColumnNextValueLength(columnID); BytesRefWritable currentCell = rest.get(i); - if (currentValue.decompressedFlag[columnID]) - currentCell.set(currentValue.loadedColumnsValueBuffer[columnID].getData(), columnNextRowStart, length); - else - currentCell.set(currentValue.lazyDecompressCallbackObjs[columnID], columnNextRowStart, length); + if (currentValue.decompressedFlag[columnID]) { + currentCell.set(currentValue.loadedColumnsValueBuffer[columnID] + .getData(), columnNextRowStart, length); + } else { + currentCell.set(currentValue.lazyDecompressCallbackObjs[columnID], + columnNextRowStart, length); + } columnNextRowStart = columnNextRowStart + length; } return rest; @@ -1292,8 +1324,9 @@ /** * Read in next key buffer and throw any data in current key buffer and - * current value buffer. It will influence the result of {@link - * #next(LongWritable)} and {@link #getCurrentRow(BytesRefArrayWritable)} + * current value buffer. 
It will influence the result of + * {@link #next(LongWritable)} and + * {@link #getCurrentRow(BytesRefArrayWritable)} * * @return whether there still has records or not * @throws IOException @@ -1349,14 +1382,15 @@ public synchronized void getCurrentRow(BytesRefArrayWritable ret) throws IOException { - if (!keyInit || rowFetched) + if (!keyInit || rowFetched) { return; + } if (!currentValue.inited) { currentValueBuffer(); - // do this only when not initialized, but we may need to find a way to - // tell the caller how to initialize the valid size - ret.resetValid(columnNumber); + // do this only when not initialized, but we may need to find a way to + // tell the caller how to initialize the valid size + ret.resetValid(columnNumber); } // we do not use BytesWritable here to avoid the byte-copy from @@ -1367,14 +1401,17 @@ BytesRefWritable ref = ret.unCheckedGet(i); - int columnCurrentRowStart = (int) columnRowReadIndex[i]; + int columnCurrentRowStart = columnRowReadIndex[i]; int length = getColumnNextValueLength(i); columnRowReadIndex[i] = columnCurrentRowStart + length; - if (currentValue.decompressedFlag[j]) - ref.set(currentValue.loadedColumnsValueBuffer[j].getData(), columnCurrentRowStart, length); - else - ref.set(currentValue.lazyDecompressCallbackObjs[j], columnCurrentRowStart, length); + if (currentValue.decompressedFlag[j]) { + ref.set(currentValue.loadedColumnsValueBuffer[j].getData(), + columnCurrentRowStart, length); + } else { + ref.set(currentValue.lazyDecompressCallbackObjs[j], + columnCurrentRowStart, length); + } } rowFetched = true; } @@ -1405,6 +1442,7 @@ } /** Returns the name of the file. */ + @Override public String toString() { return file.toString(); } @@ -1413,7 +1451,7 @@ public void close() { IOUtils.closeStream(in); currentValue.close(); - if (this.decompress) { + if (decompress) { IOUtils.closeStream(keyDecompressedData); CodecPool.returnDecompressor(keyDecompressor); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/CodecPool.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CodecPool.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CodecPool.java (working copy) @@ -29,45 +29,43 @@ import org.apache.hadoop.io.compress.Decompressor; /** - * A global compressor/decompressor pool used to save and reuse - * (possibly native) compression/decompression codecs. + * A global compressor/decompressor pool used to save and reuse (possibly + * native) compression/decompression codecs. */ public class CodecPool { private static final Log LOG = LogFactory.getLog(CodecPool.class); - + /** - * A global compressor pool used to save the expensive + * A global compressor pool used to save the expensive * construction/destruction of (possibly native) decompression codecs. */ - private static final Map, List> compressorPool = - new HashMap, List>(); - + private static final Map, List> compressorPool = new HashMap, List>(); + /** - * A global decompressor pool used to save the expensive + * A global decompressor pool used to save the expensive * construction/destruction of (possibly native) decompression codecs. 
*/ - private static final Map, List> decompressorPool = - new HashMap, List>(); + private static final Map, List> decompressorPool = new HashMap, List>(); private static T borrow(Map, List> pool, - Class codecClass) { + Class codecClass) { T codec = null; - + // Check if an appropriate codec is available synchronized (pool) { if (pool.containsKey(codecClass)) { List codecList = pool.get(codecClass); - + if (codecList != null) { synchronized (codecList) { if (!codecList.isEmpty()) { - codec = codecList.remove(codecList.size()-1); + codec = codecList.remove(codecList.size() - 1); } } } } } - + return codec; } @@ -86,18 +84,19 @@ } } } - + /** - * Get a {@link Compressor} for the given {@link CompressionCodec} from the + * Get a {@link Compressor} for the given {@link CompressionCodec} from the * pool or a new one. - * - * @param codec the CompressionCodec for which to get the - * Compressor - * @return Compressor for the given - * CompressionCodec from the pool or a new one + * + * @param codec + * the CompressionCodec for which to get the + * Compressor + * @return Compressor for the given CompressionCodec + * from the pool or a new one */ public static Compressor getCompressor(CompressionCodec codec) { - Compressor compressor = (Compressor) borrow(compressorPool, codec.getCompressorType()); + Compressor compressor = borrow(compressorPool, codec.getCompressorType()); if (compressor == null) { compressor = codec.createCompressor(); LOG.info("Got brand-new compressor"); @@ -106,18 +105,20 @@ } return compressor; } - + /** * Get a {@link Decompressor} for the given {@link CompressionCodec} from the * pool or a new one. - * - * @param codec the CompressionCodec for which to get the - * Decompressor - * @return Decompressor for the given + * + * @param codec + * the CompressionCodec for which to get the + * Decompressor + * @return Decompressor for the given * CompressionCodec the pool or a new one */ public static Decompressor getDecompressor(CompressionCodec codec) { - Decompressor decompressor = (Decompressor) borrow(decompressorPool, codec.getDecompressorType()); + Decompressor decompressor = borrow(decompressorPool, codec + .getDecompressorType()); if (decompressor == null) { decompressor = codec.createDecompressor(); LOG.info("Got brand-new decompressor"); @@ -126,11 +127,12 @@ } return decompressor; } - + /** * Return the {@link Compressor} to the pool. * - * @param compressor the Compressor to be returned to the pool + * @param compressor + * the Compressor to be returned to the pool */ public static void returnCompressor(Compressor compressor) { if (compressor == null) { @@ -139,12 +141,12 @@ compressor.reset(); payback(compressorPool, compressor); } - + /** * Return the {@link Decompressor} to the pool. 
* - * @param decompressor the Decompressor to be returned to the - * pool + * @param decompressor + * the Decompressor to be returned to the pool */ public static void returnDecompressor(Decompressor decompressor) { if (decompressor == null) { Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java (working copy) @@ -77,8 +77,8 @@ final int finalRowSeparator = rowSeparator; FileSystem fs = outPath.getFileSystem(jc); - final OutputStream outStream = Utilities.createCompressedStream(jc, - fs.create(outPath), isCompressed); + final OutputStream outStream = Utilities.createCompressedStream(jc, fs + .create(outPath), isCompressed); return new RecordWriter() { public void write(Writable r) throws IOException { if (r instanceof Text) { @@ -102,7 +102,7 @@ protected static class IgnoreKeyWriter implements org.apache.hadoop.mapred.RecordWriter { - private org.apache.hadoop.mapred.RecordWriter mWriter; + private final org.apache.hadoop.mapred.RecordWriter mWriter; public IgnoreKeyWriter(org.apache.hadoop.mapred.RecordWriter writer) { this.mWriter = writer; @@ -117,6 +117,7 @@ } } + @Override public org.apache.hadoop.mapred.RecordWriter getRecordWriter( FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (working copy) @@ -17,74 +17,75 @@ */ package org.apache.hadoop.hive.ql.io; + import java.io.IOException; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.hive.ql.exec.ExecMapper; - import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.CombineHiveInputSplit; import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; -public class CombineHiveRecordReader - implements RecordReader { - - private RecordReader recordReader; - - public CombineHiveRecordReader(InputSplit split, Configuration conf, - Reporter reporter, Integer partition) - throws IOException { - JobConf job = (JobConf)conf; - CombineHiveInputSplit hsplit = new CombineHiveInputSplit(job, (InputSplitShim)split); - String inputFormatClassName = hsplit.inputFormatClassName(); +public class CombineHiveRecordReader + implements RecordReader { + + private final RecordReader recordReader; + + public CombineHiveRecordReader(InputSplit split, Configuration conf, + Reporter reporter, Integer partition) throws 
IOException { + JobConf job = (JobConf) conf; + CombineHiveInputSplit hsplit = new CombineHiveInputSplit(job, + (InputSplitShim) split); + String inputFormatClassName = hsplit.inputFormatClassName(); Class inputFormatClass = null; try { inputFormatClass = Class.forName(inputFormatClassName); } catch (ClassNotFoundException e) { - throw new IOException ("CombineHiveRecordReader: class not found " + inputFormatClassName); + throw new IOException("CombineHiveRecordReader: class not found " + + inputFormatClassName); } - InputFormat inputFormat = CombineHiveInputFormat.getInputFormatFromCache(inputFormatClass, job); - + InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache( + inputFormatClass, job); + // create a split for the given partition - FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition], - hsplit.getStartOffsets()[partition], - hsplit.getLengths()[partition], - hsplit.getLocations()); - + FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition], hsplit + .getStartOffsets()[partition], hsplit.getLengths()[partition], hsplit + .getLocations()); + this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter); } - - public void close() throws IOException { - recordReader.close(); + + public void close() throws IOException { + recordReader.close(); } - - public K createKey() { - return (K)recordReader.createKey(); + + public K createKey() { + return (K) recordReader.createKey(); } - - public V createValue() { - return (V)recordReader.createValue(); + + public V createValue() { + return (V) recordReader.createValue(); } - - public long getPos() throws IOException { + + public long getPos() throws IOException { return recordReader.getPos(); } - - public float getProgress() throws IOException { + + public float getProgress() throws IOException { return recordReader.getProgress(); } - - public boolean next(K key, V value) throws IOException { - if (ExecMapper.getDone()) + + public boolean next(K key, V value) throws IOException { + if (ExecMapper.getDone()) { return false; + } return recordReader.next(key, value); } } - Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java (working copy) @@ -18,44 +18,46 @@ package org.apache.hadoop.hive.ql.io; +import java.io.IOException; + import org.apache.hadoop.hive.ql.exec.ExecMapper; -import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; -import java.io.IOException; +import org.apache.hadoop.mapred.RecordReader; -public class HiveRecordReader - implements RecordReader { +public class HiveRecordReader + implements RecordReader { - private RecordReader recordReader; - public HiveRecordReader(RecordReader recordReader){ + private final RecordReader recordReader; + + public HiveRecordReader(RecordReader recordReader) { this.recordReader = recordReader; } - public void close() throws IOException { - recordReader.close(); + public void close() throws IOException { + recordReader.close(); } - public K createKey() { - return (K)recordReader.createKey(); + public K createKey() { + return (K) recordReader.createKey(); } - public V createValue() { - return (V)recordReader.createValue(); + public V createValue() { + return (V) recordReader.createValue(); } - public long getPos() throws IOException { + public long getPos() 
throws IOException { return recordReader.getPos(); } - public float getProgress() throws IOException { + public float getProgress() throws IOException { return recordReader.getProgress(); } - - public boolean next(K key, V value) throws IOException { - if (ExecMapper.getDone()) + + public boolean next(K key, V value) throws IOException { + if (ExecMapper.getDone()) { return false; + } return recordReader.next(key, value); } } - Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java (working copy) @@ -34,9 +34,8 @@ import org.apache.hadoop.util.Progressable; /** A {@link HiveOutputFormat} that writes {@link SequenceFile}s. */ -public class HiveSequenceFileOutputFormat extends - SequenceFileOutputFormat implements - HiveOutputFormat { +public class HiveSequenceFileOutputFormat extends SequenceFileOutputFormat + implements HiveOutputFormat { BytesWritable EMPTY_KEY = new BytesWritable(); @@ -64,8 +63,7 @@ FileSystem fs = finalOutPath.getFileSystem(jc); final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, - fs, finalOutPath, BytesWritable.class, valueClass, - isCompressed); + fs, finalOutPath, BytesWritable.class, valueClass, isCompressed); return new RecordWriter() { public void write(Writable r) throws IOException { Index: ql/src/java/org/apache/hadoop/hive/ql/io/FlatFileInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/FlatFileInputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/FlatFileInputFormat.java (working copy) @@ -18,82 +18,85 @@ package org.apache.hadoop.hive.ql.io; +import java.io.DataInputStream; +import java.io.EOFException; import java.io.IOException; -import java.io.EOFException; import java.io.InputStream; -import java.io.DataInputStream; -import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; - +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RecordReader; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configurable; - -import org.apache.hadoop.io.serializer.Serialization; -import org.apache.hadoop.io.serializer.Serializer; -import org.apache.hadoop.io.serializer.SerializationFactory; -import org.apache.hadoop.io.serializer.Deserializer; - -import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.util.ReflectionUtils; -/** An {@link org.apache.hadoop.mapred.InputFormat} for Plain files with {@link Deserializer} records */ -public class FlatFileInputFormat extends 
FileInputFormat> { +/** + * An {@link org.apache.hadoop.mapred.InputFormat} for Plain files with + * {@link Deserializer} records + */ +public class FlatFileInputFormat extends + FileInputFormat> { /** - * A work-around until HADOOP-1230 is fixed. - * - * Allows boolean next(k,v) to be called by reference but still allow the deserializer to create a new - * object (i.e., row) on every call to next. + * A work-around until HADOOP-1230 is fixed. + * + * Allows boolean next(k,v) to be called by reference but still allow the + * deserializer to create a new object (i.e., row) on every call to next. */ static public class RowContainer { T row; } /** - * An implementation of SerializationContext is responsible for looking up the Serialization implementation - * for the given RecordReader. Potentially based on the Configuration or some other mechanism - * - * The SerializationFactory does not give this functionality since: - * 1. Requires Serialization implementations to be specified in the Configuration a-priori (although same as setting - * a SerializationContext) - * 2. Does not lookup the actual subclass being deserialized. e.g., for Serializable does not have a way of configuring - * the actual Java class being serialized/deserialized. + * An implementation of SerializationContext is responsible for looking up the + * Serialization implementation for the given RecordReader. Potentially based + * on the Configuration or some other mechanism + * + * The SerializationFactory does not give this functionality since: 1. + * Requires Serialization implementations to be specified in the Configuration + * a-priori (although same as setting a SerializationContext) 2. Does not + * lookup the actual subclass being deserialized. e.g., for Serializable does + * not have a way of configuring the actual Java class being + * serialized/deserialized. */ static public interface SerializationContext extends Configurable { /** - * An {@link Serialization} object for objects of type S + * An {@link Serialization} object for objects of type S + * * @return a serialization object for this context */ public Serialization getSerialization() throws IOException; /** - * Produces the specific class to deserialize + * Produces the specific class to deserialize */ public Class getRealClass() throws IOException; } - + /** * The JobConf keys for the Serialization implementation */ static public final String SerializationImplKey = "mapred.input.serialization.implKey"; /** - * An implementation of {@link SerializationContext} that reads the Serialization class and - * specific subclass to be deserialized from the JobConf. - * + * An implementation of {@link SerializationContext} that reads the + * Serialization class and specific subclass to be deserialized from the + * JobConf. + * */ - static public class SerializationContextFromConf implements FlatFileInputFormat.SerializationContext { + static public class SerializationContextFromConf implements + FlatFileInputFormat.SerializationContext { /** * The JobConf keys for the Class that is being deserialized. @@ -101,57 +104,67 @@ static public final String SerializationSubclassKey = "mapred.input.serialization.subclassKey"; /** - * Implements configurable so it can use the configuration to find the right classes - * Note: ReflectionUtils will automatigically call setConf with the right configuration. + * Implements configurable so it can use the configuration to find the right + * classes Note: ReflectionUtils will automatigically call setConf with the + * right configuration. 
*/ private Configuration conf; - public void setConf(Configuration conf) { - this.conf = conf; + public void setConf(Configuration conf) { + this.conf = conf; } - public Configuration getConf() { - return conf; + public Configuration getConf() { + return conf; } /** * @return the actual class being deserialized - * @exception does not currently throw IOException + * @exception does + * not currently throw IOException */ public Class getRealClass() throws IOException { - return (Class)conf.getClass(SerializationSubclassKey, null, Object.class); + return (Class) conf.getClass(SerializationSubclassKey, null, + Object.class); } /** * Looks up and instantiates the Serialization Object - * - * Important to note here that we are not relying on the Hadoop SerializationFactory part of the - * Serialization framework. This is because in the case of Non-Writable Objects, we cannot make any - * assumptions about the uniformity of the serialization class APIs - i.e., there may not be a "write" - * method call and a subclass may need to implement its own Serialization classes. - * The SerializationFactory currently returns the first (de)serializer that is compatible - * with the class to be deserialized; in this context, that assumption isn't necessarily true. - * + * + * Important to note here that we are not relying on the Hadoop + * SerializationFactory part of the Serialization framework. This is because + * in the case of Non-Writable Objects, we cannot make any assumptions about + * the uniformity of the serialization class APIs - i.e., there may not be a + * "write" method call and a subclass may need to implement its own + * Serialization classes. The SerializationFactory currently returns the + * first (de)serializer that is compatible with the class to be + * deserialized; in this context, that assumption isn't necessarily true. + * * @return the serialization object for this context - * @exception does not currently throw any IOException + * @exception does + * not currently throw any IOException */ public Serialization getSerialization() throws IOException { - Class> tClass = (Class>)conf.getClass(SerializationImplKey, null, Serialization.class); - return tClass == null ? null : (Serialization)ReflectionUtils.newInstance(tClass, conf); + Class> tClass = (Class>) conf.getClass( + SerializationImplKey, null, Serialization.class); + return tClass == null ? null : (Serialization) ReflectionUtils + .newInstance(tClass, conf); } } - /** - * An {@link RecordReader} for plain files with {@link Deserializer} records - * - * Reads one row at a time of type R. - * R is intended to be a base class of something such as: Record, Writable, Text, ... - * + /** + * An {@link RecordReader} for plain files with {@link Deserializer} records + * + * Reads one row at a time of type R. R is intended to be a base class of + * something such as: Record, Writable, Text, ... + * */ - public class FlatFileRecordReader implements RecordReader> { + public class FlatFileRecordReader implements + RecordReader> { /** - * An interface for a helper class for instantiating {@link Serialization} classes. + * An interface for a helper class for instantiating {@link Serialization} + * classes. */ /** * The stream in use - is fsin if not compressed, otherwise, it is dcin. 
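(Illustrative aside, not part of the patch.) The SerializationContextFromConf shown in the hunks above resolves both the Serialization implementation and the concrete row class from two JobConf keys, mapred.input.serialization.implKey and mapred.input.serialization.subclassKey. A minimal sketch of setting those keys on the job side, assuming the Hadoop 0.20-era Configuration API; JavaSerialization and String are only illustrative choices, not something the patch mandates:

import org.apache.hadoop.io.serializer.JavaSerialization;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.mapred.JobConf;

public class FlatFileJobSetupSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Which Serialization implementation FlatFileInputFormat should instantiate.
    job.setClass("mapred.input.serialization.implKey",
        JavaSerialization.class, Serialization.class);
    // Which concrete class the deserializer should produce for each row.
    job.setClass("mapred.input.serialization.subclassKey",
        String.class, Object.class);
    // getRealClass() reads the subclass key back the same way.
    Class<?> rowClass = job.getClass("mapred.input.serialization.subclassKey",
        null, Object.class);
    System.out.println("rows deserialize as " + rowClass.getName());
  }
}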
@@ -179,33 +192,37 @@ private final Deserializer deserializer; /** - * Once EOF is reached, stop calling the deserializer + * Once EOF is reached, stop calling the deserializer */ private boolean isEOF; /** - * The JobConf which contains information needed to instantiate the correct Deserializer + * The JobConf which contains information needed to instantiate the correct + * Deserializer */ - private Configuration conf; + private final Configuration conf; /** - * The actual class of the row's we are deserializing, not just the base class + * The actual class of the row's we are deserializing, not just the base + * class */ - private Class realRowClass; + private final Class realRowClass; - /** - * FlatFileRecordReader constructor constructs the underlying stream (potentially decompressed) and - * creates the deserializer. - * - * @param conf the jobconf - * @param split the split for this file + * FlatFileRecordReader constructor constructs the underlying stream + * (potentially decompressed) and creates the deserializer. + * + * @param conf + * the jobconf + * @param split + * the split for this file */ - public FlatFileRecordReader(Configuration conf, - FileSplit split) throws IOException { + public FlatFileRecordReader(Configuration conf, FileSplit split) + throws IOException { final Path path = split.getPath(); FileSystem fileSys = path.getFileSystem(conf); - CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf); + CompressionCodecFactory compressionCodecs = new CompressionCodecFactory( + conf); final CompressionCodec codec = compressionCodecs.getCodec(path); this.conf = conf; @@ -221,28 +238,26 @@ isEOF = false; end = split.getLength(); - // Instantiate a SerializationContext which this will use to lookup the Serialization class and the + // Instantiate a SerializationContext which this will use to lookup the + // Serialization class and the // actual class being deserialized SerializationContext sinfo; - Class> sinfoClass = - (Class>)conf.getClass(SerializationContextImplKey, SerializationContextFromConf.class); + Class> sinfoClass = (Class>) conf + .getClass(SerializationContextImplKey, + SerializationContextFromConf.class); - sinfo = (SerializationContext)ReflectionUtils.newInstance(sinfoClass, conf); + sinfo = (SerializationContext) ReflectionUtils.newInstance(sinfoClass, + conf); // Get the Serialization object and the class being deserialized Serialization serialization = sinfo.getSerialization(); - realRowClass = (Class)sinfo.getRealClass(); + realRowClass = (Class) sinfo.getRealClass(); - deserializer = (Deserializer)serialization.getDeserializer((Class)realRowClass); + deserializer = serialization.getDeserializer(realRowClass); deserializer.open(in); } /** - * The actual class of the data being deserialized - */ - private Class realRowclass; - - /** * The JobConf key of the SerializationContext to use */ static public final String SerializationContextImplKey = "mapred.input.serialization.context_impl"; @@ -250,34 +265,41 @@ /** * @return null */ - public Void createKey() { + public Void createKey() { return null; } /** * @return a new R instance. 
*/ - public RowContainer createValue() { + public RowContainer createValue() { RowContainer r = new RowContainer(); - r.row = (R)ReflectionUtils.newInstance(realRowClass, conf); + r.row = (R) ReflectionUtils.newInstance(realRowClass, conf); return r; } /** * Returns the next row # and value - * - * @param key - void as these files have a value only - * @param value - the row container which is always re-used, but the internal value may be set to a new Object - * @return whether the key and value were read. True if they were and false if EOF - * @exception IOException from the deserializer + * + * @param key + * - void as these files have a value only + * @param value + * - the row container which is always re-used, but the internal + * value may be set to a new Object + * @return whether the key and value were read. True if they were and false + * if EOF + * @exception IOException + * from the deserializer */ - public synchronized boolean next(Void key, RowContainer value) throws IOException { - if(isEOF || in.available() == 0) { + public synchronized boolean next(Void key, RowContainer value) + throws IOException { + if (isEOF || in.available() == 0) { isEOF = true; return false; } - // the deserializer is responsible for actually reading each record from the stream + // the deserializer is responsible for actually reading each record from + // the stream try { value.row = deserializer.deserialize(value.row); if (value.row == null) { @@ -285,27 +307,27 @@ return false; } return true; - } catch(EOFException e) { + } catch (EOFException e) { isEOF = true; return false; } } public synchronized float getProgress() throws IOException { - // this assumes no splitting + // this assumes no splitting if (end == 0) { return 0.0f; } else { - // gives progress over uncompressed stream + // gives progress over uncompressed stream // assumes deserializer is not buffering itself - return Math.min(1.0f, fsin.getPos()/(float)(end)); + return Math.min(1.0f, fsin.getPos() / (float) (end)); } } public synchronized long getPos() throws IOException { // assumes deserializer is not buffering itself - // position over uncompressed stream. not sure what - // effect this has on stats about job + // position over uncompressed stream. 
not sure what + // effect this has on stats about job return fsin.getPos(); } @@ -319,9 +341,9 @@ return false; } + @Override public RecordReader> getRecordReader(InputSplit split, - JobConf job, Reporter reporter) - throws IOException { + JobConf job, Reporter reporter) throws IOException { reporter.setStatus(split.toString()); Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (working copy) @@ -18,105 +18,103 @@ package org.apache.hadoop.hive.ql.io; -import java.io.File; import java.io.DataInput; import java.io.DataOutput; +import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; -import java.util.HashMap; import java.util.Map; -import java.util.List; -import java.util.Iterator; -import java.util.Map.Entry; -import java.io.Serializable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; +import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; -import org.apache.hadoop.hive.shims.ShimLoader; - -import org.apache.hadoop.util.ReflectionUtils; - /** - * CombineHiveInputFormat is a parameterized InputFormat which looks at the path name and determine - * the correct InputFormat for that path name from mapredPlan.pathToPartitionInfo(). - * It can be used to read files with different input format in the same map-reduce job. + * CombineHiveInputFormat is a parameterized InputFormat which looks at the path + * name and determine the correct InputFormat for that path name from + * mapredPlan.pathToPartitionInfo(). It can be used to read files with different + * input format in the same map-reduce job. */ -public class CombineHiveInputFormat extends HiveInputFormat { +public class CombineHiveInputFormat + extends HiveInputFormat { - public static final Log LOG = - LogFactory.getLog("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); + public static final Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); /** - * CombineHiveInputSplit encapsulates an InputSplit with its corresponding inputFormatClassName. - * A CombineHiveInputSplit comprises of multiple chunks from different files. Since, they belong - * to a single directory, there is a single inputformat for all the chunks. 
+ * CombineHiveInputSplit encapsulates an InputSplit with its corresponding + * inputFormatClassName. A CombineHiveInputSplit comprises of multiple chunks + * from different files. Since, they belong to a single directory, there is a + * single inputformat for all the chunks. */ public static class CombineHiveInputSplit implements InputSplitShim { - String inputFormatClassName; - InputSplitShim inputSplitShim; + String inputFormatClassName; + InputSplitShim inputSplitShim; public CombineHiveInputSplit() throws IOException { - this(ShimLoader.getHadoopShims().getCombineFileInputFormat().getInputSplitShim()); + this(ShimLoader.getHadoopShims().getCombineFileInputFormat() + .getInputSplitShim()); } - public CombineHiveInputSplit(InputSplitShim inputSplitShim) throws IOException { + public CombineHiveInputSplit(InputSplitShim inputSplitShim) + throws IOException { this(inputSplitShim.getJob(), inputSplitShim); } - public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) throws IOException { + public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) + throws IOException { this.inputSplitShim = inputSplitShim; if (job != null) { - Map pathToPartitionInfo = - Utilities.getMapRedWork(job).getPathToPartitionInfo(); + Map pathToPartitionInfo = Utilities + .getMapRedWork(job).getPathToPartitionInfo(); - // extract all the inputFormatClass names for each chunk in the CombinedSplit. + // extract all the inputFormatClass names for each chunk in the + // CombinedSplit. Path[] ipaths = inputSplitShim.getPaths(); for (int i = 0; i < ipaths.length; i++) { - partitionDesc part = null; + partitionDesc part = null; try { - part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i].getParent()); + part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i] + .getParent()); } catch (IOException e) { // The file path may be present in case of sampling - so ignore that - part = null; + part = null; } if (part == null) { try { - part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i]); + part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i]); } catch (IOException e) { - LOG.warn("CombineHiveInputSplit unable to find table description for " + - ipaths[i].getParent()); + LOG + .warn("CombineHiveInputSplit unable to find table description for " + + ipaths[i].getParent()); continue; } } - - // create a new InputFormat instance if this is the first time to see this class - if (i == 0) + + // create a new InputFormat instance if this is the first time to see + // this class + if (i == 0) { inputFormatClassName = part.getInputFileFormatClass().getName(); - else - assert inputFormatClassName.equals(part.getInputFileFormatClass().getName()); + } else { + assert inputFormatClassName.equals(part.getInputFileFormatClass() + .getName()); + } } } } @@ -124,7 +122,7 @@ public InputSplitShim getInputSplitShim() { return inputSplitShim; } - + /** * Returns the inputFormat class name for the i-th chunk */ @@ -135,58 +133,59 @@ public void setInputFormatClassName(String inputFormatClassName) { this.inputFormatClassName = inputFormatClassName; } - + public JobConf getJob() { return inputSplitShim.getJob(); } - + public long getLength() { return inputSplitShim.getLength(); } - - /** Returns an array containing the startoffsets of the files in the split*/ + + /** Returns an array containing the startoffsets of the files in the split */ public long[] getStartOffsets() { return inputSplitShim.getStartOffsets(); } - - /** Returns an array containing the lengths of the files in the 
split*/ + + /** Returns an array containing the lengths of the files in the split */ public long[] getLengths() { return inputSplitShim.getLengths(); } - + /** Returns the start offset of the ith Path */ public long getOffset(int i) { return inputSplitShim.getOffset(i); } - + /** Returns the length of the ith Path */ public long getLength(int i) { return inputSplitShim.getLength(i); } - + /** Returns the number of Paths in the split */ public int getNumPaths() { return inputSplitShim.getNumPaths(); } - + /** Returns the ith Path */ public Path getPath(int i) { return inputSplitShim.getPath(i); } - + /** Returns all the Paths in the split */ public Path[] getPaths() { return inputSplitShim.getPaths(); } - + /** Returns all the Paths where this input-split resides */ public String[] getLocations() throws IOException { return inputSplitShim.getLocations(); } - + /** * Prints this obejct as a string. */ + @Override public String toString() { StringBuffer sb = new StringBuffer(); sb.append(inputSplitShim.toString()); @@ -210,22 +209,27 @@ inputSplitShim.write(out); if (inputFormatClassName == null) { - Map pathToPartitionInfo = - Utilities.getMapRedWork(getJob()).getPathToPartitionInfo(); - - // extract all the inputFormatClass names for each chunk in the CombinedSplit. + Map pathToPartitionInfo = Utilities + .getMapRedWork(getJob()).getPathToPartitionInfo(); + + // extract all the inputFormatClass names for each chunk in the + // CombinedSplit. partitionDesc part = null; try { - part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim.getPath(0).getParent()); + part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim + .getPath(0).getParent()); } catch (IOException e) { // The file path may be present in case of sampling - so ignore that - part = null; + part = null; } - if (part == null) - part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim.getPath(0)); + if (part == null) { + part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim + .getPath(0)); + } - // create a new InputFormat instance if this is the first time to see this class + // create a new InputFormat instance if this is the first time to see + // this class inputFormatClassName = part.getInputFileFormatClass().getName(); } @@ -236,40 +240,45 @@ /** * Create Hive splits based on CombineFileSplit */ + @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { init(job); - CombineFileInputFormatShim combine = ShimLoader.getHadoopShims().getCombineFileInputFormat(); + CombineFileInputFormatShim combine = ShimLoader.getHadoopShims() + .getCombineFileInputFormat(); if (combine.getInputPathsShim(job).length == 0) { throw new IOException("No input paths specified in job"); } ArrayList result = new ArrayList(); - // combine splits only from same tables. Do not combine splits from multiple tables. + // combine splits only from same tables. Do not combine splits from multiple + // tables. 
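(Illustrative aside, not part of the patch.) The comment above, together with the loop in the next hunk, keeps combination within one table by creating a separate pool (a CombineFilter per input path), which is also why a CombineHiveInputSplit can carry a single inputFormatClassName for all of its chunks. A rough, Hive-free sketch of the same prefix-based grouping; the directory and file names are invented:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SplitPoolSketch {
  public static void main(String[] args) {
    String[] inputDirs = { "/warehouse/t1", "/warehouse/t2" };
    String[] files = { "/warehouse/t1/part-00000", "/warehouse/t1/part-00001",
        "/warehouse/t2/ds=2010-01-01/part-00000" };

    // One pool per input directory; a file joins the pool whose prefix it
    // matches, so chunks from different tables never share a combined split.
    Map<String, List<String>> pools = new LinkedHashMap<String, List<String>>();
    for (String dir : inputDirs) {
      pools.put(dir, new ArrayList<String>());
    }
    for (String file : files) {
      for (String dir : inputDirs) {
        if (file.startsWith(dir + "/")) {
          pools.get(dir).add(file);
          break;
        }
      }
    }
    System.out.println(pools);
  }
}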
Path[] paths = combine.getInputPathsShim(job); - for (int i = 0; i < paths.length; i++) { - LOG.info("CombineHiveInputSplit creating pool for " + paths[i]); - combine.createPool(job, new CombineFilter(paths[i])); + for (Path path : paths) { + LOG.info("CombineHiveInputSplit creating pool for " + path); + combine.createPool(job, new CombineFilter(path)); } - InputSplitShim[] iss = (InputSplitShim[])combine.getSplits(job, 1); - for (InputSplitShim is: iss) { + InputSplitShim[] iss = combine.getSplits(job, 1); + for (InputSplitShim is : iss) { CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is); result.add(csplit); } - + LOG.info("number of splits " + result.size()); return result.toArray(new CombineHiveInputSplit[result.size()]); } /** - * Create a generic Hive RecordReader than can iterate over all chunks in - * a CombinedFileSplit + * Create a generic Hive RecordReader than can iterate over all chunks in a + * CombinedFileSplit */ - public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { - CombineHiveInputSplit hsplit = (CombineHiveInputSplit)split; + @Override + public RecordReader getRecordReader(InputSplit split, JobConf job, + Reporter reporter) throws IOException { + CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split; String inputFormatClassName = null; Class inputFormatClass = null; @@ -280,33 +289,35 @@ throw new IOException("cannot find class " + inputFormatClassName); } - initColumnsNeeded(job, inputFormatClass, hsplit.getPath(0).toString(), - hsplit.getPath(0).toUri().getPath()); + initColumnsNeeded(job, inputFormatClass, hsplit.getPath(0).toString(), + hsplit.getPath(0).toUri().getPath()); - return - ShimLoader.getHadoopShims().getCombineFileInputFormat().getRecordReader(job, - ((CombineHiveInputSplit)split).getInputSplitShim(), - reporter, CombineHiveRecordReader.class); + return ShimLoader.getHadoopShims().getCombineFileInputFormat() + .getRecordReader(job, + ((CombineHiveInputSplit) split).getInputSplitShim(), reporter, + CombineHiveRecordReader.class); } protected static partitionDesc getPartitionDescFromPath( - Map pathToPartitionInfo, Path dir) throws IOException { - // The format of the keys in pathToPartitionInfo sometimes contains a port - // and sometimes doesn't, so we just compare paths. - for (Map.Entry entry : pathToPartitionInfo.entrySet()) { + Map pathToPartitionInfo, Path dir) + throws IOException { + // The format of the keys in pathToPartitionInfo sometimes contains a port + // and sometimes doesn't, so we just compare paths. 
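(Illustrative aside, not part of the patch.) As the comment above notes, keys in pathToPartitionInfo may or may not carry a scheme and port, so the lookup that follows compares only the path component. A small self-contained illustration of that comparison using java.net.URI; the warehouse paths are invented for the example:

import java.net.URI;
import java.net.URISyntaxException;

public class PathCompareSketch {
  public static void main(String[] args) throws URISyntaxException {
    String metastoreKey = "hdfs://namenode:8020/user/hive/warehouse/t1";
    String splitDir = "/user/hive/warehouse/t1";
    // Comparing the full strings would fail because of the scheme and port,
    // but the path components match.
    boolean sameDir = new URI(metastoreKey).getPath()
        .equals(new URI(splitDir).getPath());
    System.out.println(sameDir); // true
  }
}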
+ for (Map.Entry entry : pathToPartitionInfo + .entrySet()) { try { - if (new URI(entry.getKey()).getPath().equals(dir.toUri().getPath())) { + if (new URI(entry.getKey()).getPath().equals(dir.toUri().getPath())) { return entry.getValue(); } + } catch (URISyntaxException e2) { } - catch (URISyntaxException e2) {} } throw new IOException("cannot find dir = " + dir.toString() - + " in partToPartitionInfo!"); + + " in partToPartitionInfo!"); } static class CombineFilter implements PathFilter { - private String pString; + private final String pString; // store a path prefix in this TestFilter public CombineFilter(Path p) { @@ -322,6 +333,7 @@ return false; } + @Override public String toString() { return "PathFilter:" + pString; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (working copy) @@ -22,9 +22,7 @@ import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; -import java.net.URLClassLoader; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -39,52 +37,52 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; - -import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; - /** - * HiveInputFormat is a parameterized InputFormat which looks at the path name and determine - * the correct InputFormat for that path name from mapredPlan.pathToPartitionInfo(). - * It can be used to read files with different input format in the same map-reduce job. + * HiveInputFormat is a parameterized InputFormat which looks at the path name + * and determines the correct InputFormat for that path name from + * mapredPlan.pathToPartitionInfo(). It can be used to read files with different + * input formats in the same map-reduce job. */ -public class HiveInputFormat implements InputFormat, JobConfigurable { +public class HiveInputFormat + implements InputFormat, JobConfigurable { - public static final Log LOG = - LogFactory.getLog("org.apache.hadoop.hive.ql.io.HiveInputFormat"); + public static final Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.io.HiveInputFormat"); /** - * HiveInputSplit encapsulates an InputSplit with its corresponding inputFormatClass. - * The reason that it derives from FileSplit is to make sure "map.input.file" in MapTask. + * HiveInputSplit encapsulates an InputSplit with its corresponding + * inputFormatClass. The reason that it derives from FileSplit is to make sure + * "map.input.file" is set in MapTask. 
*/ - public static class HiveInputSplit extends FileSplit implements InputSplit, Configurable { + public static class HiveInputSplit extends FileSplit implements InputSplit, + Configurable { InputSplit inputSplit; - String inputFormatClassName; + String inputFormatClassName; public HiveInputSplit() { // This is the only public constructor of FileSplit - super((Path)null, 0, 0, (String[])null); + super((Path) null, 0, 0, (String[]) null); } public HiveInputSplit(InputSplit inputSplit, String inputFormatClassName) { // This is the only public constructor of FileSplit - super((Path)null, 0, 0, (String[])null); + super((Path) null, 0, 0, (String[]) null); this.inputSplit = inputSplit; this.inputFormatClassName = inputFormatClassName; } @@ -92,29 +90,34 @@ public InputSplit getInputSplit() { return inputSplit; } + public String inputFormatClassName() { return inputFormatClassName; } + @Override public Path getPath() { if (inputSplit instanceof FileSplit) { - return ((FileSplit)inputSplit).getPath(); + return ((FileSplit) inputSplit).getPath(); } return new Path(""); } /** The position of the first byte in the file to process. */ + @Override public long getStart() { if (inputSplit instanceof FileSplit) { - return ((FileSplit)inputSplit).getStart(); + return ((FileSplit) inputSplit).getStart(); } return 0; } + @Override public String toString() { return inputFormatClassName + ":" + inputSplit.toString(); } + @Override public long getLength() { long r = 0; try { @@ -125,23 +128,27 @@ return r; } + @Override public String[] getLocations() throws IOException { return inputSplit.getLocations(); } + @Override public void readFields(DataInput in) throws IOException { String inputSplitClassName = in.readUTF(); try { - inputSplit = (InputSplit) ReflectionUtils.newInstance( - conf.getClassByName(inputSplitClassName), conf); + inputSplit = (InputSplit) ReflectionUtils.newInstance(conf + .getClassByName(inputSplitClassName), conf); } catch (Exception e) { - throw new IOException("Cannot create an instance of InputSplit class = " - + inputSplitClassName + ":" + e.getMessage()); + throw new IOException( + "Cannot create an instance of InputSplit class = " + + inputSplitClassName + ":" + e.getMessage()); } inputSplit.readFields(in); inputFormatClassName = in.readUTF(); } + @Override public void write(DataOutput out) throws IOException { out.writeUTF(inputSplit.getClass().getName()); inputSplit.write(out); @@ -149,7 +156,7 @@ } Configuration conf; - + @Override public Configuration getConf() { return conf; @@ -170,19 +177,21 @@ /** * A cache of InputFormat instances. 
*/ - private static Map> inputFormats; - static InputFormat getInputFormatFromCache(Class inputFormatClass, JobConf job) throws IOException { + private static Map> inputFormats; + + static InputFormat getInputFormatFromCache( + Class inputFormatClass, JobConf job) throws IOException { if (inputFormats == null) { inputFormats = new HashMap>(); } if (!inputFormats.containsKey(inputFormatClass)) { try { - InputFormat newInstance = - (InputFormat)ReflectionUtils.newInstance(inputFormatClass, job); + InputFormat newInstance = (InputFormat) ReflectionUtils + .newInstance(inputFormatClass, job); inputFormats.put(inputFormatClass, newInstance); } catch (Exception e) { - throw new IOException("Cannot create an instance of InputFormat class " + inputFormatClass.getName() - + " as specified in mapredWork!"); + throw new IOException("Cannot create an instance of InputFormat class " + + inputFormatClass.getName() + " as specified in mapredWork!"); } } return inputFormats.get(inputFormatClass); @@ -191,7 +200,7 @@ public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { - HiveInputSplit hsplit = (HiveInputSplit)split; + HiveInputSplit hsplit = (HiveInputSplit) split; InputSplit inputSplit = hsplit.getInputSplit(); String inputFormatClassName = null; @@ -203,14 +212,15 @@ throw new IOException("cannot find class " + inputFormatClassName); } - //clone a jobConf for setting needed columns for reading + // clone a jobConf for setting needed columns for reading JobConf cloneJobConf = new JobConf(job); - initColumnsNeeded(cloneJobConf, inputFormatClass, hsplit.getPath().toString(), - hsplit.getPath().toUri().getPath()); + initColumnsNeeded(cloneJobConf, inputFormatClass, hsplit.getPath() + .toString(), hsplit.getPath().toUri().getPath()); - InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, cloneJobConf); + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, + cloneJobConf); return new HiveRecordReader(inputFormat.getRecordReader(inputSplit, - cloneJobConf, reporter)); + cloneJobConf, reporter)); } private Map pathToPartitionInfo; @@ -233,16 +243,17 @@ ArrayList result = new ArrayList(); // for each dir, get the InputFormat, and do getSplits. - for(Path dir: dirs) { - partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir); - // create a new InputFormat instance if this is the first time to see this class + for (Path dir : dirs) { + partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir); + // create a new InputFormat instance if this is the first time to see this + // class Class inputFormatClass = part.getInputFileFormatClass(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); FileInputFormat.setInputPaths(newjob, dir); newjob.setInputFormat(inputFormat.getClass()); - InputSplit[] iss = inputFormat.getSplits(newjob, numSplits/dirs.length); - for(InputSplit is: iss) { + InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length); + for (InputSplit is : iss) { result.add(new HiveInputSplit(is, inputFormatClass.getName())); } } @@ -260,10 +271,12 @@ JobConf newjob = new JobConf(job); // for each dir, get the InputFormat, and do validateInput. 
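(Illustrative aside, not part of the patch.) getInputFormatFromCache in the hunk above creates each InputFormat at most once per class and then reuses the instance; the per-directory loop introduced by the preceding comment continues in the next hunk. A stripped-down, framework-free sketch of that memoization pattern (the class and method names here are invented):

import java.util.HashMap;
import java.util.Map;

public class InstanceCacheSketch {
  private static Map<Class<?>, Object> cache = new HashMap<Class<?>, Object>();

  // Create the instance on first request, then hand back the cached one,
  // roughly mirroring the cache lookup above (minus the JobConf-aware
  // construction via ReflectionUtils).
  static synchronized Object getFromCache(Class<?> clazz) throws Exception {
    Object instance = cache.get(clazz);
    if (instance == null) {
      instance = clazz.newInstance();
      cache.put(clazz, instance);
    }
    return instance;
  }

  public static void main(String[] args) throws Exception {
    Object a = getFromCache(StringBuilder.class);
    Object b = getFromCache(StringBuilder.class);
    System.out.println(a == b); // true: the same instance is reused
  }
}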
- for (Path dir: dirs) { + for (Path dir : dirs) { partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir); - // create a new InputFormat instance if this is the first time to see this class - InputFormat inputFormat = getInputFormatFromCache(part.getInputFileFormatClass(), job); + // create a new InputFormat instance if this is the first time to see this + // class + InputFormat inputFormat = getInputFormatFromCache(part + .getInputFileFormatClass(), job); FileInputFormat.setInputPaths(newjob, dir); newjob.setInputFormat(inputFormat.getClass()); @@ -271,47 +284,53 @@ } } - protected static partitionDesc getPartitionDescFromPath(Map pathToPartitionInfo, - Path dir) throws IOException { + protected static partitionDesc getPartitionDescFromPath( + Map pathToPartitionInfo, Path dir) + throws IOException { partitionDesc partDesc = pathToPartitionInfo.get(dir.toString()); if (partDesc == null) { partDesc = pathToPartitionInfo.get(dir.toUri().getPath()); } if (partDesc == null) { - throw new IOException("cannot find dir = " + dir.toString() + " in partToPartitionInfo!"); + throw new IOException("cannot find dir = " + dir.toString() + + " in partToPartitionInfo!"); } return partDesc; } protected void initColumnsNeeded(JobConf jobConf, Class inputFormatClass, - String splitPath, String splitPathWithNoSchema) { - if (this.mrwork == null) + String splitPath, String splitPathWithNoSchema) { + if (this.mrwork == null) { init(job); + } ArrayList aliases = new ArrayList(); - Iterator>> iterator = - this.mrwork.getPathToAliases().entrySet().iterator(); + Iterator>> iterator = this.mrwork + .getPathToAliases().entrySet().iterator(); while (iterator.hasNext()) { Entry> entry = iterator.next(); String key = entry.getKey(); if (splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key)) { ArrayList list = entry.getValue(); - for (String val : list) + for (String val : list) { aliases.add(val); + } } } for (String alias : aliases) { - Operator op = this.mrwork.getAliasToWork().get(alias); + Operator op = this.mrwork.getAliasToWork().get( + alias); if (op instanceof TableScanOperator) { TableScanOperator tableScan = (TableScanOperator) op; ArrayList list = tableScan.getNeededColumnIDs(); - if (list != null) - ColumnProjectionUtils.appendReadColumnIDs(jobConf, list); - else - ColumnProjectionUtils.setFullyReadColumns(jobConf); + if (list != null) { + ColumnProjectionUtils.appendReadColumnIDs(jobConf, list); + } else { + ColumnProjectionUtils.setFullyReadColumns(jobConf); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (working copy) @@ -25,7 +25,6 @@ import java.util.Properties; import java.util.Set; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -45,14 +44,12 @@ import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hadoop.util.StringUtils; /** * An util class for various Hive file format tasks. - * registerOutputFormatSubstitute(Class, Class) - * getOutputFormatSubstitute(Class) are added for backward - * compatibility. They return the newly added HiveOutputFormat for the older - * ones. 
+ * registerOutputFormatSubstitute(Class, Class) getOutputFormatSubstitute(Class) + * are added for backward compatibility. They return the newly added + * HiveOutputFormat for the older ones. * */ public class HiveFileFormatUtils { @@ -67,7 +64,7 @@ @SuppressWarnings("unchecked") private static Map, Class> outputFormatSubstituteMap; - + /** * register a substitute * @@ -88,8 +85,9 @@ @SuppressWarnings("unchecked") public synchronized static Class getOutputFormatSubstitute( Class origin) { - if (HiveOutputFormat.class.isAssignableFrom(origin)) + if (HiveOutputFormat.class.isAssignableFrom(origin)) { return (Class) origin; + } Class result = outputFormatSubstituteMap .get(origin); return result; @@ -112,11 +110,13 @@ } return defaultFinalPath; } - + static { inputFormatCheckerMap = new HashMap, Class>(); - HiveFileFormatUtils.registerInputFormatChecker(SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class); - HiveFileFormatUtils.registerInputFormatChecker(RCFileInputFormat.class, RCFileInputFormat.class); + HiveFileFormatUtils.registerInputFormatChecker( + SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class); + HiveFileFormatUtils.registerInputFormatChecker(RCFileInputFormat.class, + RCFileInputFormat.class); inputFormatCheckerInstanceCache = new HashMap, InputFormatChecker>(); } @@ -139,12 +139,13 @@ inputFormatCheckerMap.put(format, checker); } - /** + /** * get an InputFormatChecker for a file format. */ public synchronized static Class getInputFormatChecker( Class inputFormat) { - Class result = inputFormatCheckerMap.get(inputFormat); + Class result = inputFormatCheckerMap + .get(inputFormat); return result; } @@ -157,14 +158,15 @@ throws HiveException { if (files.size() > 0) { Class checkerCls = getInputFormatChecker(inputFormatCls); - if(checkerCls==null && inputFormatCls.isAssignableFrom(TextInputFormat.class)) { + if (checkerCls == null + && inputFormatCls.isAssignableFrom(TextInputFormat.class)) { // we get a text input format here, we can not determine a file is text // according to its content, so we can do is to test if other file // format can accept it. If one other file format can accept this file, // we treat this file as text file, although it maybe not. 
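(Illustrative aside, not part of the patch.) The comment above explains the idea behind checkTextInputFormat: a text file cannot be positively identified from its content, so files count as text only when no registered checker claims them for another format, which is what the loop over the registered checkers later in this region does. A compact sketch of that check-by-elimination idea; the Checker interface is an invented stand-in for InputFormatChecker:

import java.util.ArrayList;
import java.util.List;

public class TextByEliminationSketch {

  // Stand-in for InputFormatChecker.validateInput(fs, conf, files).
  interface Checker {
    boolean validate(List<String> files);
  }

  // If any registered checker (SequenceFile, RCFile, ...) accepts the files,
  // they are certainly not plain text; otherwise assume text.
  static boolean looksLikeText(List<Checker> registered, List<String> files) {
    for (Checker checker : registered) {
      if (checker.validate(files)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    List<Checker> registered = new ArrayList<Checker>();
    registered.add(new Checker() {
      public boolean validate(List<String> files) {
        return false; // e.g. not a valid SequenceFile
      }
    });
    System.out.println(looksLikeText(registered, new ArrayList<String>()));
  }
}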
- return checkTextInputFormat(fs, conf, files); + return checkTextInputFormat(fs, conf, files); } - + if (checkerCls != null) { InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache .get(checkerCls); @@ -190,25 +192,27 @@ .keySet(); for (Class reg : inputFormatter) { boolean result = checkInputFormat(fs, conf, reg, files); - if (result) + if (result) { return false; + } } return true; } - - + public static RecordWriter getHiveRecordWriter(JobConf jc, tableDesc tableInfo, Class outputClass, fileSinkDesc conf, Path outPath) throws HiveException { try { - HiveOutputFormat hiveOutputFormat = tableInfo.getOutputFileFormatClass().newInstance(); + HiveOutputFormat hiveOutputFormat = tableInfo + .getOutputFileFormatClass().newInstance(); boolean isCompressed = conf.getCompressed(); JobConf jc_output = jc; if (isCompressed) { jc_output = new JobConf(jc); String codecStr = conf.getCompressCodec(); if (codecStr != null && !codecStr.trim().equals("")) { - Class codec = (Class) Class.forName(codecStr); + Class codec = (Class) Class + .forName(codecStr); FileOutputFormat.setOutputCompressorClass(jc_output, codec); } String type = conf.getCompressType(); @@ -234,5 +238,5 @@ } return null; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveOutputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveOutputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveOutputFormat.java (working copy) @@ -30,9 +30,10 @@ /** * HiveOutputFormat describes the output-specification for Hive's - * operators. It has a method {@link #getHiveRecordWriter(JobConf, Path, Class, - * boolean, Properties, Progressable)}, with various parameters used to create - * the final out file and get some specific settings. + * operators. It has a method + * {@link #getHiveRecordWriter(JobConf, Path, Class, boolean, Properties, Progressable)} + * , with various parameters used to create the final out file and get some + * specific settings. 
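(Illustrative aside, not part of the patch.) HiveFileFormatUtils.getHiveRecordWriter, a few hunks above, leaves the caller's JobConf untouched by cloning it when compression is requested and applying the codec (and, in the full method, the compression type) to the clone only. A sketch of that clone-then-configure step against the old org.apache.hadoop.mapred API; GzipCodec and BLOCK are arbitrary example choices, not what the patch prescribes:

import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class CompressedWriterConfSketch {
  public static void main(String[] args) {
    JobConf jc = new JobConf();
    // Clone so the compression settings stay local to this one writer.
    JobConf jcOutput = new JobConf(jc);
    FileOutputFormat.setOutputCompressorClass(jcOutput, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jcOutput,
        SequenceFile.CompressionType.BLOCK);
    // The original JobConf is unchanged; only the clone carries the settings.
    System.out.println(SequenceFileOutputFormat.getOutputCompressionType(jcOutput));
  }
}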
* * @see org.apache.hadoop.mapred.OutputFormat * @see RecordWriter Index: ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java (working copy) @@ -31,8 +31,9 @@ @Override public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files) throws IOException { - if (files.size() <= 0) + if (files.size() <= 0) { return false; + } for (int fileId = 0; fileId < files.size(); fileId++) { try { SequenceFile.Reader reader = new SequenceFile.Reader(fs, files.get( Index: ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (working copy) @@ -33,9 +33,9 @@ public class RCFileRecordReader implements RecordReader { - private Reader in; - private long start; - private long end; + private final Reader in; + private final long start; + private final long end; private boolean more = true; protected Configuration conf; @@ -47,8 +47,9 @@ this.end = split.getStart() + split.getLength(); this.conf = conf; - if (split.getStart() > in.getPosition()) + if (split.getStart() > in.getPosition()) { in.sync(split.getStart()); // sync to start + } this.start = in.getPosition(); more = start < end; @@ -76,8 +77,9 @@ @Override public boolean next(LongWritable key, BytesRefArrayWritable value) throws IOException { - if (!more) + if (!more) { return false; + } long pos = in.getPosition(); boolean hasMore = in.next(key); if (hasMore) { @@ -96,8 +98,9 @@ } protected boolean next(LongWritable key) throws IOException { - if (!more) + if (!more) { return false; + } long pos = in.getPosition(); boolean hasMore = in.next(key); if (pos >= end && in.syncSeen()) { Index: ql/src/java/org/apache/hadoop/hive/ql/io/IgnoreKeyTextOutputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/IgnoreKeyTextOutputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/IgnoreKeyTextOutputFormat.java (working copy) @@ -30,42 +30,39 @@ import org.apache.hadoop.util.Progressable; /** - * This class replaces key with null before feeding the - * to TextOutputFormat.RecordWriter. + * This class replaces key with null before feeding the to + * TextOutputFormat.RecordWriter. 
* * @deprecated use {@link HiveIgnoreKeyTextOutputFormat} instead} */ -public class IgnoreKeyTextOutputFormat - extends TextOutputFormat { +@Deprecated +public class IgnoreKeyTextOutputFormat + extends TextOutputFormat { - protected static class IgnoreKeyWriter - implements RecordWriter { - - private RecordWriter mWriter; - + protected static class IgnoreKeyWriter + implements RecordWriter { + + private final RecordWriter mWriter; + public IgnoreKeyWriter(RecordWriter writer) { this.mWriter = writer; } - + public synchronized void write(K key, V value) throws IOException { - this.mWriter.write(null, value); + this.mWriter.write(null, value); } public void close(Reporter reporter) throws IOException { this.mWriter.close(reporter); } } - - public RecordWriter getRecordWriter(FileSystem ignored, - JobConf job, - String name, - Progressable progress) - throws IOException { - - return new IgnoreKeyWriter(super.getRecordWriter(ignored, job, name, progress)); + + @Override + public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, + String name, Progressable progress) throws IOException { + + return new IgnoreKeyWriter(super.getRecordWriter(ignored, job, name, + progress)); } - } Index: ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataInputBuffer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataInputBuffer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataInputBuffer.java (working copy) @@ -31,9 +31,10 @@ * A thread-not-safe version of Hadoop's DataInputBuffer, which removes all * synchronized modifiers. */ -public class NonSyncDataInputBuffer extends FilterInputStream implements DataInput { +public class NonSyncDataInputBuffer extends FilterInputStream implements + DataInput { - private NonSyncByteArrayInputStream buffer; + private final NonSyncByteArrayInputStream buffer; byte[] buff = new byte[16]; @@ -77,7 +78,7 @@ * * @throws IOException * If a problem occurs reading from this DataInputStream. 
- * + * */ @Override public final int read(byte[] buffer) throws IOException { @@ -156,8 +157,9 @@ while (offset < count) { int bytesRead = in.read(buff, offset, count - offset); - if (bytesRead == -1) + if (bytesRead == -1) { return bytesRead; + } offset += bytesRead; } return offset; @@ -464,22 +466,26 @@ int utfSize) throws UTFDataFormatException { int count = 0, s = 0, a; while (count < utfSize) { - if ((out[s] = (char) buf[offset + count++]) < '\u0080') + if ((out[s] = (char) buf[offset + count++]) < '\u0080') { s++; - else if (((a = out[s]) & 0xe0) == 0xc0) { - if (count >= utfSize) + } else if (((a = out[s]) & 0xe0) == 0xc0) { + if (count >= utfSize) { throw new UTFDataFormatException(); + } int b = buf[count++]; - if ((b & 0xC0) != 0x80) + if ((b & 0xC0) != 0x80) { throw new UTFDataFormatException(); + } out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F)); } else if ((a & 0xf0) == 0xe0) { - if (count + 1 >= utfSize) + if (count + 1 >= utfSize) { throw new UTFDataFormatException(); + } int b = buf[count++]; int c = buf[count++]; - if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) + if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) { throw new UTFDataFormatException(); + } out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F)); } else { throw new UTFDataFormatException(); Index: ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataOutputBuffer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataOutputBuffer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/NonSyncDataOutputBuffer.java (working copy) @@ -29,7 +29,7 @@ */ public class NonSyncDataOutputBuffer extends DataOutputStream { - private NonSyncByteArrayOutputStream buffer; + private final NonSyncByteArrayOutputStream buffer; /** Constructs a new empty buffer. */ public NonSyncDataOutputBuffer() { @@ -56,7 +56,7 @@ /** Resets the buffer to empty. 
*/ public NonSyncDataOutputBuffer reset() { - this.written = 0; + written = 0; buffer.reset(); return this; } @@ -66,11 +66,13 @@ buffer.write(in, length); } + @Override public void write(int b) throws IOException { buffer.write(b); incCount(1); } + @Override public void write(byte b[], int off, int len) throws IOException { buffer.write(b, off, len); incCount(len); @@ -79,7 +81,8 @@ private void incCount(int value) { if (written + value < 0) { written = Integer.MAX_VALUE; - } else + } else { written += value; + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java (working copy) @@ -30,10 +30,11 @@ */ public interface InputFormatChecker { - /** - * This method is used to validate the input files - * - */ - public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files) throws IOException; + /** + * This method is used to validate the input files + * + */ + public boolean validateInput(FileSystem fs, HiveConf conf, + ArrayList files) throws IOException; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java (working copy) @@ -35,12 +35,13 @@ import org.apache.hadoop.mapred.Reporter; public class RCFileInputFormat - extends FileInputFormat implements InputFormatChecker{ + extends FileInputFormat implements InputFormatChecker { public RCFileInputFormat() { setMinSplitSize(SequenceFile.SYNC_INTERVAL); } + @Override @SuppressWarnings("unchecked") public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { @@ -53,11 +54,13 @@ @Override public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files) throws IOException { - if (files.size() <= 0) + if (files.size() <= 0) { return false; + } for (int fileId = 0; fileId < files.size(); fileId++) { try { - RCFile.Reader reader = new RCFile.Reader(fs, files.get(fileId).getPath(), conf); + RCFile.Reader reader = new RCFile.Reader(fs, files.get(fileId) + .getPath(), conf); reader.close(); } catch (IOException e) { return false; Index: ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java (working copy) @@ -67,6 +67,7 @@ } /** {@inheritDoc} */ + @Override public RecordWriter getRecordWriter( FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { @@ -124,10 +125,11 @@ String[] cols = null; String columns = tableProperties.getProperty("columns"); - if (columns == null || columns.trim().equals("")) + if (columns == null || columns.trim().equals("")) { cols = new String[0]; - else + } else { cols = StringUtils.split(columns, ","); + } RCFileOutputFormat.setColumnNumber(jc, cols.length); final RCFile.Writer outWriter = Utilities.createRCFileWriter(jc, FileSystem Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java (working copy) @@ -23,13 +23,17 @@ /** * Implementation of the query block expression - * + * **/ public class QBExpr { private static final Log LOG = LogFactory.getLog("hive.ql.parse.QBExpr"); - public static enum Opcode { NULLOP, UNION, INTERSECT, DIFF }; + + public static enum Opcode { + NULLOP, UNION, INTERSECT, DIFF + }; + private Opcode opcode; private QBExpr qbexpr1; private QBExpr qbexpr2; @@ -49,7 +53,7 @@ } public QBExpr(QB qb) { - this.opcode = Opcode.NULLOP; + opcode = Opcode.NULLOP; this.qb = qb; } @@ -68,15 +72,15 @@ } public void setQBExpr1(QBExpr qbexpr) { - this.qbexpr1 = qbexpr; + qbexpr1 = qbexpr; } public void setQBExpr2(QBExpr qbexpr) { - this.qbexpr2 = qbexpr; + qbexpr2 = qbexpr; } public QB getQB() { - return this.qb; + return qb; } public Opcode getOpcode() { @@ -94,15 +98,14 @@ public void print(String msg) { if (opcode == Opcode.NULLOP) { LOG.info(msg + "start qb = " + qb); - qb.print(msg+" "); + qb.print(msg + " "); LOG.info(msg + "end qb = " + qb); - } - else { + } else { LOG.info(msg + "start qbexpr1 = " + qbexpr1); - qbexpr1.print(msg+" "); + qbexpr1.print(msg + " "); LOG.info(msg + "end qbexpr1 = " + qbexpr1); LOG.info(msg + "start qbexpr2 = " + qbexpr2); - qbexpr2.print(msg+" "); + qbexpr2.print(msg + " "); LOG.info(msg + "end qbexpr2 = " + qbexpr2); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java (working copy) @@ -18,4 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -public enum joinType {INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE, LEFTSEMI}; +public enum joinType { + INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE, LEFTSEMI +}; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (working copy) @@ -18,31 +18,31 @@ package org.apache.hadoop.hive.ql.parse; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Vector; -import java.util.List; -import java.util.ArrayList; import java.util.Map.Entry; /** * Internal representation of the join tree - * + * */ -public class QBJoinTree -{ - private String leftAlias; - private String[] rightAliases; - private String[] leftAliases; - private QBJoinTree joinSrc; - private String[] baseSrc; - private int nextTag; - private joinCond[] joinCond; - private boolean noOuterJoin; - private boolean noSemiJoin; - - // keeps track of the right-hand-side table name of the left-semi-join, and its list of join keys - private HashMap> rhsSemijoin; - +public class QBJoinTree { + private String leftAlias; + private String[] rightAliases; + private String[] leftAliases; + private QBJoinTree joinSrc; + private String[] baseSrc; + private int nextTag; + private joinCond[] joinCond; + private boolean noOuterJoin; + private boolean noSemiJoin; + + // keeps track of the right-hand-side table name of the left-semi-join, and + // its list of join keys + private final HashMap> rhsSemijoin; + // join conditions private Vector> expressions; @@ -50,24 +50,25 @@ private Vector> filters; // user asked for map-side 
join - private boolean mapSideJoin; - private List mapAliases; - + private boolean mapSideJoin; + private List mapAliases; + // big tables that should be streamed - private List streamAliases; + private List streamAliases; /** - * constructor + * constructor */ - public QBJoinTree() { + public QBJoinTree() { nextTag = 0; noOuterJoin = true; - noSemiJoin = true; + noSemiJoin = true; rhsSemijoin = new HashMap>(); } /** * returns left alias if any - this is used for merging later on + * * @return left alias if any */ public String getLeftAlias() { @@ -76,7 +77,9 @@ /** * set left alias for the join expression - * @param leftAlias String + * + * @param leftAlias + * String */ public void setLeftAlias(String leftAlias) { this.leftAlias = leftAlias; @@ -145,13 +148,13 @@ public void setNoOuterJoin(boolean noOuterJoin) { this.noOuterJoin = noOuterJoin; } - + public boolean getNoSemiJoin() { return noSemiJoin; } public void setNoSemiJoin(boolean semi) { - this.noSemiJoin = semi; + noSemiJoin = semi; } /** @@ -162,7 +165,8 @@ } /** - * @param filters the filters to set + * @param filters + * the filters to set */ public void setFilters(Vector> filters) { this.filters = filters; @@ -176,7 +180,8 @@ } /** - * @param mapSideJoin the mapSidejoin to set + * @param mapSideJoin + * the mapSidejoin to set */ public void setMapSideJoin(boolean mapSideJoin) { this.mapSideJoin = mapSideJoin; @@ -190,12 +195,13 @@ } /** - * @param mapAliases the mapAliases to set + * @param mapAliases + * the mapAliases to set */ public void setMapAliases(List mapAliases) { this.mapAliases = mapAliases; } - + public List getStreamAliases() { return streamAliases; } @@ -203,39 +209,43 @@ public void setStreamAliases(List streamAliases) { this.streamAliases = streamAliases; } - + /** - * Insert only a key to the semijoin table name to column names map. - * @param alias table name alias. + * Insert only a key to the semijoin table name to column names map. + * + * @param alias + * table name alias. */ public void addRHSSemijoin(String alias) { - if ( ! rhsSemijoin.containsKey(alias) ) { + if (!rhsSemijoin.containsKey(alias)) { rhsSemijoin.put(alias, null); } } - + /** * Remeber the mapping of table alias to set of columns. + * * @param alias * @param columns */ public void addRHSSemijoinColumns(String alias, ArrayList columns) { ArrayList cols = rhsSemijoin.get(alias); - if ( cols == null ) { + if (cols == null) { rhsSemijoin.put(alias, columns); } else { cols.addAll(columns); } } - + /** * Remeber the mapping of table alias to set of columns. + * * @param alias * @param columns */ public void addRHSSemijoinColumns(String alias, ASTNode column) { ArrayList cols = rhsSemijoin.get(alias); - if ( cols == null ) { + if (cols == null) { cols = new ArrayList(); cols.add(column); rhsSemijoin.put(alias, cols); @@ -243,26 +253,26 @@ cols.add(column); } } - + public ArrayList getRHSSemijoinColumns(String alias) { return rhsSemijoin.get(alias); } - + /** * Merge the rhs tables from another join tree. 
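(Illustrative aside, not part of the patch.) mergeRHSSemijoin, whose hunk follows, folds another join tree's alias-to-columns map into this one: a new alias adopts the source list as-is, while an existing alias has the columns appended (addRHSSemijoinColumns grows the per-alias lists the same way). A stand-alone sketch of that merge using plain Strings in place of ASTNode column references:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

public class SemijoinMergeSketch {
  public static void main(String[] args) {
    Map<String, ArrayList<String>> dest = new HashMap<String, ArrayList<String>>();
    dest.put("b", new ArrayList<String>());
    dest.get("b").add("key1");

    Map<String, ArrayList<String>> src = new HashMap<String, ArrayList<String>>();
    src.put("b", new ArrayList<String>());
    src.get("b").add("key2");
    src.put("c", new ArrayList<String>()); // alias seen, no columns yet

    // Adopt the source list for a new alias, append for an existing one.
    for (Map.Entry<String, ArrayList<String>> e : src.entrySet()) {
      ArrayList<String> existing = dest.get(e.getKey());
      if (existing == null) {
        dest.put(e.getKey(), e.getValue());
      } else {
        existing.addAll(e.getValue());
      }
    }
    System.out.println(dest); // b=[key1, key2], c=[]
  }
}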
- * @param src the source join tree + * + * @param src + * the source join tree */ public void mergeRHSSemijoin(QBJoinTree src) { - for (Entry> e: src.rhsSemijoin.entrySet()) { + for (Entry> e : src.rhsSemijoin.entrySet()) { String key = e.getKey(); - ArrayList value = this.rhsSemijoin.get(key); - if ( value == null ) { - this.rhsSemijoin.put(key, e.getValue()); + ArrayList value = rhsSemijoin.get(key); + if (value == null) { + rhsSemijoin.put(key, e.getValue()); } else { value.addAll(e.getValue()); } } } } - - Index: ql/src/java/org/apache/hadoop/hive/ql/parse/joinCond.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/joinCond.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/joinCond.java (working copy) @@ -18,10 +18,7 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.Vector; -import org.apache.hadoop.hive.ql.plan.joinDesc; - /** * Join conditions Descriptor implementation. * @@ -32,24 +29,26 @@ private joinType joinType; private boolean preserved; - public joinCond() { } + public joinCond() { + } public joinCond(int left, int right, joinType joinType) { this.left = left; this.right = right; this.joinType = joinType; } - + /** * Constructor for a UNIQUEJOIN cond * - * @param p true if table is preserved, false otherwise + * @param p + * true if table is preserved, false otherwise */ public joinCond(boolean p) { - this.joinType = org.apache.hadoop.hive.ql.parse.joinType.UNIQUE; - this.preserved = p; + joinType = org.apache.hadoop.hive.ql.parse.joinType.UNIQUE; + preserved = p; } - + /** * @return the true if table is preserved, false otherwise */ Index: ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenMapRedWalker.java (working copy) @@ -32,34 +32,40 @@ /** * constructor of the walker - the dispatcher is passed - * @param disp the dispatcher to be called for each node visited + * + * @param disp + * the dispatcher to be called for each node visited */ public GenMapRedWalker(Dispatcher disp) { super(disp); } - + /** * Walk the given operator - * @param nd operator being walked + * + * @param nd + * operator being walked */ @Override public void walk(Node nd) throws SemanticException { List children = nd.getChildren(); - + // maintain the stack of operators encountered opStack.push(nd); dispatch(nd, opStack); // kids of reduce sink operator need not be traversed again - if ((children == null) || - ((nd instanceof ReduceSinkOperator) && (getDispatchedList().containsAll(children)))) { + if ((children == null) + || ((nd instanceof ReduceSinkOperator) && (getDispatchedList() + .containsAll(children)))) { opStack.pop(); return; } // move all the children to the front of queue - for (Node ch : children) + for (Node ch : children) { walk(ch); + } // done with this operator opStack.pop(); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (working copy) @@ -18,167 +18,181 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; 
+import java.util.List; +import java.util.Map; +import java.util.Set; -import org.antlr.runtime.tree.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * Implementation of the parse information related to a query block - * + * **/ public class QBParseInfo { - private boolean isSubQ; - private String alias; + private final boolean isSubQ; + private final String alias; private ASTNode joinExpr; private ASTNode hints; - private HashMap aliasToSrc; - private HashMap nameToDest; - private HashMap nameToSample; - private HashMap destToSelExpr; - private HashMap destToWhereExpr; - private HashMap destToGroupby; + private final HashMap aliasToSrc; + private final HashMap nameToDest; + private final HashMap nameToSample; + private final HashMap destToSelExpr; + private final HashMap destToWhereExpr; + private final HashMap destToGroupby; /** - * ClusterBy is a short name for both DistributeBy and SortBy. + * ClusterBy is a short name for both DistributeBy and SortBy. */ - private HashMap destToClusterby; + private final HashMap destToClusterby; /** - * DistributeBy controls the hashcode of the row, which determines which reducer - * the rows will go to. + * DistributeBy controls the hashcode of the row, which determines which + * reducer the rows will go to. */ - private HashMap destToDistributeby; + private final HashMap destToDistributeby; /** - * SortBy controls the reduce keys, which affects the order of rows - * that the reducer receives. + * SortBy controls the reduce keys, which affects the order of rows that the + * reducer receives. */ - private HashMap destToSortby; + private final HashMap destToSortby; /** - * Maping from table/subquery aliases to all the associated lateral view - * nodes + * Maping from table/subquery aliases to all the associated lateral view nodes */ - private HashMap> aliasToLateralViews; - + private final HashMap> aliasToLateralViews; + /* Order by clause */ - private HashMap destToOrderby; - private HashMap destToLimit; + private final HashMap destToOrderby; + private final HashMap destToLimit; private int outerQueryLimit; // used by GroupBy - private LinkedHashMap > destToAggregationExprs; - private HashMap destToDistinctFuncExpr; + private final LinkedHashMap> destToAggregationExprs; + private final HashMap destToDistinctFuncExpr; @SuppressWarnings("unused") private static final Log LOG = LogFactory.getLog(QBParseInfo.class.getName()); - + public QBParseInfo(String alias, boolean isSubQ) { - this.aliasToSrc = new HashMap(); - this.nameToDest = new HashMap(); - this.nameToSample = new HashMap(); - this.destToSelExpr = new HashMap(); - this.destToWhereExpr = new HashMap(); - this.destToGroupby = new HashMap(); - this.destToClusterby = new HashMap(); - this.destToDistributeby = new HashMap(); - this.destToSortby = new HashMap(); - this.destToOrderby = new HashMap(); - this.destToLimit = new HashMap(); - - this.destToAggregationExprs = new LinkedHashMap >(); - this.destToDistinctFuncExpr = new HashMap(); - + aliasToSrc = new HashMap(); + nameToDest = new HashMap(); + nameToSample = new HashMap(); + destToSelExpr = new HashMap(); + destToWhereExpr = new HashMap(); + destToGroupby = new HashMap(); + destToClusterby = new HashMap(); + destToDistributeby = new HashMap(); + destToSortby = new HashMap(); + destToOrderby = new HashMap(); + destToLimit = new HashMap(); + + destToAggregationExprs = new LinkedHashMap>(); + destToDistinctFuncExpr = new HashMap(); + this.alias = alias; this.isSubQ = isSubQ; - this.outerQueryLimit = -1; - 
- this.aliasToLateralViews = new HashMap>(); + outerQueryLimit = -1; + + aliasToLateralViews = new HashMap>(); } - public void setAggregationExprsForClause(String clause, LinkedHashMap aggregationTrees) { - this.destToAggregationExprs.put(clause, aggregationTrees); + public void setAggregationExprsForClause(String clause, + LinkedHashMap aggregationTrees) { + destToAggregationExprs.put(clause, aggregationTrees); } public HashMap getAggregationExprsForClause(String clause) { - return this.destToAggregationExprs.get(clause); + return destToAggregationExprs.get(clause); } public void setDistinctFuncExprForClause(String clause, ASTNode ast) { - this.destToDistinctFuncExpr.put(clause, ast); + destToDistinctFuncExpr.put(clause, ast); } - + public ASTNode getDistinctFuncExprForClause(String clause) { - return this.destToDistinctFuncExpr.get(clause); + return destToDistinctFuncExpr.get(clause); } - + public void setSelExprForClause(String clause, ASTNode ast) { - this.destToSelExpr.put(clause, ast); + destToSelExpr.put(clause, ast); } public void setWhrExprForClause(String clause, ASTNode ast) { - this.destToWhereExpr.put(clause, ast); + destToWhereExpr.put(clause, ast); } public void setGroupByExprForClause(String clause, ASTNode ast) { - this.destToGroupby.put(clause, ast); + destToGroupby.put(clause, ast); } public void setDestForClause(String clause, ASTNode ast) { - this.nameToDest.put(clause, ast); + nameToDest.put(clause, ast); } /** - * Set the Cluster By AST for the clause. - * @param clause the name of the clause - * @param ast the abstract syntax tree + * Set the Cluster By AST for the clause. + * + * @param clause + * the name of the clause + * @param ast + * the abstract syntax tree */ public void setClusterByExprForClause(String clause, ASTNode ast) { - this.destToClusterby.put(clause, ast); + destToClusterby.put(clause, ast); } /** - * Set the Distribute By AST for the clause. - * @param clause the name of the clause - * @param ast the abstract syntax tree + * Set the Distribute By AST for the clause. + * + * @param clause + * the name of the clause + * @param ast + * the abstract syntax tree */ public void setDistributeByExprForClause(String clause, ASTNode ast) { - this.destToDistributeby.put(clause, ast); + destToDistributeby.put(clause, ast); } /** - * Set the Sort By AST for the clause. - * @param clause the name of the clause - * @param ast the abstract syntax tree + * Set the Sort By AST for the clause. 
+ * + * @param clause + * the name of the clause + * @param ast + * the abstract syntax tree */ public void setSortByExprForClause(String clause, ASTNode ast) { - this.destToSortby.put(clause, ast); + destToSortby.put(clause, ast); } public void setOrderByExprForClause(String clause, ASTNode ast) { - this.destToOrderby.put(clause, ast); + destToOrderby.put(clause, ast); } public void setSrcForAlias(String alias, ASTNode ast) { - this.aliasToSrc.put(alias.toLowerCase(), ast); + aliasToSrc.put(alias.toLowerCase(), ast); } public Set getClauseNames() { - return this.destToSelExpr.keySet(); + return destToSelExpr.keySet(); } public Set getClauseNamesForDest() { - return this.nameToDest.keySet(); + return nameToDest.keySet(); } public ASTNode getDestForClause(String clause) { - return this.nameToDest.get(clause); + return nameToDest.get(clause); } public ASTNode getWhrForClause(String clause) { - return this.destToWhereExpr.get(clause); + return destToWhereExpr.get(clause); } public HashMap getDestToWhereExpr() { @@ -186,77 +200,84 @@ } public ASTNode getGroupByForClause(String clause) { - return this.destToGroupby.get(clause); + return destToGroupby.get(clause); } + public HashMap getDestToGroupBy() { - return this.destToGroupby; + return destToGroupby; } - + public ASTNode getSelForClause(String clause) { - return this.destToSelExpr.get(clause); + return destToSelExpr.get(clause); } /** - * Get the Cluster By AST for the clause. - * @param clause the name of the clause + * Get the Cluster By AST for the clause. + * + * @param clause + * the name of the clause * @return the abstract syntax tree */ public ASTNode getClusterByForClause(String clause) { - return this.destToClusterby.get(clause); + return destToClusterby.get(clause); } public HashMap getDestToClusterBy() { return destToClusterby; } - + /** - * Get the Distribute By AST for the clause. - * @param clause the name of the clause + * Get the Distribute By AST for the clause. + * + * @param clause + * the name of the clause * @return the abstract syntax tree */ public ASTNode getDistributeByForClause(String clause) { - return this.destToDistributeby.get(clause); + return destToDistributeby.get(clause); } public HashMap getDestToDistributeBy() { return destToDistributeby; } - + /** - * Get the Sort By AST for the clause. - * @param clause the name of the clause + * Get the Sort By AST for the clause. 
+ * + * @param clause + * the name of the clause * @return the abstract syntax tree */ public ASTNode getSortByForClause(String clause) { - return this.destToSortby.get(clause); + return destToSortby.get(clause); } public ASTNode getOrderByForClause(String clause) { - return this.destToOrderby.get(clause); + return destToOrderby.get(clause); } public HashMap getDestToSortBy() { return destToSortby; } - + public HashMap getDestToOrderBy() { return destToOrderby; } - + public ASTNode getSrcForAlias(String alias) { - return this.aliasToSrc.get(alias.toLowerCase()); + return aliasToSrc.get(alias.toLowerCase()); } public String getAlias() { - return this.alias; + return alias; } public boolean getIsSubQ() { - return this.isSubQ; + return isSubQ; } public ASTNode getJoinExpr() { - return this.joinExpr; + return joinExpr; } public void setJoinExpr(ASTNode joinExpr) { @@ -264,81 +285,87 @@ } public TableSample getTabSample(String alias) { - return this.nameToSample.get(alias.toLowerCase()); + return nameToSample.get(alias.toLowerCase()); } - + public void setTabSample(String alias, TableSample tableSample) { - this.nameToSample.put(alias.toLowerCase(), tableSample); + nameToSample.put(alias.toLowerCase(), tableSample); } public void setDestLimit(String dest, Integer limit) { - this.destToLimit.put(dest, limit); + destToLimit.put(dest, limit); } public Integer getDestLimit(String dest) { - return this.destToLimit.get(dest); + return destToLimit.get(dest); } - /** - * @return the outerQueryLimit - */ - public int getOuterQueryLimit() { - return outerQueryLimit; - } + /** + * @return the outerQueryLimit + */ + public int getOuterQueryLimit() { + return outerQueryLimit; + } - /** - * @param outerQueryLimit the outerQueryLimit to set - */ - public void setOuterQueryLimit(int outerQueryLimit) { - this.outerQueryLimit = outerQueryLimit; - } + /** + * @param outerQueryLimit + * the outerQueryLimit to set + */ + public void setOuterQueryLimit(int outerQueryLimit) { + this.outerQueryLimit = outerQueryLimit; + } public boolean isSelectStarQuery() { - if (isSubQ || - (joinExpr != null) || - (!nameToSample.isEmpty()) || - (!destToGroupby.isEmpty()) || - (!destToClusterby.isEmpty()) || - (!aliasToLateralViews.isEmpty())) + if (isSubQ || (joinExpr != null) || (!nameToSample.isEmpty()) + || (!destToGroupby.isEmpty()) || (!destToClusterby.isEmpty()) + || (!aliasToLateralViews.isEmpty())) { return false; - - Iterator>> aggrIter = destToAggregationExprs.entrySet().iterator(); + } + + Iterator>> aggrIter = destToAggregationExprs + .entrySet().iterator(); while (aggrIter.hasNext()) { HashMap h = aggrIter.next().getValue(); - if ((h != null) && (!h.isEmpty())) + if ((h != null) && (!h.isEmpty())) { return false; + } } - + if (!destToDistinctFuncExpr.isEmpty()) { - Iterator> distn = destToDistinctFuncExpr.entrySet().iterator(); + Iterator> distn = destToDistinctFuncExpr + .entrySet().iterator(); while (distn.hasNext()) { ASTNode ct = distn.next().getValue(); - if (ct != null) + if (ct != null) { return false; + } } } - - Iterator> iter = nameToDest.entrySet().iterator(); + + Iterator> iter = nameToDest.entrySet() + .iterator(); while (iter.hasNext()) { Map.Entry entry = iter.next(); ASTNode v = entry.getValue(); - if (!(((ASTNode)v.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) + if (!(((ASTNode) v.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) { return false; + } } - + iter = destToSelExpr.entrySet().iterator(); while (iter.hasNext()) { Map.Entry entry = iter.next(); ASTNode selExprList 
= entry.getValue(); // Iterate over the selects for (int i = 0; i < selExprList.getChildCount(); ++i) { - + // list of the columns ASTNode selExpr = (ASTNode) selExprList.getChild(i); - ASTNode sel = (ASTNode)selExpr.getChild(0); - - if (sel.getToken().getType() != HiveParser.TOK_ALLCOLREF) + ASTNode sel = (ASTNode) selExpr.getChild(0); + + if (sel.getToken().getType() != HiveParser.TOK_ALLCOLREF) { return false; + } } } @@ -346,20 +373,21 @@ } public void setHints(ASTNode hint) { - this.hints = hint; + hints = hint; } public ASTNode getHints() { return hints; } - + public Map> getAliasToLateralViews() { - return this.aliasToLateralViews; + return aliasToLateralViews; } + public List getLateralViewsForAlias(String alias) { return aliasToLateralViews.get(alias.toLowerCase()); } - + public void addLateralViewForAlias(String alias, ASTNode lateralView) { String lowerAlias = alias.toLowerCase(); ArrayList lateralViews = aliasToLateralViews.get(lowerAlias); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (working copy) @@ -48,35 +48,39 @@ super(conf); } - public static FileStatus [] matchFilesOrDir(FileSystem fs, Path path) throws IOException { - FileStatus [] srcs = fs.globStatus(path); - if((srcs != null) && srcs.length == 1) { - if(srcs[0].isDir()) { + public static FileStatus[] matchFilesOrDir(FileSystem fs, Path path) + throws IOException { + FileStatus[] srcs = fs.globStatus(path); + if ((srcs != null) && srcs.length == 1) { + if (srcs[0].isDir()) { srcs = fs.listStatus(srcs[0].getPath()); } } return (srcs); } - private URI initializeFromURI(String fromPath) throws IOException, URISyntaxException { + private URI initializeFromURI(String fromPath) throws IOException, + URISyntaxException { URI fromURI = new Path(fromPath).toUri(); String fromScheme = fromURI.getScheme(); String fromAuthority = fromURI.getAuthority(); String path = fromURI.getPath(); - // generate absolute path relative to current directory or hdfs home directory - if(!path.startsWith("/")) { - if(isLocal) { + // generate absolute path relative to current directory or hdfs home + // directory + if (!path.startsWith("/")) { + if (isLocal) { path = new Path(System.getProperty("user.dir"), path).toString(); } else { - path = new Path(new Path("/user/"+System.getProperty("user.name")), path).toString(); + path = new Path(new Path("/user/" + System.getProperty("user.name")), + path).toString(); } } // set correct scheme and authority - if(StringUtils.isEmpty(fromScheme)) { - if(isLocal) { + if (StringUtils.isEmpty(fromScheme)) { + if (isLocal) { // file for local fromScheme = "file"; } else { @@ -88,7 +92,7 @@ } // if scheme is specified but not authority then use the default authority - if(fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) { + if (fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) { URI defaultURI = FileSystem.get(conf).getUri(); fromAuthority = defaultURI.getAuthority(); } @@ -97,51 +101,53 @@ return new URI(fromScheme, fromAuthority, path, null, null); } - - private void applyConstraints(URI fromURI, URI toURI, Tree ast, boolean isLocal) throws SemanticException { - if(!fromURI.getScheme().equals("file") && - !fromURI.getScheme().equals("hdfs")) { - throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(ast, "only 
\"file\" or \"hdfs\" file systems accepted")); + private void applyConstraints(URI fromURI, URI toURI, Tree ast, + boolean isLocal) throws SemanticException { + if (!fromURI.getScheme().equals("file") + && !fromURI.getScheme().equals("hdfs")) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, + "only \"file\" or \"hdfs\" file systems accepted")); } // local mode implies that scheme should be "file" // we can change this going forward - if(isLocal && !fromURI.getScheme().equals("file")) { - throw new SemanticException (ErrorMsg.ILLEGAL_PATH.getMsg(ast, "Source file system should be \"file\" if \"local\" is specified")); + if (isLocal && !fromURI.getScheme().equals("file")) { + throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, + "Source file system should be \"file\" if \"local\" is specified")); } try { - FileStatus [] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), - new Path(fromURI.getScheme(), - fromURI.getAuthority(), - fromURI.getPath())); + FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), + new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI + .getPath())); - if(srcs == null || srcs.length == 0) { - throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(ast, "No files matching path " + fromURI)); + if (srcs == null || srcs.length == 0) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, + "No files matching path " + fromURI)); } - - for(FileStatus oneSrc: srcs) { - if(oneSrc.isDir()) { - throw new SemanticException - (ErrorMsg.INVALID_PATH.getMsg(ast, - "source contains directory: " + oneSrc.getPath().toString())); + for (FileStatus oneSrc : srcs) { + if (oneSrc.isDir()) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, + "source contains directory: " + oneSrc.getPath().toString())); } } } catch (IOException e) { - // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils - throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(ast), e); + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e); } - // only in 'local' mode do we copy stuff from one place to another. // reject different scheme/authority in other cases. - if(!isLocal && (!StringUtils.equals(fromURI.getScheme(), toURI.getScheme()) || - !StringUtils.equals(fromURI.getAuthority(), toURI.getAuthority()))) { - String reason = "Move from: " + fromURI.toString() + " to: " + toURI.toString() + " is not valid. " + - "Please check that values for params \"default.fs.name\" and " + - "\"hive.metastore.warehouse.dir\" do not conflict."; - throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason)) ; + if (!isLocal + && (!StringUtils.equals(fromURI.getScheme(), toURI.getScheme()) || !StringUtils + .equals(fromURI.getAuthority(), toURI.getAuthority()))) { + String reason = "Move from: " + fromURI.toString() + " to: " + + toURI.toString() + " is not valid. 
" + + "Please check that values for params \"default.fs.name\" and " + + "\"hive.metastore.warehouse.dir\" do not conflict."; + throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason)); } } @@ -151,12 +157,12 @@ Tree from_t = ast.getChild(0); Tree table_t = ast.getChild(1); - if(ast.getChildCount() == 4) { + if (ast.getChildCount() == 4) { isOverWrite = isLocal = true; } - if(ast.getChildCount() == 3) { - if(ast.getChild(2).getText().toLowerCase().equals("local")) { + if (ast.getChildCount() == 3) { + if (ast.getChild(2).getText().toLowerCase().equals("local")) { isLocal = true; } else { isOverWrite = true; @@ -169,9 +175,11 @@ String fromPath = stripQuotes(from_t.getText()); fromURI = initializeFromURI(fromPath); } catch (IOException e) { - throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(from_t, e.getMessage()), e); + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(from_t, e + .getMessage()), e); } catch (URISyntaxException e) { - throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(from_t, e.getMessage()), e); + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(from_t, e + .getMessage()), e); } // initialize destination table/partition @@ -180,7 +188,8 @@ if (ts.tableHandle.isView()) { throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg()); } - URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation() : ts.tableHandle.getDataLocation(); + URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation() + : ts.tableHandle.getDataLocation(); List parts = ts.tableHandle.getTTable().getPartitionKeys(); if (isOverWrite && (parts != null && parts.size() > 0) @@ -194,28 +203,32 @@ Task rTask = null; // create copy work - if(isLocal) { - // if the local keyword is specified - we will always make a copy. this might seem redundant in the case - // that the hive warehouse is also located in the local file system - but that's just a test case. + if (isLocal) { + // if the local keyword is specified - we will always make a copy. this + // might seem redundant in the case + // that the hive warehouse is also located in the local file system - but + // that's just a test case. String copyURIStr = ctx.getExternalTmpFileURI(toURI); URI copyURI = URI.create(copyURIStr); - rTask = TaskFactory.get(new copyWork(fromURI.toString(), copyURIStr), this.conf); + rTask = TaskFactory.get(new copyWork(fromURI.toString(), copyURIStr), + conf); fromURI = copyURI; } // create final load/move work String loadTmpPath = ctx.getExternalTmpFileURI(toURI); - loadTableDesc loadTableWork = new loadTableDesc(fromURI.toString(), loadTmpPath, - Utilities.getTableDesc(ts.tableHandle), - (ts.partSpec != null) ? ts.partSpec : - new HashMap (), - isOverWrite); + loadTableDesc loadTableWork = new loadTableDesc(fromURI.toString(), + loadTmpPath, Utilities.getTableDesc(ts.tableHandle), + (ts.partSpec != null) ? 
ts.partSpec : new HashMap(), + isOverWrite); - if(rTask != null) { - rTask.addDependentTask(TaskFactory.get(new moveWork(getInputs(), getOutputs(), loadTableWork, null, true), this.conf)); + if (rTask != null) { + rTask.addDependentTask(TaskFactory.get(new moveWork(getInputs(), + getOutputs(), loadTableWork, null, true), conf)); } else { - rTask = TaskFactory.get(new moveWork(getInputs(), getOutputs(), loadTableWork, null, true), this.conf); + rTask = TaskFactory.get(new moveWork(getInputs(), getOutputs(), + loadTableWork, null, true), conf); } rootTasks.add(rTask); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java (working copy) @@ -29,26 +29,27 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; public class PrintOpTreeProcessor implements NodeProcessor { - - private PrintStream out; - private HashMap, Integer> opMap = new HashMap, Integer>(); + + private final PrintStream out; + private final HashMap, Integer> opMap = new HashMap, Integer>(); private Integer curNum = 0; public PrintOpTreeProcessor() { out = System.out; } - + public PrintOpTreeProcessor(PrintStream o) { out = o; } - + private String getParents(Operator op) { StringBuilder ret = new StringBuilder("["); boolean first = true; - if(op.getParentOperators() != null) { - for(Operator parent : op.getParentOperators()) { - if(!first) + if (op.getParentOperators() != null) { + for (Operator parent : op.getParentOperators()) { + if (!first) { ret.append(","); + } ret.append(opMap.get(parent)); first = false; } @@ -56,14 +57,15 @@ ret.append("]"); return ret.toString(); } - + private String getChildren(Operator op) { StringBuilder ret = new StringBuilder("["); boolean first = true; - if(op.getChildOperators() != null) { - for(Operator child : op.getChildOperators()) { - if(!first) + if (op.getChildOperators() != null) { + for (Operator child : op.getChildOperators()) { + if (!first) { ret.append(","); + } ret.append(opMap.get(child)); first = false; } @@ -71,14 +73,16 @@ ret.append("]"); return ret.toString(); } - - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - Operator op = (Operator)nd; + + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, + Object... 
nodeOutputs) throws SemanticException { + Operator op = (Operator) nd; if (opMap.get(op) == null) { opMap.put(op, curNum++); } - out.println("[" + opMap.get(op) + "] " + op.getClass().getName() + " =p=> " + getParents(op) + " =c=> " + getChildren(op)); - if(op.getConf() == null) { + out.println("[" + opMap.get(op) + "] " + op.getClass().getName() + " =p=> " + + getParents(op) + " =c=> " + getChildren(op)); + if (op.getConf() == null) { return null; } return null; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (working copy) @@ -21,39 +21,39 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.plan.FunctionWork; import org.apache.hadoop.hive.ql.plan.createFunctionDesc; import org.apache.hadoop.hive.ql.plan.dropFunctionDesc; public class FunctionSemanticAnalyzer extends BaseSemanticAnalyzer { - private static final Log LOG = - LogFactory.getLog("hive.ql.parse.FunctionSemanticAnalyzer"); - + private static final Log LOG = LogFactory + .getLog("hive.ql.parse.FunctionSemanticAnalyzer"); + public FunctionSemanticAnalyzer(HiveConf conf) throws SemanticException { super(conf); } - + + @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - if (ast.getToken().getType() == HiveParser.TOK_CREATEFUNCTION) + if (ast.getToken().getType() == HiveParser.TOK_CREATEFUNCTION) { analyzeCreateFunction(ast); - if (ast.getToken().getType() == HiveParser.TOK_DROPFUNCTION) + } + if (ast.getToken().getType() == HiveParser.TOK_DROPFUNCTION) { analyzeDropFunction(ast); + } LOG.info("analyze done"); } - - private void analyzeCreateFunction(ASTNode ast) - throws SemanticException { + + private void analyzeCreateFunction(ASTNode ast) throws SemanticException { String functionName = ast.getChild(0).getText(); String className = unescapeSQLString(ast.getChild(1).getText()); createFunctionDesc desc = new createFunctionDesc(functionName, className); rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); } - - private void analyzeDropFunction(ASTNode ast) - throws SemanticException { + + private void analyzeDropFunction(ASTNode ast) throws SemanticException { String functionName = ast.getChild(0).getText(); dropFunctionDesc desc = new dropFunctionDesc(functionName); rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/InputSignature.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/InputSignature.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/InputSignature.java (working copy) @@ -19,47 +19,48 @@ package org.apache.hadoop.hive.ql.parse; import java.util.ArrayList; -import java.lang.Class; -import java.lang.Object; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** - * The input signature of a function or operator. The signature basically consists - * of name, list of parameter types. 
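The class below identifies a function by its name together with the ordered list of parameter types, with equals and hashCode defined over both. A rough stand-alone sketch of the same idea, with plain strings standing in for TypeInfo and a made-up registry map as the consumer (nothing here is Hive's API):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

public class SignatureDemo {

  // Name plus ordered parameter types; equality and hashing cover both fields.
  static final class Signature {
    final String name;
    final List<String> paramTypes;

    Signature(String name, String... paramTypes) {
      this.name = name;
      this.paramTypes = Arrays.asList(paramTypes);
    }

    @Override public boolean equals(Object o) {
      if (!(o instanceof Signature)) {
        return false;
      }
      Signature s = (Signature) o;
      return name.equals(s.name) && paramTypes.equals(s.paramTypes);
    }

    @Override public int hashCode() {
      return Objects.hash(name, paramTypes);
    }

    @Override public String toString() {
      return name + "(" + String.join(",", paramTypes) + ")";
    }
  }

  public static void main(String[] args) {
    Map<Signature, String> registry = new HashMap<Signature, String>();
    registry.put(new Signature("concat", "string", "string"), "UDFConcat");
    // Lookup succeeds because equality covers the name and the full type list.
    System.out.println(registry.get(new Signature("concat", "string", "string")));
  }
}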
- * + * The input signature of a function or operator. The signature basically + * consists of name, list of parameter types. + * **/ public class InputSignature { - private String name; - private ArrayList typeArray; + private final String name; + private final ArrayList typeArray; @SuppressWarnings("unused") - private static final Log LOG = LogFactory.getLog(InputSignature.class.getName()); + private static final Log LOG = LogFactory.getLog(InputSignature.class + .getName()); public InputSignature(String name) { this.name = name; typeArray = new ArrayList(); } - public InputSignature(String name, TypeInfo ... classList) { + public InputSignature(String name, TypeInfo... classList) { this(name); - + if (classList.length != 0) { - for(TypeInfo cl: classList) { + for (TypeInfo cl : classList) { typeArray.add(cl); } } } - public InputSignature(String name, Class ... classList) { + public InputSignature(String name, Class... classList) { this(name); - + if (classList.length != 0) { - for(Class cl: classList) { - typeArray.add(TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(cl)); + for (Class cl : classList) { + typeArray.add(TypeInfoFactory + .getPrimitiveTypeInfoFromPrimitiveWritable(cl)); } } } @@ -76,6 +77,7 @@ return typeArray; } + @Override public boolean equals(Object obj) { if (obj == null) { return false; @@ -83,9 +85,8 @@ InputSignature other = null; try { - other = (InputSignature)obj; - } - catch (ClassCastException cce) { + other = (InputSignature) obj; + } catch (ClassCastException cce) { return false; } @@ -93,16 +94,18 @@ && (other.typeArray.equals(typeArray)); } + @Override public int hashCode() { return toString().hashCode(); } + @Override public String toString() { StringBuffer sb = new StringBuffer(); sb.append(getName()); sb.append("("); boolean isfirst = true; - for(TypeInfo cls: getTypeArray()) { + for (TypeInfo cls : getTypeArray()) { if (!isfirst) { sb.append(","); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (working copy) @@ -21,16 +21,24 @@ import java.util.ArrayList; import java.util.HashMap; -import org.antlr.runtime.*; -import org.antlr.runtime.tree.*; - +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.BitSet; +import org.antlr.runtime.CharStream; +import org.antlr.runtime.IntStream; +import org.antlr.runtime.MismatchedTokenException; +import org.antlr.runtime.NoViableAltException; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.antlr.runtime.TokenRewriteStream; +import org.antlr.runtime.TokenStream; +import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.antlr.runtime.tree.TreeAdaptor; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.hive.ql.Context; public class ParseDriver { - + static final private Log LOG = LogFactory.getLog("hive.ql.parse.ParseDriver"); private static HashMap xlateMap; @@ -148,25 +156,25 @@ xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES"); xlateMap.put("KW_VALUE_TYPE", "$VALUE$"); xlateMap.put("KW_ELEM_TYPE", "$ELEM$"); - + // Operators xlateMap.put("DOT", "."); xlateMap.put("COLON", ":"); xlateMap.put("COMMA", ","); xlateMap.put("SEMICOLON", ");"); - + xlateMap.put("LPAREN", "("); xlateMap.put("RPAREN", ")"); xlateMap.put("LSQUARE", "["); xlateMap.put("RSQUARE", 
"]"); - + xlateMap.put("EQUAL", "="); xlateMap.put("NOTEQUAL", "<>"); xlateMap.put("LESSTHANOREQUALTO", "<="); xlateMap.put("LESSTHAN", "<"); xlateMap.put("GREATERTHANOREQUALTO", ">="); xlateMap.put("GREATERTHAN", ">"); - + xlateMap.put("DIVIDE", "/"); xlateMap.put("PLUS", "+"); xlateMap.put("MINUS", "-"); @@ -180,49 +188,58 @@ } private static String xlate(String name) { - + String ret = xlateMap.get(name); if (ret == null) { ret = name; } - + return ret; } - // This class provides and implementation for a case insensitive token checker for - // the lexical analysis part of antlr. By converting the token stream into upper case - // at the time when lexical rules are checked, this class ensures that the lexical rules - // need to just match the token with upper case letters as opposed to combination of upper - // case and lower case characteres. This is purely used for matching lexical rules. The - // actual token text is stored in the same way as the user input without actually converting - // it into an upper case. The token values are generated by the consume() function of the - // super class ANTLRStringStream. The LA() function is the lookahead funtion and is purely - // used for matching lexical rules. This also means that the grammar will only accept - // capitalized tokens in case it is run from other tools like antlrworks which do not + // This class provides and implementation for a case insensitive token checker + // for + // the lexical analysis part of antlr. By converting the token stream into + // upper case + // at the time when lexical rules are checked, this class ensures that the + // lexical rules + // need to just match the token with upper case letters as opposed to + // combination of upper + // case and lower case characteres. This is purely used for matching lexical + // rules. The + // actual token text is stored in the same way as the user input without + // actually converting + // it into an upper case. The token values are generated by the consume() + // function of the + // super class ANTLRStringStream. The LA() function is the lookahead funtion + // and is purely + // used for matching lexical rules. This also means that the grammar will only + // accept + // capitalized tokens in case it is run from other tools like antlrworks which + // do not // have the ANTLRNoCaseStringStream implementation. 
- public class ANTLRNoCaseStringStream extends ANTLRStringStream { + public class ANTLRNoCaseStringStream extends ANTLRStringStream { public ANTLRNoCaseStringStream(String input) { super(input); } - + public int LA(int i) { int returnChar = super.LA(i); - if(returnChar == CharStream.EOF) { - return returnChar; - } - else if(returnChar == 0) { + if (returnChar == CharStream.EOF) { return returnChar; + } else if (returnChar == 0) { + return returnChar; } - - return Character.toUpperCase((char)returnChar); + + return Character.toUpperCase((char) returnChar); } } public class HiveLexerX extends HiveLexer { - private ArrayList errors; + private final ArrayList errors; public HiveLexerX() { super(); @@ -235,7 +252,7 @@ } public void displayRecognitionError(String[] tokenNames, - RecognitionException e) { + RecognitionException e) { errors.add(new ParseError(this, e, tokenNames)); } @@ -245,13 +262,13 @@ if (e instanceof NoViableAltException) { @SuppressWarnings("unused") - NoViableAltException nvae = (NoViableAltException)e; - // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" + NoViableAltException nvae = (NoViableAltException) e; + // for development, can add + // "decision=<<"+nvae.grammarDecisionDescription+">>" // and "(decision="+nvae.decisionNumber+") and // "state "+nvae.stateNumber msg = "character " + getCharErrorDisplay(e.c) + " not supported here"; - } - else { + } else { msg = super.getErrorMessage(e, tokenNames); } @@ -266,29 +283,26 @@ public class HiveParserX extends HiveParser { - private ArrayList errors; + private final ArrayList errors; public HiveParserX(TokenStream input) { super(input); errors = new ArrayList(); } - protected void mismatch(IntStream input, int ttype, BitSet follow) - throws RecognitionException { + protected void mismatch(IntStream input, int ttype, BitSet follow) + throws RecognitionException { throw new MismatchedTokenException(ttype, input); } public void recoverFromMismatchedSet(IntStream input, - RecognitionException re, - BitSet follow) - throws RecognitionException - { + RecognitionException re, BitSet follow) throws RecognitionException { throw re; } public void displayRecognitionError(String[] tokenNames, - RecognitionException e) { + RecognitionException e) { errors.add(new ParseError(this, e, tokenNames)); } @@ -298,25 +312,25 @@ // Transalate the token names to something that the user can understand String[] xlateNames = new String[tokenNames.length]; - for(int i=0; i>" + NoViableAltException nvae = (NoViableAltException) e; + // for development, can add + // "decision=<<"+nvae.grammarDecisionDescription+">>" // and "(decision="+nvae.decisionNumber+") and // "state "+nvae.stateNumber msg = "cannot recognize input " + getTokenErrorDisplay(e.token); - } - else { + } else { msg = super.getErrorMessage(e, xlateNames); } - if(msgs.size() > 0) { - msg = msg + " in " + msgs.peek(); - } + if (msgs.size() > 0) { + msg = msg + " in " + msgs.peek(); + } return msg; } @@ -333,10 +347,11 @@ */ static final TreeAdaptor adaptor = new CommonTreeAdaptor() { /** - * Creates an ASTNode for the given token. The ASTNode is a wrapper around antlr's - * CommonTree class that implements the Node interface. + * Creates an ASTNode for the given token. The ASTNode is a wrapper around + * antlr's CommonTree class that implements the Node interface. * - * @param payload The token. + * @param payload + * The token. * @return Object (which is actually an ASTNode) for the token. 
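The comment above describes the trick used by ANTLRNoCaseStringStream: LA() upper-cases the looked-ahead character so lexer rules can match keywords case-insensitively, while the underlying buffer, and therefore the token text, keeps the user's original casing. A toy version of that lookahead behaviour with no ANTLR dependency; the class and method names are illustrative only:

public class NoCaseLookaheadDemo {

  static final int EOF = -1;

  // Minimal stream: consume() advances, LA(i) looks ahead in upper case,
  // but the underlying buffer (and hence any token text) is untouched.
  static final class UpperCaseLookaheadStream {
    private final String data;
    private int pos = 0;

    UpperCaseLookaheadStream(String data) {
      this.data = data;
    }

    int LA(int i) {
      int idx = pos + i - 1;               // LA(1) is the next character
      if (idx >= data.length()) {
        return EOF;
      }
      return Character.toUpperCase(data.charAt(idx));
    }

    void consume() {
      pos++;
    }

    String original() {
      return data;
    }
  }

  public static void main(String[] args) {
    UpperCaseLookaheadStream s = new UpperCaseLookaheadStream("select Foo");
    // Rule matching sees 'S','E','L',... regardless of the input case ...
    System.out.print("lookahead: ");
    for (int i = 1; i <= 6; i++) {
      System.out.print((char) s.LA(i));
    }
    // ... while the original text (what would become token text) is unchanged.
    System.out.println("  original: " + s.original());
  }
}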
*/ @Override @@ -344,26 +359,28 @@ return new ASTNode(payload); } }; - + public ASTNode parse(String command) throws ParseException { return parse(command, null); } /** - * Parses a command, optionally assigning the parser's token stream to - * the given context. - * - * @param command command to parse - * - * @param ctx context with which to associate this parser's - * token stream, or null if either no context is available - * or the context already has an existing stream - * + * Parses a command, optionally assigning the parser's token stream to the + * given context. + * + * @param command + * command to parse + * + * @param ctx + * context with which to associate this parser's token stream, or + * null if either no context is available or the context already has + * an existing stream + * * @return parsed AST */ public ASTNode parse(String command, Context ctx) throws ParseException { LOG.info("Parsing command: " + command); - + HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); TokenRewriteStream tokens = new TokenRewriteStream(lexer); if (ctx != null) { @@ -378,17 +395,14 @@ throw new ParseException(parser.getErrors()); } - if (lexer.getErrors().size() == 0 && - parser.getErrors().size() == 0) { + if (lexer.getErrors().size() == 0 && parser.getErrors().size() == 0) { LOG.info("Parse Completed"); - } - else if (lexer.getErrors().size() != 0) { + } else if (lexer.getErrors().size() != 0) { throw new ParseException(lexer.getErrors()); } else { throw new ParseException(parser.getErrors()); } - - return (ASTNode)r.getTree(); + + return (ASTNode) r.getTree(); } } - Index: ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (working copy) @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.ql.parse; -import org.antlr.runtime.TokenRewriteStream; - -import org.apache.hadoop.hive.ql.metadata.HiveUtils; - import java.util.Map; import java.util.NavigableMap; import java.util.TreeMap; +import org.antlr.runtime.TokenRewriteStream; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; + /** * UnparseTranslator is used to "unparse" objects such as views when their * definition is stored. @@ -55,16 +54,18 @@ /** * Register a translation to be performed as part of unparse. 
- * - * @param node source node whose subtree is to be replaced - * - * @param replacementText text to use as replacement + * + * @param node + * source node whose subtree is to be replaced + * + * @param replacementText + * text to use as replacement */ void addTranslation(ASTNode node, String replacementText) { if (!enabled) { return; } - + if (node.getOrigin() != null) { // This node was parsed while loading the definition of another view // being referenced by the one being created, and we don't want @@ -79,42 +80,42 @@ translation.tokenStopIndex = tokenStopIndex; translation.replacementText = replacementText; - // Sanity check: no overlap with regions already being expanded - assert(tokenStopIndex >= tokenStartIndex); + // Sanity check: no overlap with regions already being expanded + assert (tokenStopIndex >= tokenStartIndex); Map.Entry existingEntry; existingEntry = translations.floorEntry(tokenStartIndex); if (existingEntry != null) { if (existingEntry.getKey() == tokenStartIndex) { if (existingEntry.getValue().tokenStopIndex == tokenStopIndex) { - if (existingEntry.getValue().replacementText.equals( - replacementText)) { - // exact match for existing mapping: somebody is doing something + if (existingEntry.getValue().replacementText.equals(replacementText)) { + // exact match for existing mapping: somebody is doing something // redundant, but we'll let it pass return; } } } - assert(existingEntry.getValue().tokenStopIndex < tokenStartIndex); + assert (existingEntry.getValue().tokenStopIndex < tokenStartIndex); } existingEntry = translations.ceilingEntry(tokenStartIndex); if (existingEntry != null) { - assert(existingEntry.getKey() > tokenStopIndex); + assert (existingEntry.getKey() > tokenStopIndex); } - // It's all good: create a new entry in the map + // It's all good: create a new entry in the map translations.put(tokenStartIndex, translation); } /** * Register a translation for an identifier. - * - * @param node source node (which must be an identifier) to be replaced + * + * @param node + * source node (which must be an identifier) to be replaced */ void addIdentifierTranslation(ASTNode identifier) { if (!enabled) { return; } - assert(identifier.getToken().getType() == HiveParser.Identifier); + assert (identifier.getToken().getType() == HiveParser.Identifier); String replacementText = identifier.getText(); replacementText = BaseSemanticAnalyzer.unescapeIdentifier(replacementText); replacementText = HiveUtils.unparseIdentifier(replacementText); @@ -123,16 +124,14 @@ /** * Apply translations on the given token stream. 
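applyTranslation, shown below, walks the TreeMap of recorded translations keyed by token start index and replaces each [start, stop] token range in the rewrite stream. A rough sketch of the same ordered, non-overlapping range rewrite on a plain string; it uses character offsets instead of token indexes and is not the TokenRewriteStream API:

import java.util.Map;
import java.util.TreeMap;

public class RangeRewriteDemo {

  static final class Edit {
    final int stop;            // inclusive end offset
    final String replacement;

    Edit(int stop, String replacement) {
      this.stop = stop;
      this.replacement = replacement;
    }
  }

  // Apply non-overlapping [start, stop] -> replacement edits, ordered by start.
  static String apply(String text, TreeMap<Integer, Edit> edits) {
    StringBuilder out = new StringBuilder();
    int cursor = 0;
    for (Map.Entry<Integer, Edit> e : edits.entrySet()) {
      int start = e.getKey();
      out.append(text, cursor, start);         // untouched prefix
      out.append(e.getValue().replacement);    // replaced range
      cursor = e.getValue().stop + 1;          // skip the original range
    }
    out.append(text.substring(cursor));
    return out.toString();
  }

  public static void main(String[] args) {
    String sql = "SELECT c1 FROM v";
    TreeMap<Integer, Edit> edits = new TreeMap<Integer, Edit>();
    edits.put(7, new Edit(8, "`c1`"));     // quote the column at offsets 7..8
    edits.put(15, new Edit(15, "`v`"));    // and the identifier at offset 15
    System.out.println(apply(sql, edits)); // SELECT `c1` FROM `v`
  }
}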
- * - * @param tokenRewriteStream rewrite-capable stream + * + * @param tokenRewriteStream + * rewrite-capable stream */ void applyTranslation(TokenRewriteStream tokenRewriteStream) { - for (Map.Entry entry - : translations.entrySet()) { - tokenRewriteStream.replace( - entry.getKey(), - entry.getValue().tokenStopIndex, - entry.getValue().replacementText); + for (Map.Entry entry : translations.entrySet()) { + tokenRewriteStream.replace(entry.getKey(), + entry.getValue().tokenStopIndex, entry.getValue().replacementText); } } @@ -140,9 +139,9 @@ int tokenStopIndex; String replacementText; + @Override public String toString() { return "" + tokenStopIndex + " -> " + replacementText; } } } - Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseException.java (working copy) @@ -30,15 +30,16 @@ this.errors = errors; } + @Override public String getMessage() { StringBuilder sb = new StringBuilder(); - for(ParseError err: errors) { + for (ParseError err : errors) { sb.append(err.getMessage()); sb.append("\n"); } return sb.toString(); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (working copy) @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.ql.parse; +import java.util.ArrayList; import java.util.Arrays; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Stack; @@ -36,67 +36,73 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.ql.udf.UDFOPPositive; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; /** - * The Factory for creating typecheck processors. The typecheck processors are used to - * processes the syntax trees for expressions and convert them into expression Node - * Descriptor trees. 
They also introduce the correct conversion functions to do proper - * implicit conversion. + * The Factory for creating typecheck processors. The typecheck processors are + * used to processes the syntax trees for expressions and convert them into + * expression Node Descriptor trees. They also introduce the correct conversion + * functions to do proper implicit conversion. */ public class TypeCheckProcFactory { - protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class.getName()); + protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class + .getName()); + /** - * Function to do groupby subexpression elimination. This is called by all the processors initially. - * As an example, consider the query - * select a+b, count(1) from T group by a+b; - * Then a+b is already precomputed in the group by operators key, so we substitute a+b in the select - * list with the internal column name of the a+b expression that appears in the in input row resolver. + * Function to do groupby subexpression elimination. This is called by all the + * processors initially. As an example, consider the query select a+b, + * count(1) from T group by a+b; Then a+b is already precomputed in the group + * by operators key, so we substitute a+b in the select list with the internal + * column name of the a+b expression that appears in the in input row + * resolver. * - * @param nd The node that is being inspected. - * @param procCtx The processor context. + * @param nd + * The node that is being inspected. + * @param procCtx + * The processor context. * * @return exprNodeColumnDesc. */ - public static exprNodeDesc processGByExpr(Node nd, Object procCtx) - throws SemanticException { - // We recursively create the exprNodeDesc. Base cases: when we encounter - // a column ref, we convert that into an exprNodeColumnDesc; when we encounter - // a constant, we convert that into an exprNodeConstantDesc. For others we just - // build the exprNodeFuncDesc with recursively built children. - ASTNode expr = (ASTNode)nd; + public static exprNodeDesc processGByExpr(Node nd, Object procCtx) + throws SemanticException { + // We recursively create the exprNodeDesc. Base cases: when we encounter + // a column ref, we convert that into an exprNodeColumnDesc; when we + // encounter + // a constant, we convert that into an exprNodeConstantDesc. For others we + // just + // build the exprNodeFuncDesc with recursively built children. + ASTNode expr = (ASTNode) nd; TypeCheckCtx ctx = (TypeCheckCtx) procCtx; RowResolver input = ctx.getInputRR(); exprNodeDesc desc = null; - // If the current subExpression is pre-calculated, as in Group-By etc. + // If the current subExpression is pre-calculated, as in Group-By etc. ColumnInfo colInfo = input.get("", expr.toStringTree()); if (colInfo != null) { - desc = new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol()); + desc = new exprNodeColumnDesc(colInfo.getType(), colInfo + .getInternalName(), colInfo.getTabAlias(), colInfo + .getIsPartitionCol()); return desc; - } + } return desc; } - + /** * Processor for processing NULL expression. */ @@ -106,29 +112,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; if (ctx.getError() != null) { return null; } - + exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { return desc; } - + return new exprNodeNullDesc(); } - + } - + /** * Factory method to get NullExprProcessor. + * * @return NullExprProcessor. */ public static NullExprProcessor getNullExprProcessor() { return new NullExprProcessor(); } - + /** * Processor for processing numeric constants. */ @@ -138,18 +145,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; if (ctx.getError() != null) { return null; } - + exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { return desc; } - + Number v = null; - ASTNode expr = (ASTNode)nd; + ASTNode expr = (ASTNode) nd; // The expression can be any one of Double, Long and Integer. We // try to parse the expression in that order to ensure that the // most specific type is used for conversion. @@ -161,21 +168,23 @@ // do nothing here, we will throw an exception in the following block } if (v == null) { - throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT.getMsg(expr)); + throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT + .getMsg(expr)); } return new exprNodeConstantDesc(v); } - + } - + /** * Factory method to get NumExprProcessor. + * * @return NumExprProcessor. */ public static NumExprProcessor getNumExprProcessor() { return new NumExprProcessor(); } - + /** * Processor for processing string constants. */ @@ -185,44 +194,47 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; if (ctx.getError() != null) { return null; } - + exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { return desc; } - - ASTNode expr = (ASTNode)nd; + + ASTNode expr = (ASTNode) nd; String str = null; - + switch (expr.getToken().getType()) { case HiveParser.StringLiteral: str = BaseSemanticAnalyzer.unescapeSQLString(expr.getText()); break; case HiveParser.TOK_CHARSETLITERAL: - str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), expr.getChild(1).getText()); + str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), + expr.getChild(1).getText()); break; default: - // HiveParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | HiveParse.KW_RIGHT + // HiveParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | + // HiveParse.KW_RIGHT str = BaseSemanticAnalyzer.unescapeIdentifier(expr.getText()); break; } return new exprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); } - + } - + /** * Factory method to get StrExprProcessor. + * * @return StrExprProcessor. */ public static StrExprProcessor getStrExprProcessor() { return new StrExprProcessor(); } - + /** * Processor for boolean constants. */ @@ -232,7 +244,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; if (ctx.getError() != null) { return null; } @@ -242,7 +254,7 @@ return desc; } - ASTNode expr = (ASTNode)nd; + ASTNode expr = (ASTNode) nd; Boolean bool = null; switch (expr.getToken().getType()) { @@ -255,19 +267,20 @@ default: assert false; } - return new exprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); + return new exprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); } - + } - + /** * Factory method to get BoolExprProcessor. + * * @return BoolExprProcessor. */ public static BoolExprProcessor getBoolExprProcessor() { return new BoolExprProcessor(); } - + /** * Processor for table columns */ @@ -277,7 +290,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; if (ctx.getError() != null) { return null; } @@ -287,20 +300,21 @@ return desc; } - ASTNode expr = (ASTNode)nd; + ASTNode expr = (ASTNode) nd; RowResolver input = ctx.getInputRR(); if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr)); return null; } - - assert(expr.getChildCount() == 1); - String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + assert (expr.getChildCount() == 1); + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr + .getChild(0).getText()); + boolean isTableAlias = input.hasTableAlias(tableOrCol); ColumnInfo colInfo = input.get(null, tableOrCol); - + if (isTableAlias) { if (colInfo != null) { // it's a table alias, and also a column @@ -308,7 +322,7 @@ return null; } else { // It's a table alias. - // We will process that later in DOT. + // We will process that later in DOT. return null; } } else { @@ -318,29 +332,33 @@ ctx.setError(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(expr)); return null; } else { - ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr.getChild(0))); - LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":" + input.toString()); + ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr + .getChild(0))); + LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":" + + input.toString()); return null; } } else { // It's a column. - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo + .getInternalName(), colInfo.getTabAlias(), colInfo + .getIsPartitionCol()); } } } - + } - + /** * Factory method to get ColumnExprProcessor. + * * @return ColumnExprProcessor. */ public static ColumnExprProcessor getColumnExprProcessor() { return new ColumnExprProcessor(); } - + /** * The default processor for typechecking. 
*/ @@ -357,33 +375,53 @@ specialFunctionTextHashMap.put(HiveParser.TOK_ISNULL, "isnull"); specialFunctionTextHashMap.put(HiveParser.TOK_ISNOTNULL, "isnotnull"); conversionFunctionTextHashMap = new HashMap(); - conversionFunctionTextHashMap.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_TINYINT, Constants.TINYINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_SMALLINT, Constants.SMALLINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_INT, Constants.INT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_BIGINT, Constants.BIGINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_FLOAT, Constants.FLOAT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_DOUBLE, Constants.DOUBLE_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, Constants.STRING_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_BOOLEAN, + Constants.BOOLEAN_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_TINYINT, + Constants.TINYINT_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_SMALLINT, + Constants.SMALLINT_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_INT, + Constants.INT_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_BIGINT, + Constants.BIGINT_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_FLOAT, + Constants.FLOAT_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_DOUBLE, + Constants.DOUBLE_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, + Constants.STRING_TYPE_NAME); } - public static boolean isRedundantConversionFunction(ASTNode expr, boolean isFunction, ArrayList children) { - if (!isFunction) return false; - // children is always one less than the expr.getChildCount(), since the latter contains function name. - assert(children.size() == expr.getChildCount() - 1); + public static boolean isRedundantConversionFunction(ASTNode expr, + boolean isFunction, ArrayList children) { + if (!isFunction) { + return false; + } + // children is always one less than the expr.getChildCount(), since the + // latter contains function name. 
+ assert (children.size() == expr.getChildCount() - 1); // conversion functions take a single parameter - if (children.size() != 1) return false; - String funcText = conversionFunctionTextHashMap.get(((ASTNode)expr.getChild(0)).getType()); - // not a conversion function - if (funcText == null) return false; - // return true when the child type and the conversion target type is the same - return ((PrimitiveTypeInfo)children.get(0).getTypeInfo()).getTypeName().equalsIgnoreCase(funcText); + if (children.size() != 1) { + return false; + } + String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr + .getChild(0)).getType()); + // not a conversion function + if (funcText == null) { + return false; + } + // return true when the child type and the conversion target type is the + // same + return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName() + .equalsIgnoreCase(funcText); } - + public static String getFunctionText(ASTNode expr, boolean isFunction) { String funcText = null; if (!isFunction) { - // For operator, the function name is the operator text, unless it's in our special dictionary + // For operator, the function name is the operator text, unless it's in + // our special dictionary if (expr.getChildCount() == 1) { funcText = specialUnaryOperatorTextHashMap.get(expr.getType()); } @@ -391,184 +429,202 @@ funcText = expr.getText(); } } else { - // For TOK_FUNCTION, the function name is stored in the first child, unless it's in our + // For TOK_FUNCTION, the function name is stored in the first child, + // unless it's in our // special dictionary. - assert(expr.getChildCount() >= 1); - int funcType = ((ASTNode)expr.getChild(0)).getType(); + assert (expr.getChildCount() >= 1); + int funcType = ((ASTNode) expr.getChild(0)).getType(); funcText = specialFunctionTextHashMap.get(funcType); if (funcText == null) { funcText = conversionFunctionTextHashMap.get(funcType); } if (funcText == null) { - funcText = ((ASTNode)expr.getChild(0)).getText(); + funcText = ((ASTNode) expr.getChild(0)).getText(); } } return BaseSemanticAnalyzer.unescapeIdentifier(funcText); } - - /** * Get the exprNodeDesc + * * @param name * @param children * @return The expression node descriptor - * @throws UDFArgumentException + * @throws UDFArgumentException */ - public static exprNodeDesc getFuncExprNodeDesc(String name, exprNodeDesc... children) { - ArrayList c = new ArrayList(Arrays.asList(children)); + public static exprNodeDesc getFuncExprNodeDesc(String name, + exprNodeDesc... children) { + ArrayList c = new ArrayList(Arrays + .asList(children)); try { return getFuncExprNodeDesc(name, c); } catch (UDFArgumentException e) { throw new RuntimeException("Hive 2 internal error", e); } } - + /** - * This function create an ExprNodeDesc for a UDF function given the children (arguments). - * It will insert implicit type conversion functions if necessary. - * @throws UDFArgumentException + * This function create an ExprNodeDesc for a UDF function given the + * children (arguments). It will insert implicit type conversion functions + * if necessary. 
+ * + * @throws UDFArgumentException */ - public static exprNodeDesc getFuncExprNodeDesc(String udfName, List children) - throws UDFArgumentException { + public static exprNodeDesc getFuncExprNodeDesc(String udfName, + List children) throws UDFArgumentException { FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName); if (fi == null) { throw new UDFArgumentException("udf:" + udfName + " not found."); } - + GenericUDF genericUDF = fi.getGenericUDF(); if (genericUDF == null) { - throw new UDFArgumentException("udf:" + udfName + " is an aggregation function."); + throw new UDFArgumentException("udf:" + udfName + + " is an aggregation function."); } return exprNodeGenericFuncDesc.newInstance(genericUDF, children); } - static exprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, boolean isFunction, - ArrayList children, TypeCheckCtx ctx) + static exprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, + boolean isFunction, ArrayList children, TypeCheckCtx ctx) throws SemanticException, UDFArgumentException { // return the child directly if the conversion is redundant. if (isRedundantConversionFunction(expr, isFunction, children)) { - assert(children.size() == 1); - assert(children.get(0) != null); + assert (children.size() == 1); + assert (children.get(0) != null); return children.get(0); } String funcText = getFunctionText(expr, isFunction); exprNodeDesc desc; if (funcText.equals(".")) { - // "." : FIELD Expression - assert(children.size() == 2); + // "." : FIELD Expression + assert (children.size() == 2); // Only allow constant field name for now - assert(children.get(1) instanceof exprNodeConstantDesc); + assert (children.get(1) instanceof exprNodeConstantDesc); exprNodeDesc object = children.get(0); - exprNodeConstantDesc fieldName = (exprNodeConstantDesc)children.get(1); - assert(fieldName.getValue() instanceof String); - + exprNodeConstantDesc fieldName = (exprNodeConstantDesc) children.get(1); + assert (fieldName.getValue() instanceof String); + // Calculate result TypeInfo - String fieldNameString = (String)fieldName.getValue(); + String fieldNameString = (String) fieldName.getValue(); TypeInfo objectTypeInfo = object.getTypeInfo(); - - // Allow accessing a field of list element structs directly from a list + + // Allow accessing a field of list element structs directly from a list boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST); if (isList) { - objectTypeInfo = ((ListTypeInfo)objectTypeInfo).getListElementTypeInfo(); + objectTypeInfo = ((ListTypeInfo) objectTypeInfo) + .getListElementTypeInfo(); } if (objectTypeInfo.getCategory() != Category.STRUCT) { throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr)); } - TypeInfo t = ((StructTypeInfo)objectTypeInfo).getStructFieldTypeInfo(fieldNameString); + TypeInfo t = ((StructTypeInfo) objectTypeInfo) + .getStructFieldTypeInfo(fieldNameString); if (isList) { t = TypeInfoFactory.getListTypeInfo(t); } - - desc = new exprNodeFieldDesc(t, children.get(0), fieldNameString, isList); - - } else if (funcText.equals("[")){ + + desc = new exprNodeFieldDesc(t, children.get(0), fieldNameString, + isList); + + } else if (funcText.equals("[")) { // "[]" : LSQUARE/INDEX Expression - assert(children.size() == 2); - + assert (children.size() == 2); + // Check whether this is a list or a map TypeInfo myt = children.get(0).getTypeInfo(); if (myt.getCategory() == Category.LIST) { // Only allow constant integer index for now if (!(children.get(1) instanceof exprNodeConstantDesc) - || 
!(((exprNodeConstantDesc)children.get(1)).getTypeInfo().equals(TypeInfoFactory.intTypeInfo))) { - throw new SemanticException(ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg(expr)); + || !(((exprNodeConstantDesc) children.get(1)).getTypeInfo() + .equals(TypeInfoFactory.intTypeInfo))) { + throw new SemanticException(ErrorMsg.INVALID_ARRAYINDEX_CONSTANT + .getMsg(expr)); } - + // Calculate TypeInfo - TypeInfo t = ((ListTypeInfo)myt).getListElementTypeInfo(); - desc = new exprNodeGenericFuncDesc(t, - FunctionRegistry.getGenericUDFForIndex(), - children); - } - else if (myt.getCategory() == Category.MAP) { + TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); + desc = new exprNodeGenericFuncDesc(t, FunctionRegistry + .getGenericUDFForIndex(), children); + } else if (myt.getCategory() == Category.MAP) { // Only allow only constant indexes for now if (!(children.get(1) instanceof exprNodeConstantDesc)) { - throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg(expr)); + throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_CONSTANT + .getMsg(expr)); } - if (!(((exprNodeConstantDesc)children.get(1)).getTypeInfo().equals( - ((MapTypeInfo)myt).getMapKeyTypeInfo()))) { - throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(expr)); + if (!(((exprNodeConstantDesc) children.get(1)).getTypeInfo() + .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) { + throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE + .getMsg(expr)); } // Calculate TypeInfo - TypeInfo t = ((MapTypeInfo)myt).getMapValueTypeInfo(); - desc = new exprNodeGenericFuncDesc(t, - FunctionRegistry.getGenericUDFForIndex(), - children); - } - else { - throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, + TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); + desc = new exprNodeGenericFuncDesc(t, FunctionRegistry + .getGenericUDFForIndex(), children); + } else { + throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName())); } } else { // other operators or functions FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcText); - + if (fi == null) { - if (isFunction) - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg((ASTNode)expr.getChild(0))); - else - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg((ASTNode)expr)); + if (isFunction) { + throw new SemanticException(ErrorMsg.INVALID_FUNCTION + .getMsg((ASTNode) expr.getChild(0))); + } else { + throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); + } } if (!fi.isNative()) { ctx.getUnparseTranslator().addIdentifierTranslation( - (ASTNode) expr.getChild(0)); + (ASTNode) expr.getChild(0)); } - // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't supported + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't + // supported if (fi.getGenericUDTF() != null) { throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); } - + try { desc = getFuncExprNodeDesc(funcText, children); } catch (AmbiguousMethodException e) { - ArrayList> argumentClasses = new ArrayList>(children.size()); - for(int i=0; i> argumentClasses = new ArrayList>( + children.size()); + for (int i = 0; i < children.size(); i++) { + argumentClasses.add(((PrimitiveTypeInfo) children.get(i) + .getTypeInfo()).getPrimitiveWritableClass()); } - + if (isFunction) { - String reason = "Looking for UDF \"" + expr.getChild(0).getText() + "\" with parameters " + argumentClasses; - throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg((ASTNode)expr.getChild(0), reason), 
e); + String reason = "Looking for UDF \"" + expr.getChild(0).getText() + + "\" with parameters " + argumentClasses; + throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE + .getMsg((ASTNode) expr.getChild(0), reason), e); } else { - String reason = "Looking for Operator \"" + expr.getText() + "\" with parameters " + argumentClasses; - throw new SemanticException(ErrorMsg.INVALID_OPERATOR_SIGNATURE.getMsg(expr, reason), e); + String reason = "Looking for Operator \"" + expr.getText() + + "\" with parameters " + argumentClasses; + throw new SemanticException(ErrorMsg.INVALID_OPERATOR_SIGNATURE + .getMsg(expr, reason), e); } } } // UDFOPPositive is a no-op. - // However, we still create it, and then remove it here, to make sure we only allow + // However, we still create it, and then remove it here, to make sure we + // only allow // "+" for numeric types. if (FunctionRegistry.isOpPositive(desc)) { - assert(desc.getChildren().size() == 1); + assert (desc.getChildren().size() == 1); desc = desc.getChildren().get(0); } - assert(desc != null); + assert (desc != null); return desc; } @@ -576,7 +632,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - TypeCheckCtx ctx = (TypeCheckCtx)procCtx; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; // If this is a GroupBy expression, clear error and continue exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); @@ -584,77 +640,81 @@ ctx.setError(null); return desc; } - + if (ctx.getError() != null) { return null; } - - ASTNode expr = (ASTNode)nd; - - // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, + + ASTNode expr = (ASTNode) nd; + + // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, // and the operator is a DOT, then it's a table column reference. if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && nodeOutputs[0] == null) { RowResolver input = ctx.getInputRR(); - String tableAlias = SemanticAnalyzer.unescapeIdentifier( - expr.getChild(0).getChild(0).getText()); + String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr + .getChild(0).getChild(0).getText()); // NOTE: tableAlias must be a valid non-ambiguous table alias, // because we've checked that in TOK_TABLE_OR_COL's process method. 
- ColumnInfo colInfo = input.get(tableAlias, - ((exprNodeConstantDesc)nodeOutputs[1]).getValue().toString() ); + ColumnInfo colInfo = input.get(tableAlias, + ((exprNodeConstantDesc) nodeOutputs[1]).getValue().toString()); if (colInfo == null) { ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1))); return null; } - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo + .getInternalName(), colInfo.getTabAlias(), colInfo + .getIsPartitionCol()); } // Return nulls for conversion operators - if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) || - specialFunctionTextHashMap.keySet().contains(expr.getType()) || - expr.getToken().getType() == HiveParser.CharSetName || - expr.getToken().getType() == HiveParser.CharSetLiteral) { + if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) + || specialFunctionTextHashMap.keySet().contains(expr.getType()) + || expr.getToken().getType() == HiveParser.CharSetName + || expr.getToken().getType() == HiveParser.CharSetLiteral) { return null; } - + boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION); - + // Create all children int childrenBegin = (isFunction ? 1 : 0); - ArrayList children = new ArrayList(expr.getChildCount() - childrenBegin); - for (int ci=childrenBegin; ci children = new ArrayList(expr + .getChildCount() + - childrenBegin); + for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) { + children.add((exprNodeDesc) nodeOutputs[ci]); } - + // If any of the children contains null, then return a null // this is a hack for now to handle the group by case if (children.contains(null)) { return null; } - + // Create function desc try { return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx); } catch (UDFArgumentTypeException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE - .getMsg(expr.getChild(childrenBegin + e.getArgumentId()), e.getMessage())); + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg(expr + .getChild(childrenBegin + e.getArgumentId()), e.getMessage())); } catch (UDFArgumentLengthException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH - .getMsg(expr, e.getMessage())); + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg( + expr, e.getMessage())); } catch (UDFArgumentException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT - .getMsg(expr, e.getMessage())); + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e + .getMessage())); } } - + } - + /** * Factory method to get DefaultExprProcessor. + * * @return DefaultExprProcessor. 
*/ public static DefaultExprProcessor getDefaultExprProcessor() { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (working copy) @@ -19,46 +19,53 @@ package org.apache.hadoop.hive.ql.parse; import java.util.Vector; + +import org.antlr.runtime.Token; import org.antlr.runtime.tree.CommonTree; -import org.antlr.runtime.Token; import org.apache.hadoop.hive.ql.lib.Node; /** * @author athusoo - * + * */ public class ASTNode extends CommonTree implements Node { private ASTNodeOrigin origin; - - public ASTNode() { + + public ASTNode() { } - + /** * Constructor - * @param t Token for the CommonTree Node + * + * @param t + * Token for the CommonTree Node */ public ASTNode(Token t) { super(t); } - - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see org.apache.hadoop.hive.ql.lib.Node#getChildren() */ public Vector getChildren() { if (super.getChildCount() == 0) { return null; } - + Vector ret_vec = new Vector(); - for(int i=0; i children = getChildren(); - if ( children != null ) { - for ( Node node : getChildren() ) { - if ( node instanceof ASTNode ) { + if (children != null) { + for (Node node : getChildren()) { + if (node instanceof ASTNode) { sb.append(((ASTNode) node).dump()); } else { sb.append("NON-ASTNODE!!"); @@ -100,5 +106,5 @@ sb.append(')'); return sb.toString(); } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (working copy) @@ -18,119 +18,113 @@ package org.apache.hadoop.hive.ql.parse; -import org.antlr.runtime.tree.*; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.antlr.runtime.tree.Tree; import org.apache.hadoop.hive.ql.metadata.HiveUtils; -import java.util.Map; -import java.util.HashMap; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - /** * List of error messages thrown by the parser **/ public enum ErrorMsg { - //SQLStates are taken from Section 12.5 of ISO-9075. - //See http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt - //Most will just rollup to the generic syntax error state of 42000, but - //specific errors can override the that state. - //See this page for how MySQL uses SQLState codes: - //http://dev.mysql.com/doc/refman/5.0/en/connector-j-reference-error-sqlstates.html + // SQLStates are taken from Section 12.5 of ISO-9075. + // See http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt + // Most will just rollup to the generic syntax error state of 42000, but + // specific errors can override the that state. 
+ // See this page for how MySQL uses SQLState codes: + // http://dev.mysql.com/doc/refman/5.0/en/connector-j-reference-error-sqlstates.html - GENERIC_ERROR("Exception while processing"), - INVALID_TABLE("Table not found", "42S02"), - INVALID_COLUMN("Invalid Column Reference"), - INVALID_TABLE_OR_COLUMN("Invalid Table Alias or Column Reference"), - AMBIGUOUS_TABLE_OR_COLUMN("Ambiguous Table Alias or Column Reference"), - INVALID_PARTITION("Partition not found"), - AMBIGUOUS_COLUMN("Ambiguous Column Reference"), - AMBIGUOUS_TABLE_ALIAS("Ambiguous Table Alias"), - INVALID_TABLE_ALIAS("Invalid Table Alias"), - NO_TABLE_ALIAS("No Table Alias"), - INVALID_FUNCTION("Invalid Function"), - INVALID_FUNCTION_SIGNATURE("Function Argument Type Mismatch"), - INVALID_OPERATOR_SIGNATURE("Operator Argument Type Mismatch"), - INVALID_ARGUMENT("Wrong Arguments"), - INVALID_ARGUMENT_LENGTH("Arguments Length Mismatch", "21000"), - INVALID_ARGUMENT_TYPE("Argument Type Mismatch"), - INVALID_JOIN_CONDITION_1("Both Left and Right Aliases Encountered in Join"), - INVALID_JOIN_CONDITION_2("Neither Left nor Right Aliases Encountered in Join"), - INVALID_JOIN_CONDITION_3("OR not supported in Join currently"), - INVALID_TRANSFORM("TRANSFORM with Other Select Columns not Supported"), - DUPLICATE_GROUPBY_KEY("Repeated Key in Group By"), - UNSUPPORTED_MULTIPLE_DISTINCTS("DISTINCT on Different Columns not Supported"), - NO_SUBQUERY_ALIAS("No Alias For Subquery"), - NO_INSERT_INSUBQUERY("Cannot insert in a Subquery. Inserting to table "), - NON_KEY_EXPR_IN_GROUPBY("Expression Not In Group By Key"), - INVALID_XPATH("General . and [] Operators are Not Supported"), - INVALID_PATH("Invalid Path"), - ILLEGAL_PATH("Path is not legal"), - INVALID_NUMERICAL_CONSTANT("Invalid Numerical Constant"), - INVALID_ARRAYINDEX_CONSTANT("Non Constant Expressions for Array Indexes not Supported"), - INVALID_MAPINDEX_CONSTANT("Non Constant Expression for Map Indexes not Supported"), - INVALID_MAPINDEX_TYPE("Map Key Type does not Match Index Expression Type"), - NON_COLLECTION_TYPE("[] not Valid on Non Collection Types"), - SELECT_DISTINCT_WITH_GROUPBY("SELECT DISTINCT and GROUP BY can not be in the same query"), - COLUMN_REPEATED_IN_PARTITIONING_COLS("Column repeated in partitioning columns"), - DUPLICATE_COLUMN_NAMES("Duplicate column name:"), - INVALID_BUCKET_NUMBER("Bucket number should be bigger than zero"), - COLUMN_REPEATED_IN_CLUSTER_SORT("Same column cannot appear in cluster and sort by"), - SAMPLE_RESTRICTION("Cannot Sample on More Than Two Columns"), - SAMPLE_COLUMN_NOT_FOUND("Sample Column Not Found"), - NO_PARTITION_PREDICATE("No Partition Predicate Found"), - INVALID_DOT(". 
operator is only supported on struct or list of struct types"), - INVALID_TBL_DDL_SERDE("Either list of columns or a custom serializer should be specified"), - TARGET_TABLE_COLUMN_MISMATCH("Cannot insert into target table because column number/types are different"), - TABLE_ALIAS_NOT_ALLOWED("Table Alias not Allowed in Sampling Clause"), - CLUSTERBY_DISTRIBUTEBY_CONFLICT("Cannot have both Cluster By and Distribute By Clauses"), - ORDERBY_DISTRIBUTEBY_CONFLICT("Cannot have both Order By and Distribute By Clauses"), - CLUSTERBY_SORTBY_CONFLICT("Cannot have both Cluster By and Sort By Clauses"), - ORDERBY_SORTBY_CONFLICT("Cannot have both Order By and Sort By Clauses"), - CLUSTERBY_ORDERBY_CONFLICT("Cannot have both Cluster By and Order By Clauses"), - NO_LIMIT_WITH_ORDERBY("In strict mode, limit must be specified if ORDER BY is present"), - NO_CARTESIAN_PRODUCT("In strict mode, cartesian product is not allowed. If you really want to perform the operation, set hive.mapred.mode=nonstrict"), - UNION_NOTIN_SUBQ("Top level Union is not supported currently; use a subquery for the union"), - INVALID_INPUT_FORMAT_TYPE("Input Format must implement InputFormat"), - INVALID_OUTPUT_FORMAT_TYPE("Output Format must implement HiveOutputFormat, otherwise it should be either IgnoreKeyTextOutputFormat or SequenceFileOutputFormat"), - NO_VALID_PARTN("The query does not reference any valid partition. To run this query, set hive.mapred.mode=nonstrict"), - NO_OUTER_MAPJOIN("Map Join cannot be performed with Outer join"), - INVALID_MAPJOIN_HINT("neither table specified as map-table"), - INVALID_MAPJOIN_TABLE("result of a union cannot be a map table"), - NON_BUCKETED_TABLE("Sampling Expression Needed for Non-Bucketed Table"), - BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR("Numberator should not be bigger than denaminator in sample clause for Table"), - NEED_PARTITION_ERROR("need to specify partition columns because the destination table is partitioned."), - CTAS_CTLT_COEXISTENCE("Create table command does not allow LIKE and AS-SELECT in the same command"), - LINES_TERMINATED_BY_NON_NEWLINE("LINES TERMINATED BY only supports newline '\\n' right now"), - CTAS_COLLST_COEXISTENCE("Create table as select command cannot specify the list of columns for the target table."), - CTLT_COLLST_COEXISTENCE("Create table like command cannot specify the list of columns for the target table."), - INVALID_SELECT_SCHEMA("Cannot derive schema from the select-clause."), - CTAS_PARCOL_COEXISTENCE("CREATE-TABLE-AS-SELECT does not support partitioning in the target table."), - CTAS_MULTI_LOADFILE("CREATE-TABLE-AS-SELECT results in multiple file load."), - CTAS_EXTTBL_COEXISTENCE("CREATE-TABLE-AS-SELECT cannot create external table."), - TABLE_ALREADY_EXISTS("Table already exists:", "42S02"), - COLUMN_ALIAS_ALREADY_EXISTS("Column alias already exists:", "42S02"), - UDTF_MULTIPLE_EXPR("Only a single expression in the SELECT clause is supported with UDTF's"), - UDTF_REQUIRE_AS("UDTF's require an AS clause"), - UDTF_NO_GROUP_BY("GROUP BY is not supported with a UDTF in the SELECT clause"), - UDTF_NO_SORT_BY("SORT BY is not supported with a UDTF in the SELECT clause"), - UDTF_NO_CLUSTER_BY("CLUSTER BY is not supported with a UDTF in the SELECT clause"), - UDTF_NO_DISTRIBUTE_BY("DISTRUBTE BY is not supported with a UDTF in the SELECT clause"), - UDTF_INVALID_LOCATION("UDTF's are not supported outside the SELECT clause, nor nested in expressions"), - UDTF_LATERAL_VIEW("UDTF's cannot be in a select expression when there is a lateral view"), - 
UDTF_ALIAS_MISMATCH("The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF"), - LATERAL_VIEW_WITH_JOIN("Join with a lateral view is not supported"), - LATERAL_VIEW_INVALID_CHILD("Lateral view AST with invalid child"), - OUTPUT_SPECIFIED_MULTIPLE_TIMES("The same output cannot be present multiple times: "), - INVALID_AS("AS clause has an invalid number of aliases"), - VIEW_COL_MISMATCH("The number of columns produced by the SELECT clause does not match the number of column names specified by CREATE VIEW"), - DML_AGAINST_VIEW("A view cannot be used as target table for LOAD or INSERT"); + GENERIC_ERROR("Exception while processing"), INVALID_TABLE("Table not found", + "42S02"), INVALID_COLUMN("Invalid Column Reference"), INVALID_TABLE_OR_COLUMN( + "Invalid Table Alias or Column Reference"), AMBIGUOUS_TABLE_OR_COLUMN( + "Ambiguous Table Alias or Column Reference"), INVALID_PARTITION( + "Partition not found"), AMBIGUOUS_COLUMN("Ambiguous Column Reference"), AMBIGUOUS_TABLE_ALIAS( + "Ambiguous Table Alias"), INVALID_TABLE_ALIAS("Invalid Table Alias"), NO_TABLE_ALIAS( + "No Table Alias"), INVALID_FUNCTION("Invalid Function"), INVALID_FUNCTION_SIGNATURE( + "Function Argument Type Mismatch"), INVALID_OPERATOR_SIGNATURE( + "Operator Argument Type Mismatch"), INVALID_ARGUMENT("Wrong Arguments"), INVALID_ARGUMENT_LENGTH( + "Arguments Length Mismatch", "21000"), INVALID_ARGUMENT_TYPE( + "Argument Type Mismatch"), INVALID_JOIN_CONDITION_1( + "Both Left and Right Aliases Encountered in Join"), INVALID_JOIN_CONDITION_2( + "Neither Left nor Right Aliases Encountered in Join"), INVALID_JOIN_CONDITION_3( + "OR not supported in Join currently"), INVALID_TRANSFORM( + "TRANSFORM with Other Select Columns not Supported"), DUPLICATE_GROUPBY_KEY( + "Repeated Key in Group By"), UNSUPPORTED_MULTIPLE_DISTINCTS( + "DISTINCT on Different Columns not Supported"), NO_SUBQUERY_ALIAS( + "No Alias For Subquery"), NO_INSERT_INSUBQUERY( + "Cannot insert in a Subquery. Inserting to table "), NON_KEY_EXPR_IN_GROUPBY( + "Expression Not In Group By Key"), INVALID_XPATH( + "General . and [] Operators are Not Supported"), INVALID_PATH( + "Invalid Path"), ILLEGAL_PATH("Path is not legal"), INVALID_NUMERICAL_CONSTANT( + "Invalid Numerical Constant"), INVALID_ARRAYINDEX_CONSTANT( + "Non Constant Expressions for Array Indexes not Supported"), INVALID_MAPINDEX_CONSTANT( + "Non Constant Expression for Map Indexes not Supported"), INVALID_MAPINDEX_TYPE( + "Map Key Type does not Match Index Expression Type"), NON_COLLECTION_TYPE( + "[] not Valid on Non Collection Types"), SELECT_DISTINCT_WITH_GROUPBY( + "SELECT DISTINCT and GROUP BY can not be in the same query"), COLUMN_REPEATED_IN_PARTITIONING_COLS( + "Column repeated in partitioning columns"), DUPLICATE_COLUMN_NAMES( + "Duplicate column name:"), INVALID_BUCKET_NUMBER( + "Bucket number should be bigger than zero"), COLUMN_REPEATED_IN_CLUSTER_SORT( + "Same column cannot appear in cluster and sort by"), SAMPLE_RESTRICTION( + "Cannot Sample on More Than Two Columns"), SAMPLE_COLUMN_NOT_FOUND( + "Sample Column Not Found"), NO_PARTITION_PREDICATE( + "No Partition Predicate Found"), INVALID_DOT( + ". 
operator is only supported on struct or list of struct types"), INVALID_TBL_DDL_SERDE( + "Either list of columns or a custom serializer should be specified"), TARGET_TABLE_COLUMN_MISMATCH( + "Cannot insert into target table because column number/types are different"), TABLE_ALIAS_NOT_ALLOWED( + "Table Alias not Allowed in Sampling Clause"), CLUSTERBY_DISTRIBUTEBY_CONFLICT( + "Cannot have both Cluster By and Distribute By Clauses"), ORDERBY_DISTRIBUTEBY_CONFLICT( + "Cannot have both Order By and Distribute By Clauses"), CLUSTERBY_SORTBY_CONFLICT( + "Cannot have both Cluster By and Sort By Clauses"), ORDERBY_SORTBY_CONFLICT( + "Cannot have both Order By and Sort By Clauses"), CLUSTERBY_ORDERBY_CONFLICT( + "Cannot have both Cluster By and Order By Clauses"), NO_LIMIT_WITH_ORDERBY( + "In strict mode, limit must be specified if ORDER BY is present"), NO_CARTESIAN_PRODUCT( + "In strict mode, cartesian product is not allowed. If you really want to perform the operation, set hive.mapred.mode=nonstrict"), UNION_NOTIN_SUBQ( + "Top level Union is not supported currently; use a subquery for the union"), INVALID_INPUT_FORMAT_TYPE( + "Input Format must implement InputFormat"), INVALID_OUTPUT_FORMAT_TYPE( + "Output Format must implement HiveOutputFormat, otherwise it should be either IgnoreKeyTextOutputFormat or SequenceFileOutputFormat"), NO_VALID_PARTN( + "The query does not reference any valid partition. To run this query, set hive.mapred.mode=nonstrict"), NO_OUTER_MAPJOIN( + "Map Join cannot be performed with Outer join"), INVALID_MAPJOIN_HINT( + "neither table specified as map-table"), INVALID_MAPJOIN_TABLE( + "result of a union cannot be a map table"), NON_BUCKETED_TABLE( + "Sampling Expression Needed for Non-Bucketed Table"), BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR( + "Numberator should not be bigger than denaminator in sample clause for Table"), NEED_PARTITION_ERROR( + "need to specify partition columns because the destination table is partitioned."), CTAS_CTLT_COEXISTENCE( + "Create table command does not allow LIKE and AS-SELECT in the same command"), LINES_TERMINATED_BY_NON_NEWLINE( + "LINES TERMINATED BY only supports newline '\\n' right now"), CTAS_COLLST_COEXISTENCE( + "Create table as select command cannot specify the list of columns for the target table."), CTLT_COLLST_COEXISTENCE( + "Create table like command cannot specify the list of columns for the target table."), INVALID_SELECT_SCHEMA( + "Cannot derive schema from the select-clause."), CTAS_PARCOL_COEXISTENCE( + "CREATE-TABLE-AS-SELECT does not support partitioning in the target table."), CTAS_MULTI_LOADFILE( + "CREATE-TABLE-AS-SELECT results in multiple file load."), CTAS_EXTTBL_COEXISTENCE( + "CREATE-TABLE-AS-SELECT cannot create external table."), TABLE_ALREADY_EXISTS( + "Table already exists:", "42S02"), COLUMN_ALIAS_ALREADY_EXISTS( + "Column alias already exists:", "42S02"), UDTF_MULTIPLE_EXPR( + "Only a single expression in the SELECT clause is supported with UDTF's"), UDTF_REQUIRE_AS( + "UDTF's require an AS clause"), UDTF_NO_GROUP_BY( + "GROUP BY is not supported with a UDTF in the SELECT clause"), UDTF_NO_SORT_BY( + "SORT BY is not supported with a UDTF in the SELECT clause"), UDTF_NO_CLUSTER_BY( + "CLUSTER BY is not supported with a UDTF in the SELECT clause"), UDTF_NO_DISTRIBUTE_BY( + "DISTRUBTE BY is not supported with a UDTF in the SELECT clause"), UDTF_INVALID_LOCATION( + "UDTF's are not supported outside the SELECT clause, nor nested in expressions"), UDTF_LATERAL_VIEW( + "UDTF's cannot be in a select expression when 
there is a lateral view"), UDTF_ALIAS_MISMATCH( + "The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF"), LATERAL_VIEW_WITH_JOIN( + "Join with a lateral view is not supported"), LATERAL_VIEW_INVALID_CHILD( + "Lateral view AST with invalid child"), OUTPUT_SPECIFIED_MULTIPLE_TIMES( + "The same output cannot be present multiple times: "), INVALID_AS( + "AS clause has an invalid number of aliases"), VIEW_COL_MISMATCH( + "The number of columns produced by the SELECT clause does not match the number of column names specified by CREATE VIEW"), DML_AGAINST_VIEW( + "A view cannot be used as target table for LOAD or INSERT"); private String mesg; private String SQLState; private static char SPACE = ' '; - private static Pattern ERROR_MESSAGE_PATTERN = Pattern.compile(".*line [0-9]+:[0-9]+ (.*)"); + private static Pattern ERROR_MESSAGE_PATTERN = Pattern + .compile(".*line [0-9]+:[0-9]+ (.*)"); private static Map mesgToErrorMsgMap = new HashMap(); private static int minMesgLength = -1; @@ -139,20 +133,22 @@ mesgToErrorMsgMap.put(errorMsg.getMsg().trim(), errorMsg); int length = errorMsg.getMsg().trim().length(); - if (minMesgLength == -1 || length < minMesgLength) + if (minMesgLength == -1 || length < minMesgLength) { minMesgLength = length; + } } } /** - * For a given error message string, searches for a ErrorMsg - * enum that appears to be a match. If an match is found, returns the + * For a given error message string, searches for a ErrorMsg enum + * that appears to be a match. If an match is found, returns the * SQLState associated with the ErrorMsg. If a match * is not found or ErrorMsg has no SQLState, returns * the SQLState bound to the GENERIC_ERROR * ErrorMsg. - * - * @param mesg An error message string + * + * @param mesg + * An error message string * @return SQLState */ public static String findSQLState(String mesg) { @@ -161,33 +157,41 @@ return GENERIC_ERROR.getSQLState(); } - //first see if there is a direct match + // first see if there is a direct match ErrorMsg errorMsg = mesgToErrorMsgMap.get(mesg); if (errorMsg != null) { - if (errorMsg.getSQLState() != null) + if (errorMsg.getSQLState() != null) { return errorMsg.getSQLState(); - else + } else { return GENERIC_ERROR.getSQLState(); + } } - //if not see if the mesg follows type of format, which is typically the case: - //line 1:14 Table not found table_name + // if not see if the mesg follows type of format, which is typically the + // case: + // line 1:14 Table not found table_name String truncatedMesg = mesg.trim(); Matcher match = ERROR_MESSAGE_PATTERN.matcher(mesg); - if (match.matches()) truncatedMesg = match.group(1); + if (match.matches()) { + truncatedMesg = match.group(1); + } - //appends might exist after the root message, so strip tokens off until we match + // appends might exist after the root message, so strip tokens off until we + // match while (truncatedMesg.length() > minMesgLength) { errorMsg = mesgToErrorMsgMap.get(truncatedMesg.trim()); if (errorMsg != null) { - if (errorMsg.getSQLState() != null) + if (errorMsg.getSQLState() != null) { return errorMsg.getSQLState(); - else + } else { return GENERIC_ERROR.getSQLState(); + } } int lastSpace = truncatedMesg.lastIndexOf(SPACE); - if (lastSpace == -1) break; + if (lastSpace == -1) { + break; + } // hack off the last word and try again truncatedMesg = truncatedMesg.substring(0, lastSpace).trim(); @@ -197,7 +201,7 @@ } ErrorMsg(String mesg) { - //42000 is the generic SQLState for syntax error. 
+ // 42000 is the generic SQLState for syntax error. this(mesg, "42000"); } @@ -211,7 +215,7 @@ return tree.getToken().getLine(); } - return getLine((ASTNode)tree.getChild(0)); + return getLine((ASTNode) tree.getChild(0)); } private static int getCharPositionInLine(ASTNode tree) { @@ -219,16 +223,17 @@ return tree.getToken().getCharPositionInLine(); } - return getCharPositionInLine((ASTNode)tree.getChild(0)); + return getCharPositionInLine((ASTNode) tree.getChild(0)); } - // Dirty hack as this will throw away spaces and other things - find a better way! + // Dirty hack as this will throw away spaces and other things - find a better + // way! private String getText(ASTNode tree) { if (tree.getChildCount() == 0) { return tree.getText(); } - return getText((ASTNode)tree.getChild(tree.getChildCount() - 1)); + return getText((ASTNode) tree.getChild(tree.getChildCount() - 1)); } public String getMsg(ASTNode tree) { @@ -269,7 +274,7 @@ } String getMsg(Tree tree) { - return getMsg((ASTNode)tree); + return getMsg((ASTNode) tree); } String getMsg(ASTNode tree, String reason) { @@ -277,7 +282,7 @@ } String getMsg(Tree tree, String reason) { - return getMsg((ASTNode)tree, reason); + return getMsg((ASTNode) tree, reason); } public String getMsg(String reason) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (working copy) @@ -36,14 +36,20 @@ commandType.put(HiveParser.TOK_DESCFUNCTION, "DESCFUNCTION"); commandType.put(HiveParser.TOK_MSCK, "MSCK"); commandType.put(HiveParser.TOK_ALTERTABLE_ADDCOLS, "ALTERTABLE_ADDCOLS"); - commandType.put(HiveParser.TOK_ALTERTABLE_REPLACECOLS, "ALTERTABLE_REPLACECOLS"); - commandType.put(HiveParser.TOK_ALTERTABLE_RENAMECOL, "ALTERTABLE_RENAMECOL"); + commandType.put(HiveParser.TOK_ALTERTABLE_REPLACECOLS, + "ALTERTABLE_REPLACECOLS"); + commandType + .put(HiveParser.TOK_ALTERTABLE_RENAMECOL, "ALTERTABLE_RENAMECOL"); commandType.put(HiveParser.TOK_ALTERTABLE_RENAME, "ALTERTABLE_RENAME"); - commandType.put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS"); + commandType + .put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS"); commandType.put(HiveParser.TOK_ALTERTABLE_ADDPARTS, "ALTERTABLE_ADDPARTS"); - commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, "ALTERTABLE_PROPERTIES"); - commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, "ALTERTABLE_SERIALIZER"); - commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, "ALTERTABLE_SERDEPROPERTIES"); + commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, + "ALTERTABLE_PROPERTIES"); + commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, + "ALTERTABLE_SERIALIZER"); + commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, + "ALTERTABLE_SERDEPROPERTIES"); commandType.put(HiveParser.TOK_SHOWTABLES, "SHOWTABLES"); commandType.put(HiveParser.TOK_SHOW_TABLESTATUS, "SHOW_TABLESTATUS"); commandType.put(HiveParser.TOK_SHOWFUNCTIONS, "SHOWFUNCTIONS"); @@ -55,18 +61,23 @@ commandType.put(HiveParser.TOK_QUERY, "QUERY"); } - public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) throws SemanticException { - if(tree.getToken() == null) { - throw new RuntimeException ("Empty Syntax Tree"); + public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) + throws SemanticException { + if (tree.getToken() == null) { + throw new 
RuntimeException("Empty Syntax Tree"); } else { - if (SessionState.get() != null) - SessionState.get().setCommandType(commandType.get(tree.getToken().getType())); + if (SessionState.get() != null) { + SessionState.get().setCommandType( + commandType.get(tree.getToken().getType())); + } switch (tree.getToken().getType()) { - case HiveParser.TOK_EXPLAIN: return new ExplainSemanticAnalyzer(conf); - case HiveParser.TOK_LOAD: return new LoadSemanticAnalyzer(conf); - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_DROPVIEW: + case HiveParser.TOK_EXPLAIN: + return new ExplainSemanticAnalyzer(conf); + case HiveParser.TOK_LOAD: + return new LoadSemanticAnalyzer(conf); + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_DROPVIEW: case HiveParser.TOK_DESCTABLE: case HiveParser.TOK_DESCFUNCTION: case HiveParser.TOK_MSCK: @@ -89,7 +100,8 @@ case HiveParser.TOK_CREATEFUNCTION: case HiveParser.TOK_DROPFUNCTION: return new FunctionSemanticAnalyzer(conf); - default: return new SemanticAnalyzer(conf); + default: + return new SemanticAnalyzer(conf); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (working copy) @@ -18,26 +18,24 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; +import java.util.HashMap; +import java.util.Set; -import org.apache.hadoop.hive.ql.parse.QBParseInfo; -import org.apache.hadoop.hive.ql.parse.QBMetaData; -import org.apache.hadoop.hive.ql.plan.createTableDesc; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.plan.createTableDesc; /** * Implementation of the query block - * + * **/ public class QB { private static final Log LOG = LogFactory.getLog("hive.ql.parse.QB"); - private int numJoins = 0; - private int numGbys = 0; + private final int numJoins = 0; + private final int numGbys = 0; private int numSels = 0; private int numSelDi = 0; private HashMap aliasToTabs; @@ -47,21 +45,22 @@ private QBJoinTree qbjoin; private String id; private boolean isQuery; - private createTableDesc tblDesc = null; // table descriptor of the final results + private createTableDesc tblDesc = null; // table descriptor of the final + // results public void print(String msg) { LOG.info(msg + "alias=" + qbp.getAlias()); - for(String alias: getSubqAliases()) { + for (String alias : getSubqAliases()) { QBExpr qbexpr = getSubqForAlias(alias); - LOG.info(msg+"start subquery " + alias); - qbexpr.print(msg+" "); - LOG.info(msg+"end subquery " + alias); + LOG.info(msg + "start subquery " + alias); + qbexpr.print(msg + " "); + LOG.info(msg + "end subquery " + alias); } } - public QB() { + public QB() { } - + public QB(String outer_id, String alias, boolean isSubQ) { aliasToTabs = new HashMap(); aliasToSubq = new HashMap(); @@ -70,7 +69,7 @@ } qbp = new QBParseInfo(alias, isSubQ); qbm = new QBMetaData(); - this.id = (outer_id == null ? alias : outer_id + ":" + alias); + id = (outer_id == null ? 
alias : outer_id + ":" + alias); } public QBParseInfo getParseInfo() { @@ -95,8 +94,9 @@ public boolean exists(String alias) { alias = alias.toLowerCase(); - if (aliasToTabs.get(alias) != null || aliasToSubq.get(alias) != null) + if (aliasToTabs.get(alias) != null || aliasToSubq.get(alias) != null) { return true; + } return false; } @@ -171,7 +171,7 @@ public boolean isSelectStarQuery() { return qbp.isSelectStarQuery() && aliasToSubq.isEmpty() && !isCTAS(); } - + public createTableDesc getTableDesc() { return tblDesc; } @@ -179,7 +179,7 @@ public void setTableDesc(createTableDesc desc) { tblDesc = desc; } - + /** * Whether this QB is for a CREATE-TABLE-AS-SELECT. */ Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java (working copy) @@ -18,14 +18,16 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; -import org.apache.hadoop.hive.ql.metadata.*; +import java.util.HashMap; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; /** * Implementation of the metadata information related to a query block - * + * **/ public class QBMetaData { @@ -37,25 +39,27 @@ public static final int DEST_REDUCE = 4; public static final int DEST_LOCAL_FILE = 5; - private HashMap aliasToTable; - private HashMap nameToDestTable; - private HashMap nameToDestPartition; - private HashMap nameToDestFile; - private HashMap nameToDestType; + private final HashMap aliasToTable; + private final HashMap nameToDestTable; + private final HashMap nameToDestPartition; + private final HashMap nameToDestFile; + private final HashMap nameToDestType; @SuppressWarnings("unused") private static final Log LOG = LogFactory.getLog(QBMetaData.class.getName()); - + public QBMetaData() { - this.aliasToTable = new HashMap(); - this.nameToDestTable = new HashMap(); - this.nameToDestPartition = new HashMap(); - this.nameToDestFile = new HashMap(); - this.nameToDestType = new HashMap(); + aliasToTable = new HashMap(); + nameToDestTable = new HashMap(); + nameToDestPartition = new HashMap(); + nameToDestFile = new HashMap(); + nameToDestType = new HashMap(); } - // All getXXX needs toLowerCase() because they are directly called from SemanticAnalyzer - // All setXXX does not need it because they are called from QB which already lowercases + // All getXXX needs toLowerCase() because they are directly called from + // SemanticAnalyzer + // All setXXX does not need it because they are called from QB which already + // lowercases // the aliases. 
public HashMap getAliasToTable() { @@ -63,46 +67,46 @@ } public Table getTableForAlias(String alias) { - return this.aliasToTable.get(alias.toLowerCase()); + return aliasToTable.get(alias.toLowerCase()); } public void setSrcForAlias(String alias, Table tab) { - this.aliasToTable.put(alias, tab); + aliasToTable.put(alias, tab); } public void setDestForAlias(String alias, Table tab) { - this.nameToDestType.put(alias, Integer.valueOf(DEST_TABLE)); - this.nameToDestTable.put(alias, tab); + nameToDestType.put(alias, Integer.valueOf(DEST_TABLE)); + nameToDestTable.put(alias, tab); } public void setDestForAlias(String alias, Partition part) { - this.nameToDestType.put(alias, Integer.valueOf(DEST_PARTITION)); - this.nameToDestPartition.put(alias, part); + nameToDestType.put(alias, Integer.valueOf(DEST_PARTITION)); + nameToDestPartition.put(alias, part); } public void setDestForAlias(String alias, String fname, boolean isDfsFile) { - this.nameToDestType.put(alias, - isDfsFile ? Integer.valueOf(DEST_DFS_FILE) : Integer.valueOf(DEST_LOCAL_FILE)); - this.nameToDestFile.put(alias, fname); + nameToDestType.put(alias, isDfsFile ? Integer.valueOf(DEST_DFS_FILE) + : Integer.valueOf(DEST_LOCAL_FILE)); + nameToDestFile.put(alias, fname); } public Integer getDestTypeForAlias(String alias) { - return this.nameToDestType.get(alias.toLowerCase()); + return nameToDestType.get(alias.toLowerCase()); } public Table getDestTableForAlias(String alias) { - return this.nameToDestTable.get(alias.toLowerCase()); + return nameToDestTable.get(alias.toLowerCase()); } public Partition getDestPartitionForAlias(String alias) { - return this.nameToDestPartition.get(alias.toLowerCase()); + return nameToDestPartition.get(alias.toLowerCase()); } public String getDestFileForAlias(String alias) { - return this.nameToDestFile.get(alias.toLowerCase()); + return nameToDestFile.get(alias.toLowerCase()); } public Table getSrcForAlias(String alias) { - return this.aliasToTable.get(alias.toLowerCase()); + return aliasToTable.get(alias.toLowerCase()); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy) @@ -18,30 +18,34 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; -import java.io.File; -import java.io.IOException; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.metadata.*; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.ql.metadata.Partition; - import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import 
org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.InvalidTableException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.mapred.SequenceFileInputFormat; @@ -69,12 +73,18 @@ */ protected Set outputs; - protected static final String TEXTFILE_INPUT = TextInputFormat.class.getName(); - protected static final String TEXTFILE_OUTPUT = IgnoreKeyTextOutputFormat.class.getName(); - protected static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); - protected static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); - protected static final String RCFILE_INPUT = RCFileInputFormat.class.getName(); - protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class.getName(); + protected static final String TEXTFILE_INPUT = TextInputFormat.class + .getName(); + protected static final String TEXTFILE_OUTPUT = IgnoreKeyTextOutputFormat.class + .getName(); + protected static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class + .getName(); + protected static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class + .getName(); + protected static final String RCFILE_INPUT = RCFileInputFormat.class + .getName(); + protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class + .getName(); protected static final String COLUMNAR_SERDE = ColumnarSerDe.class.getName(); public BaseSemanticAnalyzer(HiveConf conf) throws SemanticException { @@ -84,20 +94,18 @@ rootTasks = new ArrayList>(); LOG = LogFactory.getLog(this.getClass().getName()); console = new LogHelper(LOG); - this.idToTableNameMap = new HashMap(); + idToTableNameMap = new HashMap(); inputs = new LinkedHashSet(); outputs = new LinkedHashSet(); } catch (Exception e) { - throw new SemanticException (e); + throw new SemanticException(e); } } - public HashMap getIdToTableNameMap() { return idToTableNameMap; } - public abstract void analyzeInternal(ASTNode ast) throws SemanticException; public void analyze(ASTNode ast, Context ctx) throws SemanticException { @@ -121,7 +129,8 @@ } /** - * @param fetchTask the fetchTask to set + * @param fetchTask + * the fetchTask to set */ public void setFetchTask(Task fetchTask) { this.fetchTask = fetchTask; @@ -148,42 +157,42 @@ } public static String charSetString(String charSetName, String charSetString) - throws SemanticException { - try + throws SemanticException { + try { + // The character set name starts with a _, so strip that + charSetName = charSetName.substring(1); + if (charSetString.charAt(0) == '\'') { + return new String(unescapeSQLString(charSetString).getBytes(), + charSetName); + } else // hex input is also supported { - // The character set name starts with a _, so strip that - charSetName = charSetName.substring(1); - if (charSetString.charAt(0) == '\'') - return new String(unescapeSQLString(charSetString).getBytes(), charSetName); - else // hex input is also supported - { - assert charSetString.charAt(0) == '0'; - assert charSetString.charAt(1) == 'x'; - 
charSetString = charSetString.substring(2); + assert charSetString.charAt(0) == '0'; + assert charSetString.charAt(1) == 'x'; + charSetString = charSetString.substring(2); - byte[] bArray = new byte[charSetString.length()/2]; - int j = 0; - for (int i = 0; i < charSetString.length(); i += 2) - { - int val = Character.digit(charSetString.charAt(i), 16) * 16 + Character.digit(charSetString.charAt(i+1), 16); - if (val > 127) - val = val - 256; - bArray[j++] = new Integer(val).byteValue(); - } + byte[] bArray = new byte[charSetString.length() / 2]; + int j = 0; + for (int i = 0; i < charSetString.length(); i += 2) { + int val = Character.digit(charSetString.charAt(i), 16) * 16 + + Character.digit(charSetString.charAt(i + 1), 16); + if (val > 127) { + val = val - 256; + } + bArray[j++] = new Integer(val).byteValue(); + } - String res = new String(bArray, charSetName); - return res; - } - } catch (UnsupportedEncodingException e) { + String res = new String(bArray, charSetName); + return res; + } + } catch (UnsupportedEncodingException e) { throw new SemanticException(e); } } /** - * Remove the encapsulating "`" pair from the identifier. - * We allow users to use "`" to escape identifier for table names, - * column names and aliases, in case that coincide with Hive language - * keywords. + * Remove the encapsulating "`" pair from the identifier. We allow users to + * use "`" to escape identifier for table names, column names and aliases, in + * case that coincide with Hive language keywords. */ public static String unescapeIdentifier(String val) { if (val == null) { @@ -196,7 +205,7 @@ } @SuppressWarnings("nls") - public static String unescapeSQLString(String b) { + public static String unescapeSQLString(String b) { Character enclosure = null; @@ -204,7 +213,7 @@ // delimiter can be passed in as \002 - So, we first check if the // string is a unicode number, else go back to the old behavior StringBuilder sb = new StringBuilder(b.length()); - for (int i=0; i < b.length(); i++) { + for (int i = 0; i < b.length(); i++) { char currentChar = b.charAt(i); if (enclosure == null) { @@ -220,40 +229,61 @@ continue; } - if (currentChar == '\\' && (i+4 < b.length())) { - char i1 = b.charAt(i+1); - char i2 = b.charAt(i+2); - char i3 = b.charAt(i+3); - if ((i1 >= '0' && i1 <= '1') && - (i2 >= '0' && i2 <= '7') && - (i3 >= '0' && i3 <= '7')) - { - byte bVal = (byte)((i3 - '0') + ((i2 - '0') * 8 ) + ((i1 - '0') * 8 * 8)); - byte[] bValArr = new byte[1]; - bValArr[0] = bVal; - String tmp = new String(bValArr); - sb.append(tmp); - i += 3; - continue; - } + if (currentChar == '\\' && (i + 4 < b.length())) { + char i1 = b.charAt(i + 1); + char i2 = b.charAt(i + 2); + char i3 = b.charAt(i + 3); + if ((i1 >= '0' && i1 <= '1') && (i2 >= '0' && i2 <= '7') + && (i3 >= '0' && i3 <= '7')) { + byte bVal = (byte) ((i3 - '0') + ((i2 - '0') * 8) + ((i1 - '0') * 8 * 8)); + byte[] bValArr = new byte[1]; + bValArr[0] = bVal; + String tmp = new String(bValArr); + sb.append(tmp); + i += 3; + continue; + } } - if (currentChar == '\\' && (i+2 < b.length())) { - char n=b.charAt(i+1); - switch(n) { - case '0': sb.append("\0"); break; - case '\'': sb.append("'"); break; - case '"': sb.append("\""); break; - case 'b': sb.append("\b"); break; - case 'n': sb.append("\n"); break; - case 'r': sb.append("\r"); break; - case 't': sb.append("\t"); break; - case 'Z': sb.append("\u001A"); break; - case '\\': sb.append("\\"); break; - // The following 2 lines are exactly what MySQL does - case '%': sb.append("\\%"); break; - case '_': 
sb.append("\\_"); break; - default: sb.append(n); + if (currentChar == '\\' && (i + 2 < b.length())) { + char n = b.charAt(i + 1); + switch (n) { + case '0': + sb.append("\0"); + break; + case '\'': + sb.append("'"); + break; + case '"': + sb.append("\""); + break; + case 'b': + sb.append("\b"); + break; + case 'n': + sb.append("\n"); + break; + case 'r': + sb.append("\r"); + break; + case 't': + sb.append("\t"); + break; + case 'Z': + sb.append("\u001A"); + break; + case '\\': + sb.append("\\"); + break; + // The following 2 lines are exactly what MySQL does + case '%': + sb.append("\\%"); + break; + case '_': + sb.append("\\_"); + break; + default: + sb.append(n); } i++; } else { @@ -272,114 +302,123 @@ } /** - * Get the list of FieldSchema out of the ASTNode. + * Get the list of FieldSchema out of the ASTNode. */ - protected List getColumns(ASTNode ast) throws SemanticException - { + protected List getColumns(ASTNode ast) throws SemanticException { List colList = new ArrayList(); int numCh = ast.getChildCount(); for (int i = 0; i < numCh; i++) { FieldSchema col = new FieldSchema(); - ASTNode child = (ASTNode)ast.getChild(i); - + ASTNode child = (ASTNode) ast.getChild(i); + // child 0 is the name of the column col.setName(unescapeIdentifier(child.getChild(0).getText())); // child 1 is the type of the column - ASTNode typeChild = (ASTNode)(child.getChild(1)); + ASTNode typeChild = (ASTNode) (child.getChild(1)); col.setType(getTypeStringFromAST(typeChild)); - + // child 2 is the optional comment of the column - if (child.getChildCount() == 3) + if (child.getChildCount() == 3) { col.setComment(unescapeSQLString(child.getChild(2).getText())); + } colList.add(col); } return colList; } - - protected List getColumnNames(ASTNode ast) - { + + protected List getColumnNames(ASTNode ast) { List colList = new ArrayList(); int numCh = ast.getChildCount(); for (int i = 0; i < numCh; i++) { - ASTNode child = (ASTNode)ast.getChild(i); + ASTNode child = (ASTNode) ast.getChild(i); colList.add(unescapeIdentifier(child.getText())); } return colList; } - - protected List getColumnNamesOrder(ASTNode ast) - { + + protected List getColumnNamesOrder(ASTNode ast) { List colList = new ArrayList(); int numCh = ast.getChildCount(); for (int i = 0; i < numCh; i++) { - ASTNode child = (ASTNode)ast.getChild(i); - if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC) - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 1)); - else - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 0)); + ASTNode child = (ASTNode) ast.getChild(i); + if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), + 1)); + } else { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), + 0)); + } } return colList; } - - protected static String getTypeStringFromAST(ASTNode typeNode) throws SemanticException { + + protected static String getTypeStringFromAST(ASTNode typeNode) + throws SemanticException { switch (typeNode.getType()) { case HiveParser.TOK_LIST: return Constants.LIST_TYPE_NAME + "<" - + getTypeStringFromAST((ASTNode)typeNode.getChild(0)) + ">"; + + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + ">"; case HiveParser.TOK_MAP: return Constants.MAP_TYPE_NAME + "<" - + getTypeStringFromAST((ASTNode)typeNode.getChild(0)) + "," - + getTypeStringFromAST((ASTNode)typeNode.getChild(1)) + ">"; + + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + "," + + 
getTypeStringFromAST((ASTNode) typeNode.getChild(1)) + ">"; case HiveParser.TOK_STRUCT: return getStructTypeStringFromAST(typeNode); default: return DDLSemanticAnalyzer.getTypeName(typeNode.getType()); } } - + private static String getStructTypeStringFromAST(ASTNode typeNode) throws SemanticException { String typeStr = Constants.STRUCT_TYPE_NAME + "<"; typeNode = (ASTNode) typeNode.getChild(0); int children = typeNode.getChildCount(); - if(children <= 0) + if (children <= 0) { throw new SemanticException("empty struct not allowed."); + } for (int i = 0; i < children; i++) { ASTNode child = (ASTNode) typeNode.getChild(i); typeStr += unescapeIdentifier(child.getChild(0).getText()) + ":"; typeStr += getTypeStringFromAST((ASTNode) child.getChild(1)); - if (i < children - 1) + if (i < children - 1) { typeStr += ","; + } } - + typeStr += ">"; return typeStr; } - - + public static class tableSpec { public String tableName; public Table tableHandle; public HashMap partSpec; public Partition partHandle; - public tableSpec(Hive db, HiveConf conf, ASTNode ast) throws SemanticException { + public tableSpec(Hive db, HiveConf conf, ASTNode ast) + throws SemanticException { - assert(ast.getToken().getType() == HiveParser.TOK_TAB); + assert (ast.getToken().getType() == HiveParser.TOK_TAB); int childIndex = 0; try { // get table metadata tableName = unescapeIdentifier(ast.getChild(0).getText()); boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); - if (testMode) - tableName = conf.getVar(HiveConf.ConfVars.HIVETESTMODEPREFIX) + tableName; + if (testMode) { + tableName = conf.getVar(HiveConf.ConfVars.HIVETESTMODEPREFIX) + + tableName; + } - tableHandle = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + tableHandle = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, + tableName); } catch (InvalidTableException ite) { - throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(ast.getChild(0)), ite); + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(ast + .getChild(0)), ite); } catch (HiveException e) { - throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(ast.getChild(childIndex), e.getMessage()), e); + throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(ast + .getChild(childIndex), e.getMessage()), e); } // get partition metadata if partition specified if (ast.getChildCount() == 2) { @@ -389,23 +428,26 @@ for (int i = 0; i < partspec.getChildCount(); ++i) { ASTNode partspec_val = (ASTNode) partspec.getChild(i); String val = stripQuotes(partspec_val.getChild(1).getText()); - partSpec.put(unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase()), val); + partSpec.put(unescapeIdentifier(partspec_val.getChild(0).getText() + .toLowerCase()), val); } try { // this doesn't create partition. 
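For context, a small usage sketch of the two public unescape helpers that the partition-spec handling just above relies on. This assumes the patched BaseSemanticAnalyzer is on the classpath; the literal inputs are illustrative, and the expected values follow directly from the quote/escape handling earlier in this hunk.

import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;

public class UnescapeSketch {
  public static void main(String[] args) {
    // Encapsulating backtick pair stripped from an escaped identifier.
    System.out.println(BaseSemanticAnalyzer.unescapeIdentifier("`key`"));             // key
    // Enclosing quotes dropped and \t decoded to a real tab character.
    System.out.println(BaseSemanticAnalyzer.unescapeSQLString("'a\\tb'"));             // a<TAB>b
    // A three-digit octal escape such as \011 also decodes to a single character.
    System.out.println(BaseSemanticAnalyzer.unescapeSQLString("'a\\011b'").length());  // 3
  }
}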
partition is created in MoveTask partHandle = new Partition(tableHandle, partSpec, null); } catch (HiveException e) { - throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(ast.getChild(childIndex))); + throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(ast + .getChild(childIndex))); } } } - + @Override public String toString() { - if(partHandle != null) + if (partHandle != null) { return partHandle.toString(); - else + } else { return tableHandle.toString(); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNodeOrigin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNodeOrigin.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNodeOrigin.java (working copy) @@ -20,20 +20,20 @@ /** * ASTNodeOrigin contains contextual information about the object from whose - * definition a particular ASTNode originated. For example, suppose a view v - * is defined as select x+1 as y from t, and we're processing a - * query select v1.y from v as v1, and there's a type-checking - * problem with the expression x+1 due to an ALTER TABLE on t - * subsequent to the creation of v. Then, when reporting the error, we want to - * provide the parser location with respect to the definition of v (rather than - * with respect to the top-level query, since that represents a completely - * different "parser coordinate system"). - * + * definition a particular ASTNode originated. For example, suppose a view v is + * defined as select x+1 as y from t, and we're processing a query + * select v1.y from v as v1, and there's a type-checking problem + * with the expression x+1 due to an ALTER TABLE on t subsequent to + * the creation of v. Then, when reporting the error, we want to provide the + * parser location with respect to the definition of v (rather than with respect + * to the top-level query, since that represents a completely different + * "parser coordinate system"). + * *
- * + * * So, when expanding the definition of v while analyzing the top-level query, - * we tag each ASTNode with a reference to an ASTNodeOrign describing v - * and its usage within the query. + * we tag each ASTNode with a reference to an ASTNodeOrign describing v and its + * usage within the query. */ public class ASTNodeOrigin { private final String objectType; @@ -42,12 +42,8 @@ private final String usageAlias; private final ASTNode usageNode; - public ASTNodeOrigin( - String objectType, - String objectName, - String objectDefinition, - String usageAlias, - ASTNode usageNode) { + public ASTNodeOrigin(String objectType, String objectName, + String objectDefinition, String usageAlias, ASTNode usageNode) { this.objectType = objectType; this.objectName = objectName; this.objectDefinition = objectDefinition; @@ -56,32 +52,31 @@ } /** - * @return the type of the object from which an ASTNode originated, - * e.g. "view". + * @return the type of the object from which an ASTNode originated, e.g. + * "view". */ public String getObjectType() { return objectType; } /** - * @return the name of the object from which an ASTNode originated, - * e.g. "v". + * @return the name of the object from which an ASTNode originated, e.g. "v". */ public String getObjectName() { return objectName; } /** - * @return the definition of the object from which an ASTNode originated, - * e.g. select x+1 as y from t. + * @return the definition of the object from which an ASTNode originated, e.g. + * select x+1 as y from t. */ public String getObjectDefinition() { return objectDefinition; } /** - * @return the alias of the object from which an ASTNode originated, - * e.g. "v1" (this can help with debugging context-dependent expansions) + * @return the alias of the object from which an ASTNode originated, e.g. "v1" + * (this can help with debugging context-dependent expansions) */ public String getUsageAlias() { return usageAlias; @@ -89,8 +84,8 @@ /** * @return the expression node triggering usage of an object from which an - * ASTNode originated, e.g. v as v1 (this can help with - * debugging context-dependent expansions) + * ASTNode originated, e.g. 
v as v1 (this can help with + * debugging context-dependent expansions) */ public ASTNode getUsageNode() { return usageNode; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -79,7 +79,6 @@ import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; @@ -93,7 +92,6 @@ import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.physical.GenMRSkewJoinProcessor; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; @@ -113,7 +111,6 @@ import org.apache.hadoop.hive.ql.plan.fetchWork; import org.apache.hadoop.hive.ql.plan.fileSinkDesc; import org.apache.hadoop.hive.ql.plan.filterDesc; -import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.forwardDesc; import org.apache.hadoop.hive.ql.plan.groupByDesc; import org.apache.hadoop.hive.ql.plan.joinDesc; @@ -131,6 +128,7 @@ import org.apache.hadoop.hive.ql.plan.tableScanDesc; import org.apache.hadoop.hive.ql.plan.udtfDesc; import org.apache.hadoop.hive.ql.plan.unionDesc; +import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; @@ -164,7 +162,7 @@ private List loadTableWork; private List loadFileWork; private Map joinContext; - private HashMap topToTable; + private final HashMap topToTable; private QB qb; private ASTNode ast; private int destTableId; @@ -175,7 +173,7 @@ Map prunedPartitions; private createViewDesc createVwDesc; private ASTNode viewSelect; - private UnparseTranslator unparseTranslator; + private final UnparseTranslator unparseTranslator; private static class Phase1Ctx { String dest; @@ -186,39 +184,39 @@ super(conf); - this.opToPartPruner = new HashMap(); - this.opToSamplePruner = new HashMap(); - this.topOps = new HashMap>(); - this.topSelOps = new HashMap>(); - this.loadTableWork = new ArrayList(); - this.loadFileWork = new ArrayList(); + opToPartPruner = new HashMap(); + opToSamplePruner = new HashMap(); + topOps = new HashMap>(); + topSelOps = new HashMap>(); + loadTableWork = new ArrayList(); + loadFileWork = new ArrayList(); opParseCtx = new LinkedHashMap, OpParseContext>(); joinContext = new HashMap(); topToTable = new HashMap(); - this.destTableId = 1; - this.uCtx = null; - this.listMapJoinOpsNoReducer = new ArrayList(); - this.groupOpToInputTables = new HashMap>(); - prunedPartitions = new HashMap (); + destTableId = 1; + uCtx = null; + listMapJoinOpsNoReducer = new ArrayList(); + groupOpToInputTables = new HashMap>(); + prunedPartitions = new HashMap(); unparseTranslator = new 
UnparseTranslator(); } @Override protected void reset() { super.reset(); - this.loadTableWork.clear(); - this.loadFileWork.clear(); - this.topOps.clear(); - this.topSelOps.clear(); - this.destTableId = 1; - this.idToTableNameMap.clear(); + loadTableWork.clear(); + loadFileWork.clear(); + topOps.clear(); + topSelOps.clear(); + destTableId = 1; + idToTableNameMap.clear(); qb = null; ast = null; uCtx = null; - this.joinContext.clear(); - this.opParseCtx.clear(); - this.groupOpToInputTables.clear(); - this.prunedPartitions.clear(); + joinContext.clear(); + opParseCtx.clear(); + groupOpToInputTables.clear(); + prunedPartitions.clear(); } public void init(ParseContext pctx) { @@ -233,24 +231,23 @@ ctx = pctx.getContext(); destTableId = pctx.getDestTableId(); idToTableNameMap = pctx.getIdToTableNameMap(); - this.uCtx = pctx.getUCtx(); - this.listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); + uCtx = pctx.getUCtx(); + listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); qb = pctx.getQB(); - this.groupOpToInputTables = pctx.getGroupOpToInputTables(); - this.prunedPartitions = pctx.getPrunedPartitions(); + groupOpToInputTables = pctx.getGroupOpToInputTables(); + prunedPartitions = pctx.getPrunedPartitions(); } public ParseContext getParseContext() { - return new ParseContext(conf, qb, ast, opToPartPruner, topOps, - topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, - loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, - groupOpToInputTables, prunedPartitions, opToSamplePruner); + return new ParseContext(conf, qb, ast, opToPartPruner, topOps, topSelOps, + opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, ctx, + idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, + groupOpToInputTables, prunedPartitions, opToSamplePruner); } @SuppressWarnings("nls") - public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, - String alias) throws SemanticException { + public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias) + throws SemanticException { assert (ast.getToken() != null); switch (ast.getToken().getType()) { @@ -296,7 +293,7 @@ /** * DFS-scan the expressionTree to find all aggregation subtrees and put them * in aggregations. - * + * * @param expressionTree * @param aggregations * the key to the HashTable is the toStringTree() representation of @@ -308,13 +305,14 @@ || expressionTree.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { assert (expressionTree.getChildCount() != 0); if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0).getText()); + String functionName = unescapeIdentifier(expressionTree.getChild(0) + .getText()); if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { aggregations.put(expressionTree.toStringTree(), expressionTree); FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName); if (!fi.isNative()) { - unparseTranslator.addIdentifierTranslation( - (ASTNode) expressionTree.getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree + .getChild(0)); } return; } @@ -336,7 +334,8 @@ if (expr == null) { expr = value; } else { - throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg()); + throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS + .getMsg()); } } } @@ -347,7 +346,7 @@ * Goes though the tabref tree and finds the alias for the table. 
Once found, * it records the table name-> alias association in aliasToTabs. It also makes * an association from the alias to the table AST in parse info. - * + * * @return the alias of the table */ private String processTable(QB qb, ASTNode tabref) throws SemanticException { @@ -360,47 +359,48 @@ // tablename tablesample // OR // tablename alias - ASTNode ct = (ASTNode)tabref.getChild(1); + ASTNode ct = (ASTNode) tabref.getChild(1); if (ct.getToken().getType() == HiveParser.TOK_TABLESAMPLE) { tableSamplePresent = true; - } - else { + } else { aliasIndex = 1; } - } - else if (tabref.getChildCount() == 3) { + } else if (tabref.getChildCount() == 3) { // table name table sample alias aliasIndex = 2; tableSamplePresent = true; } - ASTNode tableTree = (ASTNode)(tabref.getChild(0)); + ASTNode tableTree = (ASTNode) (tabref.getChild(0)); String alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText()); // If the alias is already there then we have a conflict if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref.getChild(aliasIndex))); + throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref + .getChild(aliasIndex))); } if (tableSamplePresent) { - ASTNode sampleClause = (ASTNode)tabref.getChild(1); + ASTNode sampleClause = (ASTNode) tabref.getChild(1); ArrayList sampleCols = new ArrayList(); if (sampleClause.getChildCount() > 2) { for (int i = 2; i < sampleClause.getChildCount(); i++) { - sampleCols.add((ASTNode)sampleClause.getChild(i)); + sampleCols.add((ASTNode) sampleClause.getChild(i)); } } // TODO: For now only support sampling on up to two columns // Need to change it to list of columns if (sampleCols.size() > 2) { - throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref.getChild(0))); + throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref + .getChild(0))); } - qb.getParseInfo().setTabSample(alias, new TableSample( - unescapeIdentifier(sampleClause.getChild(0).getText()), - unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols) - ); + qb.getParseInfo().setTabSample( + alias, + new TableSample( + unescapeIdentifier(sampleClause.getChild(0).getText()), + unescapeIdentifier(sampleClause.getChild(1).getText()), + sampleCols)); if (unparseTranslator.isEnabled()) { for (ASTNode sampleCol : sampleCols) { - unparseTranslator.addIdentifierTranslation( - (ASTNode) sampleCol.getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol + .getChild(0)); } } } @@ -412,10 +412,10 @@ unparseTranslator.addIdentifierTranslation(tableTree); if (aliasIndex != 0) { - unparseTranslator.addIdentifierTranslation( - (ASTNode) tabref.getChild(aliasIndex)); + unparseTranslator.addIdentifierTranslation((ASTNode) tabref + .getChild(aliasIndex)); } - + return alias; } @@ -435,26 +435,26 @@ // If the alias is already there then we have a conflict if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq.getChild(1))); + throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq + .getChild(1))); } // Insert this map into the stats qb.setSubqAlias(alias, qbexpr); - unparseTranslator.addIdentifierTranslation( - (ASTNode) subq.getChild(1)); + unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1)); return alias; } - private boolean isJoinToken(ASTNode node) - { - if ((node.getToken().getType() == HiveParser.TOK_JOIN) || - (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) || - (node.getToken().getType() == 
HiveParser.TOK_RIGHTOUTERJOIN) || - (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) || - (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) || - (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) + private boolean isJoinToken(ASTNode node) { + if ((node.getToken().getType() == HiveParser.TOK_JOIN) + || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) + || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) { return true; + } return false; } @@ -462,7 +462,7 @@ /** * Given the AST with TOK_JOIN as the root, get all the aliases for the tables * or subqueries in the join. - * + * * @param qb * @param join * @throws SemanticException @@ -471,8 +471,9 @@ private void processJoin(QB qb, ASTNode join) throws SemanticException { int numChildren = join.getChildCount(); if ((numChildren != 2) && (numChildren != 3) - && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) + && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) { throw new SemanticException("Join with multiple children"); + } for (int num = 0; num < numChildren; num++) { ASTNode child = (ASTNode) join.getChild(num); @@ -485,8 +486,8 @@ // is not supported. Instead, the lateral view must be in a subquery // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a // JOIN src2 ... - throw new - SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN.getMsg(join)); + throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN + .getMsg(join)); } else if (isJoinToken(child)) { processJoin(qb, child); } @@ -497,7 +498,7 @@ * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the * table or subquery in the lateral view and also make a mapping from the * alias to all the lateral view AST's - * + * * @param qb * @param lateralView * @return the alias for the table/subquery @@ -505,15 +506,15 @@ */ private String processLateralView(QB qb, ASTNode lateralView) - throws SemanticException { + throws SemanticException { int numChildren = lateralView.getChildCount(); - assert(numChildren == 2); + assert (numChildren == 2); ASTNode next = (ASTNode) lateralView.getChild(1); String alias = null; - switch(next.getToken().getType()) { + switch (next.getToken().getType()) { case HiveParser.TOK_TABREF: alias = processTable(qb, next); break; @@ -524,8 +525,8 @@ alias = processLateralView(qb, next); break; default: - throw new SemanticException( - ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(lateralView)); + throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD + .getMsg(lateralView)); } qb.getParseInfo().addLateralViewForAlias(alias, lateralView); return alias; @@ -533,23 +534,21 @@ /** * Phase 1: (including, but not limited to): - * + * * 1. Gets all the aliases for all the tables / subqueries and makes the - * appropriate mapping in aliasToTabs, aliasToSubq - * 2. Gets the location of the destination and names the clase "inclause" + i - * 3. Creates a map from a string representation of an aggregation tree to the - * actual aggregation AST + * appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the + * destination and names the clase "inclause" + i 3. Creates a map from a + * string representation of an aggregation tree to the actual aggregation AST * 4. Creates a mapping from the clause name to the select expression AST in - * destToSelExpr - * 5. 
Creates a mapping from a table alias to the lateral view AST's in - * aliasToLateralViews - * + * destToSelExpr 5. Creates a mapping from a table alias to the lateral view + * AST's in aliasToLateralViews + * * @param ast * @param qb * @param ctx_1 * @throws SemanticException */ - @SuppressWarnings({"fallthrough", "nls"}) + @SuppressWarnings( { "fallthrough", "nls" }) public void doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1) throws SemanticException { @@ -566,8 +565,9 @@ qb.countSel(); qbp.setSelExprForClause(ctx_1.dest, ast); - if (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) - qbp.setHints((ASTNode)ast.getChild(0)); + if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) { + qbp.setHints((ASTNode) ast.getChild(0)); + } LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast); qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); @@ -585,10 +585,12 @@ // is there a insert in the subquery if (qbp.getIsSubQ()) { - ASTNode ch = (ASTNode)ast.getChild(0); - if ((ch.getToken().getType() != HiveParser.TOK_DIR) || - (((ASTNode)ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) - throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); + ASTNode ch = (ASTNode) ast.getChild(0); + if ((ch.getToken().getType() != HiveParser.TOK_DIR) + || (((ASTNode) ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) { + throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY + .getMsg(ast)); + } } qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); @@ -596,8 +598,9 @@ case HiveParser.TOK_FROM: int child_count = ast.getChildCount(); - if (child_count != 1) + if (child_count != 1) { throw new SemanticException("Multiple Children " + child_count); + } // Check if this is a subquery / lateral view ASTNode frm = (ASTNode) ast.getChild(0); @@ -620,15 +623,17 @@ break; case HiveParser.TOK_DISTRIBUTEBY: - // Get the distribute by aliases - these are aliased to the entries in the + // Get the distribute by aliases - these are aliased to the entries in + // the // select list qbp.setDistributeByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); + throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT + .getMsg(ast)); + } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT + .getMsg(ast)); } - else if (qbp.getOrderByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); - } break; case HiveParser.TOK_SORTBY: @@ -636,11 +641,12 @@ // select list qbp.setSortByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg(ast)); + throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT + .getMsg(ast)); + } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT + .getMsg(ast)); } - else if (qbp.getOrderByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg(ast)); - } break; @@ -649,7 +655,8 @@ // select list qbp.setOrderByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg(ast)); + throw new 
SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT + .getMsg(ast)); } break; @@ -657,7 +664,8 @@ // Get the groupby aliases - these are aliased to the entries in the // select list if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg(ast)); + throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY + .getMsg(ast)); } qbp.setGroupByExprForClause(ctx_1.dest, ast); skipRecursion = true; @@ -668,10 +676,12 @@ break; case HiveParser.TOK_UNION: - // currently, we dont support subq1 union subq2 - the user has to explicitly say: + // currently, we dont support subq1 union subq2 - the user has to + // explicitly say: // select * from (subq1 union subq2) subqalias - if (!qbp.getIsSubQ()) + if (!qbp.getIsSubQ()) { throw new SemanticException(ErrorMsg.UNION_NOTIN_SUBQ.getMsg()); + } default: skipRecursion = false; @@ -713,18 +723,20 @@ String tab_name = qb.getTabNameForAlias(alias); Table tab = null; try { - tab = this.db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name); + tab = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name); + } catch (InvalidTableException ite) { + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb + .getParseInfo().getSrcForAlias(alias))); } - catch (InvalidTableException ite) { - throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); - } if (tab.isView()) { replaceViewReferenceWithDefinition(qb, tab, tab_name, alias); continue; } - if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) - throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); + if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) { + throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE + .getMsg(qb.getParseInfo().getSrcForAlias(alias))); + } qb.getMetaData().setSrcForAlias(alias, tab); } @@ -745,15 +757,18 @@ ASTNode ast = qbp.getDestForClause(name); switch (ast.getToken().getType()) { case HiveParser.TOK_TAB: { - tableSpec ts = new tableSpec(this.db, conf, ast); + tableSpec ts = new tableSpec(db, conf, ast); if (ts.tableHandle.isView()) { throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg()); } - if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle.getOutputFormatClass())) - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(ast)); + if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle + .getOutputFormatClass())) { + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE + .getMsg(ast)); + } - if(ts.partSpec == null) { + if (ts.partSpec == null) { // This is a table qb.getMetaData().setDestForAlias(name, ts.tableHandle); } else { @@ -763,49 +778,45 @@ break; } case HiveParser.TOK_LOCAL_DIR: - case HiveParser.TOK_DIR: - { - // This is a dfs file - String fname = stripQuotes(ast.getChild(0).getText()); - if ((!qb.getParseInfo().getIsSubQ()) && - (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) - { - fname = ctx.getMRTmpFileURI(); - ctx.setResDir(new Path(fname)); + case HiveParser.TOK_DIR: { + // This is a dfs file + String fname = stripQuotes(ast.getChild(0).getText()); + if ((!qb.getParseInfo().getIsSubQ()) + && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) { + fname = ctx.getMRTmpFileURI(); + ctx.setResDir(new Path(fname)); - if ( qb.isCTAS() ) { - qb.setIsQuery(false); - } else { - qb.setIsQuery(true); - } + if 
(qb.isCTAS()) { + qb.setIsQuery(false); + } else { + qb.setIsQuery(true); } - qb.getMetaData().setDestForAlias(name, fname, - (ast.getToken().getType() == HiveParser.TOK_DIR)); - break; } + qb.getMetaData().setDestForAlias(name, fname, + (ast.getToken().getType() == HiveParser.TOK_DIR)); + break; + } default: - throw new SemanticException("Unknown Token Type " + ast.getToken().getType()); + throw new SemanticException("Unknown Token Type " + + ast.getToken().getType()); } } } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } } - private void replaceViewReferenceWithDefinition( - QB qb, Table tab, String tab_name, String alias) throws SemanticException { + private void replaceViewReferenceWithDefinition(QB qb, Table tab, + String tab_name, String alias) throws SemanticException { ParseDriver pd = new ParseDriver(); ASTNode viewTree; - final ASTNodeOrigin viewOrigin = - new ASTNodeOrigin( - "VIEW", - tab.getName(), - tab.getViewExpandedText(), - alias, - qb.getParseInfo().getSrcForAlias(alias)); + final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getName(), + tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias( + alias)); try { String viewText = tab.getViewExpandedText(); // Reparse text, passing null for context to avoid clobbering @@ -813,19 +824,17 @@ ASTNode tree = pd.parse(viewText, null); tree = ParseUtils.findRootNonNullToken(tree); viewTree = tree; - Dispatcher nodeOriginDispatcher = new Dispatcher() - { - public Object dispatch( - Node nd, java.util.Stack stack, Object... nodeOutputs) - { - ((ASTNode) nd).setOrigin(viewOrigin); - return null; - } - }; - GraphWalker nodeOriginTagger = - new DefaultGraphWalker(nodeOriginDispatcher); - nodeOriginTagger.startWalking( - java.util.Collections.singleton(viewTree), null); + Dispatcher nodeOriginDispatcher = new Dispatcher() { + public Object dispatch(Node nd, java.util.Stack stack, + Object... nodeOutputs) { + ((ASTNode) nd).setOrigin(viewOrigin); + return null; + } + }; + GraphWalker nodeOriginTagger = new DefaultGraphWalker( + nodeOriginDispatcher); + nodeOriginTagger.startWalking(java.util.Collections + . 
singleton(viewTree), null); } catch (ParseException e) { // A user could encounter this if a stored view definition contains // an old SQL construct which has been eliminated in a later Hive @@ -843,43 +852,51 @@ } private boolean isPresent(String[] list, String elem) { - for (String s : list) - if (s.equals(elem)) + for (String s : list) { + if (s.equals(elem)) { return true; + } + } return false; } @SuppressWarnings("nls") - private void parseJoinCondPopulateAlias(QBJoinTree joinTree, - ASTNode condn, Vector leftAliases, Vector rightAliases, - ArrayList fields) - throws SemanticException { + private void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, + Vector leftAliases, Vector rightAliases, + ArrayList fields) throws SemanticException { // String[] allAliases = joinTree.getAllAliases(); switch (condn.getToken().getType()) { case HiveParser.TOK_TABLE_OR_COL: - String tableOrCol = unescapeIdentifier(condn.getChild(0).getText().toLowerCase()); - unparseTranslator.addIdentifierTranslation( - (ASTNode) condn.getChild(0)); + String tableOrCol = unescapeIdentifier(condn.getChild(0).getText() + .toLowerCase()); + unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0)); if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { - if (!leftAliases.contains(tableOrCol)) + if (!leftAliases.contains(tableOrCol)) { leftAliases.add(tableOrCol); + } } else if (isPresent(joinTree.getRightAliases(), tableOrCol)) { - if (!rightAliases.contains(tableOrCol)) + if (!rightAliases.contains(tableOrCol)) { rightAliases.add(tableOrCol); + } } else { - // We don't support columns without table prefix in JOIN condition right now. - // We need to pass Metadata here to know which table the column belongs to. - throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn.getChild(0))); + // We don't support columns without table prefix in JOIN condition right + // now. + // We need to pass Metadata here to know which table the column belongs + // to. 
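As a rough standalone illustration of the bookkeeping described in the comment above (the class, method, and alias names here are invented for the example, not Hive's), each column reference in an ON clause is attributed to whichever side of the join declares its table alias; an unqualified or unknown name is rejected because the analyzer has no metadata at this point to decide which table it belongs to, mirroring the INVALID_TABLE_ALIAS rejection in the surrounding hunk.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class JoinAliasSketch {
  static void attribute(String tableOrCol, List<String> leftTables, List<String> rightTables,
      List<String> leftAliases, List<String> rightAliases) {
    if (leftTables.contains(tableOrCol)) {
      if (!leftAliases.contains(tableOrCol)) {
        leftAliases.add(tableOrCol);    // reference resolved to the left subtree
      }
    } else if (rightTables.contains(tableOrCol)) {
      if (!rightAliases.contains(tableOrCol)) {
        rightAliases.add(tableOrCol);   // reference resolved to the right subtree
      }
    } else {
      // unqualified or unknown: cannot be attributed without table metadata
      throw new IllegalArgumentException("unknown or unqualified alias: " + tableOrCol);
    }
  }

  public static void main(String[] args) {
    List<String> left = new ArrayList<String>();
    List<String> right = new ArrayList<String>();
    // For "... a JOIN b ON (a.key = b.key)" the two references split like this:
    attribute("a", Arrays.asList("a"), Arrays.asList("b"), left, right);
    attribute("b", Arrays.asList("a"), Arrays.asList("b"), left, right);
    System.out.println(left + " / " + right);  // [a] / [b]
  }
}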
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn + .getChild(0))); } break; case HiveParser.Identifier: - // it may be a field name, return the identifier and let the caller decide whether it is or not - if ( fields != null ) { - fields.add(unescapeIdentifier(condn.getToken().getText().toLowerCase())); + // it may be a field name, return the identifier and let the caller decide + // whether it is or not + if (fields != null) { + fields + .add(unescapeIdentifier(condn.getToken().getText().toLowerCase())); } - unparseTranslator.addIdentifierTranslation((ASTNode) condn); + unparseTranslator.addIdentifierTranslation(condn); break; case HiveParser.Number: case HiveParser.StringLiteral: @@ -890,22 +907,24 @@ case HiveParser.TOK_FUNCTION: // check all the arguments - for (int i = 1; i < condn.getChildCount(); i++) + for (int i = 1; i < condn.getChildCount(); i++) { parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), leftAliases, rightAliases, null); + } break; default: // This is an operator - so check whether it is unary or binary operator - if (condn.getChildCount() == 1) + if (condn.getChildCount() == 1) { parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), leftAliases, rightAliases, null); - else if (condn.getChildCount() == 2) { + } else if (condn.getChildCount() == 2) { ArrayList fields1 = null; - // if it is a dot operator, remember the field name of the rhs of the left semijoin - if (joinTree.getNoSemiJoin() == false && - condn.getToken().getType() == HiveParser.DOT) { + // if it is a dot operator, remember the field name of the rhs of the + // left semijoin + if (joinTree.getNoSemiJoin() == false + && condn.getToken().getType() == HiveParser.DOT) { // get the semijoin rhs table name and field name fields1 = new ArrayList(); int rhssize = rightAliases.size(); @@ -913,13 +932,13 @@ leftAliases, rightAliases, null); String rhsAlias = null; - if ( rightAliases.size() > rhssize ) { // the new table is rhs table - rhsAlias = rightAliases.get(rightAliases.size()-1); + if (rightAliases.size() > rhssize) { // the new table is rhs table + rhsAlias = rightAliases.get(rightAliases.size() - 1); } parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), leftAliases, rightAliases, fields1); - if ( rhsAlias != null && fields1.size() > 0 ) { + if (rhsAlias != null && fields1.size() > 0) { joinTree.addRHSSemijoinColumns(rhsAlias, condn); } } else { @@ -928,9 +947,10 @@ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), leftAliases, rightAliases, fields1); } - } else + } else { throw new SemanticException(condn.toStringTree() + " encountered with " + condn.getChildCount() + " children"); + } break; } } @@ -938,86 +958,107 @@ private void populateAliases(Vector leftAliases, Vector rightAliases, ASTNode condn, QBJoinTree joinTree, Vector leftSrc) throws SemanticException { - if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(condn)); + if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1 + .getMsg(condn)); + } if (rightAliases.size() != 0) { assert rightAliases.size() == 1; joinTree.getExpressions().get(1).add(condn); } else if (leftAliases.size() != 0) { joinTree.getExpressions().get(0).add(condn); - for (String s : leftAliases) - if (!leftSrc.contains(s)) + for (String s : leftAliases) { + if (!leftSrc.contains(s)) { leftSrc.add(s); - } else - throw new 
SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2.getMsg(condn)); + } + } + } else { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2 + .getMsg(condn)); + } } /** - * Parse the join condition. - * If the condition is a join condition, throw an error if it is not an equality. Otherwise, break it into left and - * right expressions and store in the join tree. - * If the condition is a join filter, add it to the filter list of join tree. The join condition can contains conditions - * on both the left and tree trees and filters on either. Currently, we only support equi-joins, so we throw an error - * if the condition involves both subtrees and is not a equality. Also, we only support AND i.e ORs are not supported - * currently as their semantics are not very clear, may lead to data explosion and there is no usecase. - * @param joinTree jointree to be populated - * @param joinCond join condition - * @param leftSrc left sources + * Parse the join condition. If the condition is a join condition, throw an + * error if it is not an equality. Otherwise, break it into left and right + * expressions and store in the join tree. If the condition is a join filter, + * add it to the filter list of join tree. The join condition can contains + * conditions on both the left and tree trees and filters on either. + * Currently, we only support equi-joins, so we throw an error if the + * condition involves both subtrees and is not a equality. Also, we only + * support AND i.e ORs are not supported currently as their semantics are not + * very clear, may lead to data explosion and there is no usecase. + * + * @param joinTree + * jointree to be populated + * @param joinCond + * join condition + * @param leftSrc + * left sources * @throws SemanticException */ - private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, Vector leftSrc) - throws SemanticException { - if (joinCond == null) + private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, + Vector leftSrc) throws SemanticException { + if (joinCond == null) { return; + } switch (joinCond.getToken().getType()) { case HiveParser.KW_OR: - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(joinCond)); + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3 + .getMsg(joinCond)); case HiveParser.KW_AND: - parseJoinCondition(joinTree, (ASTNode) joinCond - .getChild(0), leftSrc); - parseJoinCondition(joinTree, (ASTNode) joinCond - .getChild(1), leftSrc); + parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc); + parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc); break; case HiveParser.EQUAL: ASTNode leftCondn = (ASTNode) joinCond.getChild(0); Vector leftCondAl1 = new Vector(); Vector leftCondAl2 = new Vector(); - parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null); + parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, + null); ASTNode rightCondn = (ASTNode) joinCond.getChild(1); Vector rightCondAl1 = new Vector(); Vector rightCondAl2 = new Vector(); - parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null); + parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, + rightCondAl2, null); // is it a filter or a join condition - if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) || - ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); + if (((leftCondAl1.size() != 0) && 
(leftCondAl2.size() != 0)) + || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1 + .getMsg(joinCond)); + } if (leftCondAl1.size() != 0) { - if ((rightCondAl1.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) + if ((rightCondAl1.size() != 0) + || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { joinTree.getFilters().get(0).add(joinCond); - else if (rightCondAl2.size() != 0) { - populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); - populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); + } else if (rightCondAl2.size() != 0) { + populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, + leftSrc); + populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, + leftSrc); } - } - else if (leftCondAl2.size() != 0) { - if ((rightCondAl2.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) + } else if (leftCondAl2.size() != 0) { + if ((rightCondAl2.size() != 0) + || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { joinTree.getFilters().get(1).add(joinCond); - else if (rightCondAl1.size() != 0) { - populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); - populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); + } else if (rightCondAl1.size() != 0) { + populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, + leftSrc); + populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, + leftSrc); } - } - else if (rightCondAl1.size() != 0) + } else if (rightCondAl1.size() != 0) { joinTree.getFilters().get(0).add(joinCond); - else + } else { joinTree.getFilters().get(1).add(joinCond); + } break; @@ -1026,17 +1067,22 @@ // Create all children int childrenBegin = (isFunction ? 
1 : 0); - ArrayList> leftAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); - ArrayList> rightAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); + ArrayList> leftAlias = new ArrayList>( + joinCond.getChildCount() - childrenBegin); + ArrayList> rightAlias = new ArrayList>( + joinCond.getChildCount() - childrenBegin); for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) { - Vector left = new Vector(); + Vector left = new Vector(); Vector right = new Vector(); leftAlias.add(left); rightAlias.add(right); } - for (int ci=childrenBegin; ci left : leftAlias) { @@ -1054,69 +1100,80 @@ } } - if (!leftAliasNull && !rightAliasNull) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); + if (!leftAliasNull && !rightAliasNull) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1 + .getMsg(joinCond)); + } - if (!leftAliasNull) + if (!leftAliasNull) { joinTree.getFilters().get(0).add(joinCond); - else + } else { joinTree.getFilters().get(1).add(joinCond); + } break; } } @SuppressWarnings("nls") - public Operator putOpInsertMap(Operator op, RowResolver rr) - { + public Operator putOpInsertMap(Operator op, + RowResolver rr) { OpParseContext ctx = new OpParseContext(rr); opParseCtx.put(op, ctx); return op; } @SuppressWarnings("nls") - private Operator genFilterPlan(String dest, QB qb, - Operator input) throws SemanticException { + private Operator genFilterPlan(String dest, QB qb, Operator input) + throws SemanticException { ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - return genFilterPlan(qb, (ASTNode)whereExpr.getChild(0), input); + return genFilterPlan(qb, (ASTNode) whereExpr.getChild(0), input); } /** * create a filter plan. The condition and the inputs are specified. 
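Looking back at parseJoinCondition above, a compact sketch of the decision it makes for a single equality predicate in an ON clause (the enum and names are illustrative, not Hive's; the cases where one operand mixes both sides, or an OR appears, are simply rejected by the real code and are not modeled here): operands referencing opposite sides form a join key, while a predicate confined to one side stays behind as a filter for that side.

public class JoinCondSketch {
  enum Side { LEFT, RIGHT, NONE }   // which join subtree an operand's columns come from
  enum Kind { JOIN_KEY, LEFT_FILTER, RIGHT_FILTER }

  static Kind classify(Side a, Side b) {
    if ((a == Side.LEFT && b == Side.RIGHT) || (a == Side.RIGHT && b == Side.LEFT)) {
      return Kind.JOIN_KEY;        // e.g. ON (a.key = b.key)
    }
    if (a == Side.LEFT || b == Side.LEFT) {
      return Kind.LEFT_FILTER;     // e.g. ... AND a.ds = '2009-01-01'
    }
    return Kind.RIGHT_FILTER;      // e.g. ... AND b.value = 'x'
  }

  public static void main(String[] args) {
    System.out.println(classify(Side.LEFT, Side.RIGHT));  // JOIN_KEY
    System.out.println(classify(Side.LEFT, Side.NONE));   // LEFT_FILTER
    System.out.println(classify(Side.NONE, Side.RIGHT));  // RIGHT_FILTER
  }
}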
- * @param qb current query block - * @param condn The condition to be resolved - * @param input the input operator + * + * @param qb + * current query block + * @param condn + * The condition to be resolved + * @param input + * the input operator */ @SuppressWarnings("nls") - private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) throws SemanticException { + private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) + throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRR(); - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new filterDesc(genExprNodeDesc(condn, inputRR), false), - new RowSchema(inputRR.getColumnInfos()), input), inputRR); + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( + new filterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( + inputRR.getColumnInfos()), input), inputRR); - LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + inputRR.toString()); + LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + + inputRR.toString()); return output; } @SuppressWarnings("nls") - private Integer genColListRegex(String colRegex, String tabAlias, String alias, ASTNode sel, - ArrayList col_list, RowResolver input, Integer pos, - RowResolver output) throws SemanticException { + private Integer genColListRegex(String colRegex, String tabAlias, + String alias, ASTNode sel, ArrayList col_list, + RowResolver input, Integer pos, RowResolver output) + throws SemanticException { // The table alias should exist - if (tabAlias != null && !input.hasTableAlias(tabAlias)) + if (tabAlias != null && !input.hasTableAlias(tabAlias)) { throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel)); + } // TODO: Have to put in the support for AS clause Pattern regex = null; try { regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE); } catch (PatternSyntaxException e) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e.getMessage())); + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e + .getMessage())); } StringBuilder replacementText = new StringBuilder(); @@ -1124,9 +1181,9 @@ // This is the tab.* case // In this case add all the columns to the fieldList // from the input schema - for(ColumnInfo colInfo: input.getColumnInfos()) { + for (ColumnInfo colInfo : input.getColumnInfos()) { String name = colInfo.getInternalName(); - String [] tmp = input.reverseLookup(name); + String[] tmp = input.reverseLookup(name); // Skip the colinfos which are not for this particular alias if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) { @@ -1139,14 +1196,13 @@ } exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), - colInfo.getIsPartitionCol()); + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); col_list.add(expr); output.put(tmp[0], tmp[1], - new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol())); + new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo + .getTabAlias(), colInfo.getIsPartitionCol())); pos = Integer.valueOf(pos.intValue() + 1); - matched ++; + matched++; if (unparseTranslator.isEnabled()) { if (replacementText.length() > 0) { @@ -1171,27 +1227,28 @@ return HiveConf.getColumnInternalName(pos); } - /** * If the user script command needs any modifications - do it here */ private String getFixedCmd(String cmd) { SessionState ss = SessionState.get(); - 
if(ss == null) + if (ss == null) { return cmd; + } // for local mode - replace any references to packaged files by name with // the reference to the original file path - if(ss.getConf().get("mapred.job.tracker", "local").equals("local")) { - Set files = ss.list_resource(SessionState.ResourceType.FILE, null); - if((files != null) && !files.isEmpty()) { + if (ss.getConf().get("mapred.job.tracker", "local").equals("local")) { + Set files = ss + .list_resource(SessionState.ResourceType.FILE, null); + if ((files != null) && !files.isEmpty()) { int end = cmd.indexOf(" "); String prog = (end == -1) ? cmd : cmd.substring(0, end); - String args = (end == -1) ? "" : cmd.substring(end, cmd.length()); + String args = (end == -1) ? "" : cmd.substring(end, cmd.length()); - for(String oneFile: files) { + for (String oneFile : files) { Path p = new Path(oneFile); - if(p.getName().equals(prog)) { + if (p.getName().equals(prog)) { cmd = oneFile + args; break; } @@ -1202,60 +1259,72 @@ return cmd; } - private tableDesc getTableDescFromSerDe(ASTNode child, String cols, String colTypes, boolean defaultCols) throws SemanticException { + private tableDesc getTableDescFromSerDe(ASTNode child, String cols, + String colTypes, boolean defaultCols) throws SemanticException { if (child.getType() == HiveParser.TOK_SERDENAME) { String serdeName = unescapeSQLString(child.getChild(0).getText()); Class serdeClass = null; try { - serdeClass = (Class)Class.forName(serdeName, true, JavaUtils.getClassLoader()); + serdeClass = (Class) Class.forName(serdeName, + true, JavaUtils.getClassLoader()); } catch (ClassNotFoundException e) { throw new SemanticException(e); } - tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer.toString(Utilities.tabCode), cols, colTypes, defaultCols, true); + tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer + .toString(Utilities.tabCode), cols, colTypes, defaultCols, true); // copy all the properties if (child.getChildCount() == 2) { - ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); + ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0); for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); - tblDesc.getProperties().setProperty(key,value); + String key = unescapeSQLString(prop.getChild(propChild).getChild(0) + .getText()); + String value = unescapeSQLString(prop.getChild(propChild).getChild(1) + .getText()); + tblDesc.getProperties().setProperty(key, value); } } return tblDesc; - } - else if (child.getType() == HiveParser.TOK_SERDEPROPS) { - tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols); + } else if (child.getType() == HiveParser.TOK_SERDEPROPS) { + tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer + .toString(Utilities.ctrlaCode), cols, colTypes, defaultCols); int numChildRowFormat = child.getChildCount(); - for (int numC = 0; numC < numChildRowFormat; numC++) - { - ASTNode rowChild = (ASTNode)child.getChild(numC); + for (int numC = 0; numC < numChildRowFormat; numC++) { + ASTNode rowChild = (ASTNode) child.getChild(numC); switch (rowChild.getToken().getType()) { case HiveParser.TOK_TABLEROWFORMATFIELD: String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); - tblDesc.getProperties().setProperty(Constants.FIELD_DELIM, fieldDelim); - 
tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT, fieldDelim); + tblDesc.getProperties() + .setProperty(Constants.FIELD_DELIM, fieldDelim); + tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT, + fieldDelim); - if (rowChild.getChildCount()>=2) { - String fieldEscape = unescapeSQLString(rowChild.getChild(1).getText()); - tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR, fieldEscape); + if (rowChild.getChildCount() >= 2) { + String fieldEscape = unescapeSQLString(rowChild.getChild(1) + .getText()); + tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR, + fieldEscape); } break; case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: - tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM, unescapeSQLString(rowChild.getChild(0).getText())); + tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM, + unescapeSQLString(rowChild.getChild(0).getText())); break; case HiveParser.TOK_TABLEROWFORMATMAPKEYS: - tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM, unescapeSQLString(rowChild.getChild(0).getText())); + tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM, + unescapeSQLString(rowChild.getChild(0).getText())); break; case HiveParser.TOK_TABLEROWFORMATLINES: String lineDelim = unescapeSQLString(rowChild.getChild(0).getText()); tblDesc.getProperties().setProperty(Constants.LINE_DELIM, lineDelim); if (!lineDelim.equals("\n") && !lineDelim.equals("10")) { - throw new SemanticException(ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()); + throw new SemanticException( + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()); } break; - default: assert false; + default: + assert false; } } @@ -1266,68 +1335,77 @@ return null; } - private void failIfColAliasExists(Set nameSet, String name) throws SemanticException { - if(nameSet.contains(name)) - throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(name)); + private void failIfColAliasExists(Set nameSet, String name) + throws SemanticException { + if (nameSet.contains(name)) { + throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS + .getMsg(name)); + } nameSet.add(name); } @SuppressWarnings("nls") - private Operator genScriptPlan(ASTNode trfm, QB qb, - Operator input) throws SemanticException { + private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) + throws SemanticException { // If there is no "AS" clause, the output schema will be "key,value" ArrayList outputCols = new ArrayList(); - int inputSerDeNum = 1, inputRecordWriterNum = 2; - int outputSerDeNum = 4, outputRecordReaderNum = 5; - int outputColsNum = 6; + int inputSerDeNum = 1, inputRecordWriterNum = 2; + int outputSerDeNum = 4, outputRecordReaderNum = 5; + int outputColsNum = 6; boolean outputColNames = false, outputColSchemas = false; - int execPos = 3; + int execPos = 3; boolean defaultOutputCols = false; // Go over all the children if (trfm.getChildCount() > outputColsNum) { - ASTNode outCols = (ASTNode)trfm.getChild(outputColsNum); - if (outCols.getType() == HiveParser.TOK_ALIASLIST) + ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum); + if (outCols.getType() == HiveParser.TOK_ALIASLIST) { outputColNames = true; - else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) + } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) { outputColSchemas = true; + } } // If column type is not specified, use a string if (!outputColNames && !outputColSchemas) { String intName = getColumnInternalName(0); - ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, 
null, false); + ColumnInfo colInfo = new ColumnInfo(intName, + TypeInfoFactory.stringTypeInfo, null, false); colInfo.setAlias("key"); outputCols.add(colInfo); intName = getColumnInternalName(1); - colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false); + colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, + false); colInfo.setAlias("value"); outputCols.add(colInfo); defaultOutputCols = true; - } - else { + } else { ASTNode collist = (ASTNode) trfm.getChild(outputColsNum); int ccount = collist.getChildCount(); Set colAliasNamesDuplicateCheck = new HashSet(); if (outputColNames) { - for (int i=0; i < ccount; ++i) { - String colAlias = unescapeIdentifier(((ASTNode)collist.getChild(i)).getText()); + for (int i = 0; i < ccount; ++i) { + String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)) + .getText()); failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); String intName = getColumnInternalName(i); - ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false); + ColumnInfo colInfo = new ColumnInfo(intName, + TypeInfoFactory.stringTypeInfo, null, false); colInfo.setAlias(colAlias); outputCols.add(colInfo); } - } - else { - for (int i=0; i < ccount; ++i) { + } else { + for (int i = 0; i < ccount; ++i) { ASTNode child = (ASTNode) collist.getChild(i); assert child.getType() == HiveParser.TOK_TABCOL; - String colAlias = unescapeIdentifier(((ASTNode)child.getChild(0)).getText()); + String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)) + .getText()); failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); String intName = getColumnInternalName(i); - ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode)child.getChild(1))), null, false); + ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils + .getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child + .getChild(1))), null, false); colInfo.setAlias(colAlias); outputCols.add(colInfo); } @@ -1347,15 +1425,14 @@ columns.append(outputCols.get(i).getInternalName()); columnTypes.append(outputCols.get(i).getType().getTypeName()); - out_rwsch.put( - qb.getParseInfo().getAlias(), - outputCols.get(i).getAlias(), - outputCols.get(i)); + out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), + outputCols.get(i)); } StringBuilder inpColumns = new StringBuilder(); StringBuilder inpColumnTypes = new StringBuilder(); - Vector inputSchema = opParseCtx.get(input).getRR().getColumnInfos(); + Vector inputSchema = opParseCtx.get(input).getRR() + .getColumnInfos(); for (int i = 0; i < inputSchema.size(); ++i) { if (i != 0) { inpColumns.append(","); @@ -1368,80 +1445,102 @@ tableDesc outInfo; tableDesc inInfo; - String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE); + String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE); Class serde; try { - serde = (Class)Class.forName(defaultSerdeName, true, JavaUtils.getClassLoader()); + serde = (Class) Class.forName(defaultSerdeName, + true, JavaUtils.getClassLoader()); } catch (ClassNotFoundException e) { throw new SemanticException(e); } // Input and Output Serdes - if (trfm.getChild(inputSerDeNum).getChildCount() > 0) - inInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false); - else - inInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), inpColumns.toString(), 
inpColumnTypes.toString(), false, true); + if (trfm.getChild(inputSerDeNum).getChildCount() > 0) { + inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm + .getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), + inpColumnTypes.toString(), false); + } else { + inInfo = PlanUtils.getTableDesc(serde, Integer + .toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes + .toString(), false, true); + } - if (trfm.getChild(outputSerDeNum).getChildCount() > 0) - outInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false); - // This is for backward compatibility. If the user did not specify the output column list, we assume that there are 2 columns: key and value. - // However, if the script outputs: col1, col2, col3 seperated by TAB, the requirement is: key is col and value is (col2 TAB col3) - else - outInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), columns.toString(), columnTypes.toString(), defaultOutputCols); + if (trfm.getChild(outputSerDeNum).getChildCount() > 0) { + outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm + .getChild(outputSerDeNum))).getChild(0), columns.toString(), + columnTypes.toString(), false); + // This is for backward compatibility. If the user did not specify the + // output column list, we assume that there are 2 columns: key and value. + // However, if the script outputs: col1, col2, col3 seperated by TAB, the + // requirement is: key is col and value is (col2 TAB col3) + } else { + outInfo = PlanUtils.getTableDesc(serde, Integer + .toString(Utilities.tabCode), columns.toString(), columnTypes + .toString(), defaultOutputCols); + } // Output record readers - Class outRecordReader = getRecordReader((ASTNode)trfm.getChild(outputRecordReaderNum)); - Class inRecordWriter = getRecordWriter((ASTNode)trfm.getChild(inputRecordWriterNum)); + Class outRecordReader = getRecordReader((ASTNode) trfm + .getChild(outputRecordReaderNum)); + Class inRecordWriter = getRecordWriter((ASTNode) trfm + .getChild(inputRecordWriterNum)); - Operator output = putOpInsertMap(OperatorFactory - .getAndMakeChild( - new scriptDesc(getFixedCmd(stripQuotes(trfm.getChild(execPos).getText())), - inInfo, inRecordWriter, outInfo, outRecordReader), - new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( + new scriptDesc( + getFixedCmd(stripQuotes(trfm.getChild(execPos).getText())), inInfo, + inRecordWriter, outInfo, outRecordReader), new RowSchema(out_rwsch + .getColumnInfos()), input), out_rwsch); return output; } - private Class getRecordReader(ASTNode node) throws SemanticException { + private Class getRecordReader(ASTNode node) + throws SemanticException { String name; - if (node.getChildCount() == 0) + if (node.getChildCount() == 0) { name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER); - else + } else { name = unescapeSQLString(node.getChild(0).getText()); + } try { - return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); + return (Class) Class.forName(name, true, + JavaUtils.getClassLoader()); } catch (ClassNotFoundException e) { throw new SemanticException(e); } } - private Class getRecordWriter(ASTNode node) throws SemanticException { + private Class getRecordWriter(ASTNode node) + throws SemanticException { String name; - if (node.getChildCount() == 0) + if (node.getChildCount() == 0) { name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER); - else + 
} else { name = unescapeSQLString(node.getChild(0).getText()); + } try { - return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); + return (Class) Class.forName(name, true, + JavaUtils.getClassLoader()); } catch (ClassNotFoundException e) { throw new SemanticException(e); } } /** - * This function is a wrapper of parseInfo.getGroupByForClause which automatically - * translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY a,b,c. + * This function is a wrapper of parseInfo.getGroupByForClause which + * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY + * a,b,c. */ static List getGroupByForClause(QBParseInfo parseInfo, String dest) { if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { ASTNode selectExprs = parseInfo.getSelForClause(dest); - List result = new ArrayList(selectExprs == null - ? 0 : selectExprs.getChildCount()); + List result = new ArrayList(selectExprs == null ? 0 + : selectExprs.getChildCount()); if (selectExprs != null) { for (int i = 0; i < selectExprs.getChildCount(); ++i) { // table.column AS alias @@ -1452,8 +1551,8 @@ return result; } else { ASTNode grpByExprs = parseInfo.getGroupByForClause(dest); - List result = new ArrayList(grpByExprs == null - ? 0 : grpByExprs.getChildCount()); + List result = new ArrayList(grpByExprs == null ? 0 + : grpByExprs.getChildCount()); if (grpByExprs != null) { for (int i = 0; i < grpByExprs.getChildCount(); ++i) { ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i); @@ -1464,14 +1563,15 @@ } } - private static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR) { + private static String[] getColAlias(ASTNode selExpr, String defaultName, + RowResolver inputRR) { String colAlias = null; String tabAlias = null; String[] colRef = new String[2]; if (selExpr.getChildCount() == 2) { // return zz for "xx + yy AS zz" - colAlias = unescapeIdentifier(selExpr.getChild(1).getText()); + colAlias = unescapeIdentifier(selExpr.getChild(1).getText()); colRef[0] = tabAlias; colRef[1] = colAlias; return colRef; @@ -1501,7 +1601,7 @@ } } - if(colAlias == null) { + if (colAlias == null) { // Return defaultName if selExpr is not a simple xx.yy.zz colAlias = defaultName; } @@ -1512,11 +1612,11 @@ } /** - * Returns whether the pattern is a regex expression (instead of a normal string). - * Normal string is a string with all alphabets/digits and "_". + * Returns whether the pattern is a regex expression (instead of a normal + * string). Normal string is a string with all alphabets/digits and "_". 
*/ private static boolean isRegex(String pattern) { - for(int i=0; i genSelectPlan(String dest, QB qb, - Operator input) throws SemanticException { + private Operator genSelectPlan(String dest, QB qb, Operator input) + throws SemanticException { ASTNode selExprList = qb.getParseInfo().getSelForClause(dest); Operator op = genSelectPlan(selExprList, qb, input); LOG.debug("Created Select Plan for clause: " + dest); return op; } + @SuppressWarnings("nls") private Operator genSelectPlan(ASTNode selExprList, QB qb, - Operator input) throws SemanticException { + Operator input) throws SemanticException { LOG.debug("tree: " + selExprList.toStringTree()); ArrayList col_list = new ArrayList(); @@ -1552,8 +1653,7 @@ posn++; } - boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() - == HiveParser.TOK_TRANSFORM); + boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); if (isInTransform) { trfm = (ASTNode) selExprList.getChild(posn).getChild(0); } @@ -1569,17 +1669,16 @@ GenericUDTF genericUDTF = null; if (udtfExpr.getType() == HiveParser.TOK_FUNCTION) { - String funcName = - TypeCheckProcFactory.DefaultExprProcessor.getFunctionText( - udtfExpr, true); + String funcName = TypeCheckProcFactory.DefaultExprProcessor + .getFunctionText(udtfExpr, true); FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); if (fi != null) { genericUDTF = fi.getGenericUDTF(); } isUDTF = (genericUDTF != null); if (isUDTF && !fi.isNative()) { - unparseTranslator.addIdentifierTranslation( - (ASTNode) udtfExpr.getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr + .getChild(0)); } } @@ -1595,7 +1694,7 @@ } // Get the column / table aliases from the expression. Start from 1 as // 0 is the TOK_FUNCTION - for (int i=1; i 2) { throw new SemanticException(ErrorMsg.INVALID_AS.getMsg()); @@ -1660,20 +1759,19 @@ tabAlias = colRef[0]; colAlias = colRef[1]; if (hasAsClause) { - unparseTranslator.addIdentifierTranslation( - (ASTNode) child.getChild(1)); + unparseTranslator.addIdentifierTranslation((ASTNode) child + .getChild(1)); } // Get rid of TOK_SELEXPR - expr = (ASTNode)child.getChild(0); + expr = (ASTNode) child.getChild(0); } if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", - expr.getChildCount() == 0 ? null : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()), + pos = genColListRegex(".*", expr.getChildCount() == 0 ? null + : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()), alias, expr, col_list, inputRR, pos, out_rwsch); selectStar = true; - } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL - && !hasAsClause + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(0).getText()))) { // In case the expression is a regex COL. @@ -1683,27 +1781,27 @@ null, alias, expr, col_list, inputRR, pos, out_rwsch); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL - && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) - && !hasAsClause + && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) + .getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()))) { // In case the expression is TABLE.COL (col can be regex). 
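// Illustrative sketch, not part of this patch: the "normal string vs. regex"
// test that the isRegex() Javadoc above describes -- a pattern made only of
// letters, digits and '_' is taken as a plain column name, anything else is
// treated as a regex for column selection. The class and method names below
// are invented for this example.
class ColumnPatternSketch {
  static boolean looksLikeRegex(String pattern) {
    for (int i = 0; i < pattern.length(); i++) {
      char c = pattern.charAt(i);
      if (!Character.isLetterOrDigit(c) && c != '_') {
        return true;                      // e.g. "key.*" or "col[0-9]"
      }
    }
    return false;                         // e.g. "col1" is an ordinary identifier
  }

  public static void main(String[] args) {
    System.out.println(looksLikeRegex("col1"));  // false
    System.out.println(looksLikeRegex("key.*")); // true
  }
}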
// This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), - unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), - alias, expr, col_list, inputRR, pos, out_rwsch); + unescapeIdentifier(expr.getChild(0).getChild(0).getText() + .toLowerCase()), alias, expr, col_list, inputRR, pos, out_rwsch); } else { // Case when this is an expression exprNodeDesc exp = genExprNodeDesc(expr, inputRR); col_list.add(exp); - if (!StringUtils.isEmpty(alias) && - (out_rwsch.get(null, colAlias) != null)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr.getChild(1))); + if (!StringUtils.isEmpty(alias) + && (out_rwsch.get(null, colAlias) != null)) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr + .getChild(1))); } - out_rwsch.put(tabAlias, colAlias, - new ColumnInfo(getColumnInternalName(pos), - exp.getTypeInfo(), tabAlias, false)); + out_rwsch.put(tabAlias, colAlias, new ColumnInfo( + getColumnInternalName(pos), exp.getTypeInfo(), tabAlias, false)); pos = Integer.valueOf(pos.intValue() + 1); } @@ -1712,10 +1810,11 @@ ArrayList columnNames = new ArrayList(); Map colExprMap = new HashMap(); - for (int i=0; i getTypeInfo(ArrayList exprs) { ArrayList result = new ArrayList(); - for(exprNodeDesc expr: exprs) { + for (exprNodeDesc expr : exprs) { result.add(expr.getTypeInfo()); } return result; @@ -1764,7 +1863,7 @@ */ static ObjectInspector[] getStandardObjectInspector(ArrayList exprs) { ObjectInspector[] result = new ObjectInspector[exprs.size()]; - for (int i=0; i aggParameters, - ASTNode aggTree) throws SemanticException { + ArrayList aggParameters, ASTNode aggTree) + throws SemanticException { ArrayList originalParameterTypeInfos = getTypeInfo(aggParameters); GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( aggName, originalParameterTypeInfos); if (null == result) { - String reason = "Looking for UDAF Evaluator\"" + aggName + "\" with parameters " - + originalParameterTypeInfos; - throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE. - getMsg((ASTNode)aggTree.getChild(0), reason)); + String reason = "Looking for UDAF Evaluator\"" + aggName + + "\" with parameters " + originalParameterTypeInfos; + throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg( + (ASTNode) aggTree.getChild(0), reason)); } return result; } /** * Returns the GenericUDAFInfo struct for the aggregation. - * @param aggName The name of the UDAF. - * @param aggParameters The exprNodeDesc of the original parameters - * @param aggTree The ASTNode node of the UDAF in the query. + * + * @param aggName + * The name of the UDAF. + * @param aggParameters + * The exprNodeDesc of the original parameters + * @param aggTree + * The ASTNode node of the UDAF in the query. * @return GenericUDAFInfo - * @throws SemanticException when the UDAF is not found or has problems. + * @throws SemanticException + * when the UDAF is not found or has problems. 
*/ static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) @@ -1810,8 +1914,7 @@ // set r.returnType ObjectInspector returnOI = null; try { - ObjectInspector[] aggObjectInspectors = - getStandardObjectInspector(getTypeInfo(aggParameters)); + ObjectInspector[] aggObjectInspectors = getStandardObjectInspector(getTypeInfo(aggParameters)); returnOI = r.genericUDAFEvaluator.init(emode, aggObjectInspectors); r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI); } catch (HiveException e) { @@ -1824,35 +1927,49 @@ return r; } - private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(groupByDesc.Mode mode, boolean isDistinct) { + private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + groupByDesc.Mode mode, boolean isDistinct) { switch (mode) { - case COMPLETE: return GenericUDAFEvaluator.Mode.COMPLETE; - case PARTIAL1: return GenericUDAFEvaluator.Mode.PARTIAL1; - case PARTIAL2: return GenericUDAFEvaluator.Mode.PARTIAL2; - case PARTIALS: return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 : GenericUDAFEvaluator.Mode.PARTIAL2; - case FINAL: return GenericUDAFEvaluator.Mode.FINAL; - case HASH: return GenericUDAFEvaluator.Mode.PARTIAL1; - case MERGEPARTIAL: return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE : GenericUDAFEvaluator.Mode.FINAL; - default: - throw new RuntimeException("internal error in groupByDescModeToUDAFMode"); + case COMPLETE: + return GenericUDAFEvaluator.Mode.COMPLETE; + case PARTIAL1: + return GenericUDAFEvaluator.Mode.PARTIAL1; + case PARTIAL2: + return GenericUDAFEvaluator.Mode.PARTIAL2; + case PARTIALS: + return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 + : GenericUDAFEvaluator.Mode.PARTIAL2; + case FINAL: + return GenericUDAFEvaluator.Mode.FINAL; + case HASH: + return GenericUDAFEvaluator.Mode.PARTIAL1; + case MERGEPARTIAL: + return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE + : GenericUDAFEvaluator.Mode.FINAL; + default: + throw new RuntimeException("internal error in groupByDescModeToUDAFMode"); } } + /** * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. - * - * @param mode The mode of the aggregation (PARTIAL1 or COMPLETE) - * @param genericUDAFEvaluators If not null, this function will store the mapping - * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, - * so it can be used in the next-stage GroupBy aggregations. + * + * @param mode + * The mode of the aggregation (PARTIAL1 or COMPLETE) + * @param genericUDAFEvaluators + * If not null, this function will store the mapping from Aggregation + * StringTree to the genericUDAFEvaluator in this parameter, so it + * can be used in the next-stage GroupBy aggregations. 
* @return the new GroupByOperator */ @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators) - throws SemanticException { - RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRR(); + private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, + String dest, Operator reduceSinkOperatorInfo, groupByDesc.Mode mode, + Map genericUDAFEvaluators) + throws SemanticException { + RowResolver groupByInputRowResolver = opParseCtx + .get(reduceSinkOperatorInfo).getRR(); RowResolver groupByOutputRowResolver = new RowResolver(); groupByOutputRowResolver.setIsExprResolver(true); ArrayList groupByKeys = new ArrayList(); @@ -1863,18 +1980,18 @@ for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver.get("",text); + ColumnInfo exprInfo = groupByInputRowResolver.get("", text); if (exprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), - exprInfo.getInternalName(), "", false)); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo + .getInternalName(), "", false)); String field = getColumnInternalName(i); outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), null, false)); + groupByOutputRowResolver.put("", grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), null, false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } // For each aggregation @@ -1892,44 +2009,45 @@ // 0 is the function name for (int i = 1; i < value.getChildCount(); i++) { String text = value.getChild(i).toStringTree(); - ASTNode paraExpr = (ASTNode)value.getChild(i); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + ASTNode paraExpr = (ASTNode) value.getChild(i); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("", text); if (paraExprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); } String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); + assert (paraExpression != null); aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), - paraExprInfo.getInternalName(), - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); + paraExprInfo.getInternalName(), paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); } boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - isDistinct, amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( + aggName, aggParameters, value); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), + 
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, + amode)); + String field = getColumnInternalName(groupByKeys.size() + + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put("",value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - // Save the evaluator so that it can be used by the next-stage GroupByOperators + groupByOutputRowResolver.put("", value.toStringTree(), new ColumnInfo( + field, udaf.returnType, "", false)); + // Save the evaluator so that it can be used by the next-stage + // GroupByOperators if (genericUDAFEvaluators != null) { genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); } } - Operator op = - putOpInsertMap(OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - reduceSinkOperatorInfo), - groupByOutputRowResolver - ); + Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), + reduceSinkOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } @@ -1937,20 +2055,24 @@ /** * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. - * - * @param mode The mode of the aggregation (MERGEPARTIAL, PARTIAL2) - * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the - * genericUDAFEvaluator. - * @param distPartAggr partial aggregation for distincts + * + * @param mode + * The mode of the aggregation (MERGEPARTIAL, PARTIAL2) + * @param genericUDAFEvaluators + * The mapping from Aggregation StringTree to the + * genericUDAFEvaluator. 
+ * @param distPartAggr + * partial aggregation for distincts * @return the new GroupByOperator */ @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator1( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators, boolean distPartAgg) - throws SemanticException { + private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, + String dest, Operator reduceSinkOperatorInfo, groupByDesc.Mode mode, + Map genericUDAFEvaluators, + boolean distPartAgg) throws SemanticException { ArrayList outputColumnNames = new ArrayList(); - RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRR(); + RowResolver groupByInputRowResolver = opParseCtx + .get(reduceSinkOperatorInfo).getRR(); RowResolver groupByOutputRowResolver = new RowResolver(); groupByOutputRowResolver.setIsExprResolver(true); ArrayList groupByKeys = new ArrayList(); @@ -1960,20 +2082,19 @@ for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver.get("",text); + ColumnInfo exprInfo = groupByInputRowResolver.get("", text); if (exprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), - exprInfo.getInternalName(), - exprInfo.getTabAlias(), - exprInfo.getIsPartitionCol())); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo + .getInternalName(), exprInfo.getTabAlias(), exprInfo + .getIsPartitionCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), "", false)); + groupByOutputRowResolver.put("", grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -1984,91 +2105,102 @@ String aggName = value.getChild(0).getText(); ArrayList aggParameters = new ArrayList(); - // If the function is distinct, partial aggregartion has not been done on the client side. - // If distPartAgg is set, the client is letting us know that partial aggregation has not been done. + // If the function is distinct, partial aggregartion has not been done on + // the client side. + // If distPartAgg is set, the client is letting us know that partial + // aggregation has not been done. // For eg: select a, count(b+c), count(distinct d+e) group by a - // For count(b+c), if partial aggregation has been performed, then we directly look for count(b+c). + // For count(b+c), if partial aggregation has been performed, then we + // directly look for count(b+c). // Otherwise, we look for b+c. 
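// Illustrative sketch, not part of this patch: which input-row key the
// reducer-side group-by described in the comments above resolves for
//   SELECT a, count(b+c), count(DISTINCT d+e) FROM t GROUP BY a
// depending on whether map-side partial aggregation already ran. The class
// and method names below are invented for this example.
class PartialAggLookupSketch {
  static String lookupKey(String aggregateExpr, String parameterExpr,
                          boolean isDistinct, boolean distPartAgg) {
    // Mirrors the rule above: partial aggregation happens only for
    // non-distinct aggregates and only when distPartAgg is not set.
    boolean partialAggDone = !(distPartAgg || isDistinct);
    return partialAggDone ? aggregateExpr : parameterExpr;
  }

  public static void main(String[] args) {
    // Map-side partial aggregation ran (distPartAgg == false):
    System.out.println(lookupKey("count(b+c)", "b+c", false, false)); // count(b+c)
    System.out.println(lookupKey("count(d+e)", "d+e", true, false));  // d+e
    // Partial aggregation skipped (distPartAgg == true): always the parameter.
    System.out.println(lookupKey("count(b+c)", "b+c", false, true));  // b+c
  }
}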
- // For distincts, partial aggregation is never performed on the client side, so always look for the parameters: d+e + // For distincts, partial aggregation is never performed on the client + // side, so always look for the parameters: d+e boolean partialAggDone = !(distPartAgg || (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI)); if (!partialAggDone) { // 0 is the function name for (int i = 1; i < value.getChildCount(); i++) { String text = value.getChild(i).toStringTree(); - ASTNode paraExpr = (ASTNode)value.getChild(i); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + ASTNode paraExpr = (ASTNode) value.getChild(i); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("", text); if (paraExprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); + throw new SemanticException(ErrorMsg.INVALID_COLUMN + .getMsg(paraExpr)); } String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); + assert (paraExpression != null); aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), - paraExprInfo.getInternalName(), - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); + paraExprInfo.getInternalName(), paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); } - } - else { + } else { String text = entry.getKey(); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("", text); if (paraExprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); } String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); + assert (paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExpression, paraExprInfo.getTabAlias(), paraExprInfo + .getIsPartitionCol())); } boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = null; // For distincts, partial aggregations have not been done if (distPartAgg) { - genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); + genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, + value); + assert (genericUDAFEvaluator != null); genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); - } - else { + } else { genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); - assert(genericUDAFEvaluator != null); + assert (genericUDAFEvaluator != null); } - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != groupByDesc.Mode.FINAL && isDistinct), amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); + String field = getColumnInternalName(groupByKeys.size() + + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put("", value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); + 
groupByOutputRowResolver.put("", value.toStringTree(), new ColumnInfo( + field, udaf.returnType, "", false)); } - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, distPartAgg), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - reduceSinkOperatorInfo), + Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + distPartAgg), new RowSchema(groupByOutputRowResolver + .getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } /** - * Generate the map-side GroupByOperator for the Query Block (qb.getParseInfo().getXXX(dest)). - * The new GroupByOperator will be a child of the inputOperatorInfo. - * - * @param mode The mode of the aggregation (HASH) - * @param genericUDAFEvaluators If not null, this function will store the mapping - * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, - * so it can be used in the next-stage GroupBy aggregations. + * Generate the map-side GroupByOperator for the Query Block + * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child + * of the inputOperatorInfo. + * + * @param mode + * The mode of the aggregation (HASH) + * @param genericUDAFEvaluators + * If not null, this function will store the mapping from Aggregation + * StringTree to the genericUDAFEvaluator in this parameter, so it + * can be used in the next-stage GroupBy aggregations. * @return the new GroupByOperator */ @SuppressWarnings("nls") - private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, Operator inputOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators) throws SemanticException { + private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, + Operator inputOperatorInfo, groupByDesc.Mode mode, + Map genericUDAFEvaluators) + throws SemanticException { - RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); + RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo) + .getRR(); QBParseInfo parseInfo = qb.getParseInfo(); RowResolver groupByOutputRowResolver = new RowResolver(); groupByOutputRowResolver.setIsExprResolver(true); @@ -2079,31 +2211,34 @@ List grpByExprs = getGroupByForClause(parseInfo, dest); for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); - exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver); + exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, + groupByInputRowResolver); groupByKeys.add(grpByExprNode); String field = getColumnInternalName(i); outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); + groupByOutputRowResolver.put("", grpbyExpr.toStringTree(), + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } // If there is a distinctFuncExp, add all parameters to the reduceKeys. 
if (parseInfo.getDistinctFuncExprForClause(dest) != null) { ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); - int numDistn=0; + int numDistn = 0; // 0 is function name for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); String text = parameter.toStringTree(); - if (groupByOutputRowResolver.get("",text) == null) { - exprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver); + if (groupByOutputRowResolver.get("", text) == null) { + exprNodeDesc distExprNode = genExprNodeDesc(parameter, + groupByInputRowResolver); groupByKeys.add(distExprNode); numDistn++; - String field = getColumnInternalName(grpByExprs.size() + numDistn -1); + String field = getColumnInternalName(grpByExprs.size() + numDistn - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put("", text, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false)); + groupByOutputRowResolver.put("", text, new ColumnInfo(field, + distExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } } @@ -2118,11 +2253,12 @@ ASTNode value = entry.getValue(); String aggName = unescapeIdentifier(value.getChild(0).getText()); ArrayList aggParameters = new ArrayList(); - ArrayList> aggClasses = new ArrayList>(); + new ArrayList>(); // 0 is the function name for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode)value.getChild(i); - exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); + ASTNode paraExpr = (ASTNode) value.getChild(i); + exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, + groupByInputRowResolver); aggParameters.add(paraExprNode); } @@ -2130,49 +2266,55 @@ boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - isDistinct, amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( + aggName, aggParameters, value); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), + udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, + amode)); + String field = getColumnInternalName(groupByKeys.size() + + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put("",value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - // Save the evaluator so that it can be used by the next-stage GroupByOperators + groupByOutputRowResolver.put("", value.toStringTree(), new ColumnInfo( + field, udaf.returnType, "", false)); + // Save the evaluator so that it can be used by the next-stage + // GroupByOperators if (genericUDAFEvaluators != null) { genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); } } - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - groupByOutputRowResolver); 
+ Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), + inputOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } - /** - * Generate the ReduceSinkOperator for the Group By Query Block (qb.getPartInfo().getXXX(dest)). - * The new ReduceSinkOperator will be a child of inputOperatorInfo. - * - * It will put all Group By keys and the distinct field (if any) in the map-reduce sort key, - * and all other fields in the map-reduce value. - * - * @param numPartitionFields the number of fields for map-reduce partitioning. - * This is usually the number of fields in the Group By keys. + * Generate the ReduceSinkOperator for the Group By Query Block + * (qb.getPartInfo().getXXX(dest)). The new ReduceSinkOperator will be a child + * of inputOperatorInfo. + * + * It will put all Group By keys and the distinct field (if any) in the + * map-reduce sort key, and all other fields in the map-reduce value. + * + * @param numPartitionFields + * the number of fields for map-reduce partitioning. This is usually + * the number of fields in the Group By keys. * @return the new ReduceSinkOperator. * @throws SemanticException */ @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator(QB qb, - String dest, Operator inputOperatorInfo, int numPartitionFields, int numReducers, boolean mapAggrDone) throws SemanticException { + private Operator genGroupByPlanReduceSinkOperator(QB qb, String dest, + Operator inputOperatorInfo, int numPartitionFields, int numReducers, + boolean mapAggrDone) throws SemanticException { - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); + RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo) + .getRR(); QBParseInfo parseInfo = qb.getParseInfo(); RowResolver reduceSinkOutputRowResolver = new RowResolver(); reduceSinkOutputRowResolver.setIsExprResolver(true); @@ -2184,18 +2326,21 @@ List grpByExprs = getGroupByForClause(parseInfo, dest); for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); - exprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, reduceSinkInputRowResolver); + exprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, + reduceSinkInputRowResolver); reduceKeys.add(inputExpr); String text = grpbyExpr.toStringTree(); if (reduceSinkOutputRowResolver.get("", text) == null) { outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); + String field = Utilities.ReduceField.KEY.toString() + "." 
+ + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( + reduceKeys.size() - 1).getTypeInfo(), null, false); reduceSinkOutputRowResolver.put("", text, colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } else { - throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg(grpbyExpr)); + throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY + .getMsg(grpbyExpr)); } } @@ -2206,20 +2351,24 @@ for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - reduceKeys.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); + if (reduceSinkOutputRowResolver.get("", text) == null) { + reduceKeys + .add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); + String field = Utilities.ReduceField.KEY.toString() + "." + + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( + reduceKeys.size() - 1).getTypeInfo(), null, false); reduceSinkOutputRowResolver.put("", text, colInfo); - colExprMap.put(colInfo.getInternalName(), reduceKeys.get(reduceKeys.size()-1)); + colExprMap.put(colInfo.getInternalName(), reduceKeys.get(reduceKeys + .size() - 1)); } } } ArrayList reduceValues = new ArrayList(); - HashMap aggregationTrees = parseInfo.getAggregationExprsForClause(dest); + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); if (!mapAggrDone) { // Put parameters to aggregations in reduceValues @@ -2229,66 +2378,69 @@ for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - reduceValues.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - reduceSinkOutputRowResolver.put("", text, - new ColumnInfo(field, - reduceValues.get(reduceValues.size()-1).getTypeInfo(), - null, false)); + if (reduceSinkOutputRowResolver.get("", text) == null) { + reduceValues.add(genExprNodeDesc(parameter, + reduceSinkInputRowResolver)); + outputColumnNames + .add(getColumnInternalName(reduceValues.size() - 1)); + String field = Utilities.ReduceField.VALUE.toString() + "." 
+ + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.put("", text, new ColumnInfo(field, + reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null, + false)); } } } - } - else - { + } else { // Put partial aggregation results in reduceValues int inputField = reduceKeys.size(); for (Map.Entry entry : aggregationTrees.entrySet()) { - TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); - reduceValues.add(new exprNodeColumnDesc(type, getColumnInternalName(inputField), - "", false)); + TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get( + inputField).getType(); + reduceValues.add(new exprNodeColumnDesc(type, + getColumnInternalName(inputField), "", false)); inputField++; outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), - new ColumnInfo(field, - type, null, false)); + String field = Utilities.ReduceField.VALUE.toString() + "." + + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.put("", (entry.getValue()).toStringTree(), + new ColumnInfo(field, type, null, false)); } } - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, - numReducers), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - reduceSinkOutputRowResolver - ); + ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, + reduceValues, outputColumnNames, true, -1, numPartitionFields, + numReducers), new RowSchema(reduceSinkOutputRowResolver + .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); return rsOp; } /** - * Generate the second ReduceSinkOperator for the Group By Plan (parseInfo.getXXX(dest)). - * The new ReduceSinkOperator will be a child of groupByOperatorInfo. - * - * The second ReduceSinkOperator will put the group by keys in the map-reduce sort - * key, and put the partial aggregation results in the map-reduce value. - * - * @param numPartitionFields the number of fields in the map-reduce partition key. - * This should always be the same as the number of Group By keys. We should be - * able to remove this parameter since in this phase there is no distinct any more. + * Generate the second ReduceSinkOperator for the Group By Plan + * (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of + * groupByOperatorInfo. + * + * The second ReduceSinkOperator will put the group by keys in the map-reduce + * sort key, and put the partial aggregation results in the map-reduce value. + * + * @param numPartitionFields + * the number of fields in the map-reduce partition key. This should + * always be the same as the number of Group By keys. We should be + * able to remove this parameter since in this phase there is no + * distinct any more. * @return the new ReduceSinkOperator. 
* @throws SemanticException */ @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator2MR( - QBParseInfo parseInfo, String dest, Operator groupByOperatorInfo, int numPartitionFields, int numReducers) - throws SemanticException { - RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(groupByOperatorInfo).getRR(); + private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo, + String dest, Operator groupByOperatorInfo, int numPartitionFields, + int numReducers) throws SemanticException { + RowResolver reduceSinkInputRowResolver2 = opParseCtx.get( + groupByOperatorInfo).getRR(); RowResolver reduceSinkOutputRowResolver2 = new RowResolver(); reduceSinkOutputRowResolver2.setIsExprResolver(true); Map colExprMap = new HashMap(); @@ -2300,13 +2452,14 @@ ASTNode grpbyExpr = grpByExprs.get(i); String field = getColumnInternalName(i); outputColumnNames.add(field); - TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", grpbyExpr.toStringTree()).getType(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field, "", false); + TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", + grpbyExpr.toStringTree()).getType(); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field, + "", false); reduceKeys.add(inputExpr); - ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, - typeInfo, "", false); - reduceSinkOutputRowResolver2.put("", grpbyExpr.toStringTree(), - colInfo); + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + + "." + field, typeInfo, "", false); + reduceSinkOutputRowResolver2.put("", grpbyExpr.toStringTree(), colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } // Get partial aggregation results and store in reduceValues @@ -2317,45 +2470,48 @@ for (Map.Entry entry : aggregationTrees.entrySet()) { String field = getColumnInternalName(inputField); ASTNode t = entry.getValue(); - TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", t.toStringTree()).getType(); + TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", t.toStringTree()) + .getType(); reduceValues.add(new exprNodeColumnDesc(typeInfo, field, "", false)); inputField++; - String col = getColumnInternalName(reduceValues.size()-1); + String col = getColumnInternalName(reduceValues.size() - 1); outputColumnNames.add(col); - reduceSinkOutputRowResolver2.put("", t.toStringTree(), - new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, - typeInfo, "", false)); + reduceSinkOutputRowResolver2.put("", t.toStringTree(), new ColumnInfo( + Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "", + false)); } ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, - numPartitionFields, numReducers), - new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), - groupByOperatorInfo), - reduceSinkOutputRowResolver2 - ); + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, + reduceValues, outputColumnNames, true, -1, numPartitionFields, + numReducers), new RowSchema(reduceSinkOutputRowResolver2 + .getColumnInfos()), groupByOperatorInfo), + reduceSinkOutputRowResolver2); rsOp.setColumnExprMap(colExprMap); return rsOp; } /** - * Generate the second GroupByOperator for the Group By Plan (parseInfo.getXXX(dest)). - * The new GroupByOperator will do the second aggregation based on the partial aggregation - * results. 
- * - * @param mode the mode of aggregation (FINAL) - * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the - * genericUDAFEvaluator. + * Generate the second GroupByOperator for the Group By Plan + * (parseInfo.getXXX(dest)). The new GroupByOperator will do the second + * aggregation based on the partial aggregation results. + * + * @param mode + * the mode of aggregation (FINAL) + * @param genericUDAFEvaluators + * The mapping from Aggregation StringTree to the + * genericUDAFEvaluator. * @return the new GroupByOperator * @throws SemanticException */ @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator2MR( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, - groupByDesc.Mode mode, Map genericUDAFEvaluators) - throws SemanticException { - RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRR(); + private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, + String dest, Operator reduceSinkOperatorInfo2, groupByDesc.Mode mode, + Map genericUDAFEvaluators) + throws SemanticException { + RowResolver groupByInputRowResolver2 = opParseCtx.get( + reduceSinkOperatorInfo2).getRR(); RowResolver groupByOutputRowResolver2 = new RowResolver(); groupByOutputRowResolver2.setIsExprResolver(true); ArrayList groupByKeys = new ArrayList(); @@ -2366,19 +2522,18 @@ for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver2.get("",text); + ColumnInfo exprInfo = groupByInputRowResolver2.get("", text); if (exprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } String expression = exprInfo.getInternalName(); groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), expression, - exprInfo.getTabAlias(), - exprInfo.getIsPartitionCol())); + exprInfo.getTabAlias(), exprInfo.getIsPartitionCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); - groupByOutputRowResolver2.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), "", false)); + groupByOutputRowResolver2.put("", grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } HashMap aggregationTrees = parseInfo @@ -2387,39 +2542,43 @@ ArrayList aggParameters = new ArrayList(); ASTNode value = entry.getValue(); String text = entry.getKey(); - ColumnInfo paraExprInfo = groupByInputRowResolver2.get("",text); + ColumnInfo paraExprInfo = groupByInputRowResolver2.get("", text); if (paraExprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); } String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); + assert (paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExpression, paraExprInfo.getTabAlias(), paraExprInfo + .getIsPartitionCol())); String aggName = value.getChild(0).getText(); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); 
- aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - (mode != groupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), - amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); + GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators + .get(entry.getKey()); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + aggregations + .add(new aggregationDesc( + aggName.toLowerCase(), + udaf.genericUDAFEvaluator, + udaf.convertedParameters, + (mode != groupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), + amode)); + String field = getColumnInternalName(groupByKeys.size() + + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver2.put("", value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); + groupByOutputRowResolver2.put("", value.toStringTree(), new ColumnInfo( + field, udaf.returnType, "", false)); } - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver2.getColumnInfos()), - reduceSinkOperatorInfo2), - groupByOutputRowResolver2 - ); + Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), + reduceSinkOperatorInfo2), groupByOutputRowResolver2); op.setColumnExprMap(colExprMap); return op; } @@ -2427,46 +2586,43 @@ /** * Generate a Group-By plan using a single map-reduce job (3 operators will be * inserted): - * + * * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, * A2_EXP) ) SortGroupBy (keys = (KEY.0,KEY.1), aggregations = * (count_distinct(KEY.2), sum(VALUE.0), count(VALUE.1))) Select (final * selects) - * + * * @param dest * @param qb * @param input * @return * @throws SemanticException - * - * Generate a Group-By plan using 1 map-reduce job. - * Spray by the group by key, and sort by the distinct key (if any), and - * compute aggregates * - * The agggregation evaluation functions are as follows: - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/merge - * (mode = COMPLETE) + * + * Generate a Group-By plan using 1 map-reduce job. Spray by the + * group by key, and sort by the distinct key (if any), and compute + * aggregates * The agggregation evaluation functions are as + * follows: Partitioning Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key + * if DISTINCT + * + * Reducer: iterate/merge (mode = COMPLETE) **/ - @SuppressWarnings({ "unused", "nls" }) - private Operator genGroupByPlan1MR(String dest, QB qb, - Operator input) throws SemanticException { + @SuppressWarnings( { "unused", "nls" }) + private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) + throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) + if (grpByExprs.isEmpty()) { numReducers = 1; + } // ////// 1. 
Generate ReduceSinkOperator - Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( - qb, dest, input, grpByExprs.size(), numReducers, false); + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, + dest, input, grpByExprs.size(), numReducers, false); // ////// 2. Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, @@ -2475,9 +2631,10 @@ return groupByOperatorInfo; } - static ArrayList getUDAFEvaluators(ArrayList aggs) { + static ArrayList getUDAFEvaluators( + ArrayList aggs) { ArrayList result = new ArrayList(); - for (int i=0; i genericUDAFEvaluators = - new LinkedHashMap(); + Map genericUDAFEvaluators = new LinkedHashMap(); QBParseInfo parseInfo = qb.getParseInfo(); // ////// 2. Generate GroupbyOperator - Operator groupByOperatorInfo = - genGroupByPlanGroupByOperator1(parseInfo, dest, input, groupByDesc.Mode.HASH, genericUDAFEvaluators, true); + Operator groupByOperatorInfo = genGroupByPlanGroupByOperator1(parseInfo, + dest, input, groupByDesc.Mode.HASH, genericUDAFEvaluators, true); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); // ////// 3. Generate ReduceSinkOperator2 Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); // ////// 4. Generate GroupbyOperator2 - Operator groupByOperatorInfo2 = - genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); + Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo, + dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, + genericUDAFEvaluators); return groupByOperatorInfo2; } @@ -2548,282 +2699,274 @@ /** * Generate a Group-By plan using a 2 map-reduce jobs (5 operators will be * inserted): - * + * * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, * A2_EXP) ) NOTE: If DISTINCT_EXP is null, partition by rand() SortGroupBy * (keys = (KEY.0,KEY.1), aggregations = (count_distinct(KEY.2), sum(VALUE.0), * count(VALUE.1))) ReduceSink ( keys = (0,1), values=(2,3,4)) SortGroupBy * (keys = (KEY.0,KEY.1), aggregations = (sum(VALUE.0), sum(VALUE.1), * sum(VALUE.2))) Select (final selects) - * + * * @param dest * @param qb * @param input * @return * @throws SemanticException - * - * Generate a Group-By plan using a 2 map-reduce jobs. - * Spray by the grouping key and distinct key (or a random number, if no distinct is - * present) in hope of getting a uniform distribution, and compute partial aggregates - * grouped by the reduction key (grouping key + distinct key). - * Evaluate partial aggregates first, and spray by the grouping key to compute actual - * aggregates in the second phase. - * The agggregation evaluation functions are as follows: - * Partitioning Key: - * random() if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminatePartial - * (mode = PARTIAL1) - * - * STAGE 2 - * - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: merge/terminate - * (mode = FINAL) + * + * Generate a Group-By plan using a 2 map-reduce jobs. 
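// Illustrative sketch, not part of this patch: the two-stage shape described
// in the Javadoc above, shown for SELECT k, count(DISTINCT v) FROM t GROUP BY k.
// Stage 1 partially aggregates keyed by (grouping key, distinct key); stage 2
// sprays by the grouping key alone and merges. The class name below is
// invented for this example.
import java.util.*;

class TwoStageDistinctSketch {
  public static void main(String[] args) {
    String[][] rows = { { "a", "1" }, { "a", "1" }, { "a", "2" }, { "b", "3" } };

    // Stage 1: partial aggregation keyed by grouping key + distinct key,
    // which collapses duplicate (k, v) pairs.
    Set<List<String>> partials = new LinkedHashSet<List<String>>();
    for (String[] r : rows) {
      partials.add(Arrays.asList(r[0], r[1]));
    }

    // Stage 2: spray by the grouping key only and merge the partial results.
    Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
    for (List<String> p : partials) {
      Integer c = counts.get(p.get(0));
      counts.put(p.get(0), c == null ? 1 : c + 1);
    }

    System.out.println(counts); // prints {a=2, b=1}
  }
}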
Spray by the + * grouping key and distinct key (or a random number, if no distinct + * is present) in hope of getting a uniform distribution, and + * compute partial aggregates grouped by the reduction key (grouping + * key + distinct key). Evaluate partial aggregates first, and spray + * by the grouping key to compute actual aggregates in the second + * phase. The agggregation evaluation functions are as follows: + * Partitioning Key: random() if no DISTINCT grouping + distinct key + * if DISTINCT + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key + * if DISTINCT + * + * Reducer: iterate/terminatePartial (mode = PARTIAL1) + * + * STAGE 2 + * + * Partitioning Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key + * if DISTINCT + * + * Reducer: merge/terminate (mode = FINAL) */ @SuppressWarnings("nls") - private Operator genGroupByPlan2MR(String dest, QB qb, - Operator input) throws SemanticException { + private Operator genGroupByPlan2MR(String dest, QB qb, Operator input) + throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); // ////// 1. Generate ReduceSinkOperator - // There is a special case when we want the rows to be randomly distributed to - // reducers for load balancing problem. That happens when there is no DISTINCT - // operator. We set the numPartitionColumns to -1 for this purpose. This is + // There is a special case when we want the rows to be randomly distributed + // to + // reducers for load balancing problem. That happens when there is no + // DISTINCT + // operator. We set the numPartitionColumns to -1 for this purpose. This is // captured by WritableComparableHiveObject.hashCode() function. - Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( - qb, dest, input, (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 - : Integer.MAX_VALUE), -1, false); + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, + dest, input, (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 + : Integer.MAX_VALUE), -1, false); // ////// 2. Generate GroupbyOperator - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanGroupByOperator(parseInfo, - dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIAL1, genericUDAFEvaluators); + Map genericUDAFEvaluators = new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator( + parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIAL1, + genericUDAFEvaluators); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) + if (grpByExprs.isEmpty()) { numReducers = 1; + } // ////// 3. Generate ReduceSinkOperator2 Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); // ////// 4. 
Generate GroupbyOperator2 - Operator groupByOperatorInfo2 = - genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, - groupByDesc.Mode.FINAL, genericUDAFEvaluators); + Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo, + dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, + genericUDAFEvaluators); return groupByOperatorInfo2; } private boolean optimizeMapAggrGroupBy(String dest, QB qb) { List grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); - if ((grpByExprs != null) && !grpByExprs.isEmpty()) + if ((grpByExprs != null) && !grpByExprs.isEmpty()) { return false; + } - if (qb.getParseInfo().getDistinctFuncExprForClause(dest) != null) + if (qb.getParseInfo().getDistinctFuncExprForClause(dest) != null) { return false; + } return true; } /** - * Generate a Group-By plan using 1 map-reduce job. - * First perform a map-side partial aggregation (to reduce the amount of data), at this - * point of time, we may turn off map-side partial aggregation based on its performance. - * Then spray by the group by key, and sort by the distinct key (if any), and + * Generate a Group-By plan using 1 map-reduce job. First perform a map-side + * partial aggregation (to reduce the amount of data), at this point of time, + * we may turn off map-side partial aggregation based on its performance. Then + * spray by the group by key, and sort by the distinct key (if any), and * compute aggregates based on actual aggregates - * - * The agggregation evaluation functions are as follows: - * Mapper: iterate/terminatePartial - * (mode = HASH) - * - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminate if DISTINCT - * merge/terminate if NO DISTINCT - * (mode = MERGEPARTIAL) + * + * The agggregation evaluation functions are as follows: Mapper: + * iterate/terminatePartial (mode = HASH) + * + * Partitioning Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer: iterate/terminate if DISTINCT merge/terminate if NO DISTINCT (mode + * = MERGEPARTIAL) */ @SuppressWarnings("nls") private Operator genGroupByPlanMapAggr1MR(String dest, QB qb, - Operator inputOperatorInfo) throws SemanticException { + Operator inputOperatorInfo) throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); // ////// Generate GroupbyOperator for a map-side partial aggregation - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, - dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); + Map genericUDAFEvaluators = new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator( + qb, dest, inputOperatorInfo, groupByDesc.Mode.HASH, + genericUDAFEvaluators); - this.groupOpToInputTables.put(groupByOperatorInfo, this.opParseCtx.get( + groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( inputOperatorInfo).getRR().getTableNames()); int numReducers = -1; - // Optimize the scenario when there are no grouping keys - only 1 reducer is needed + // Optimize the scenario when there are no grouping keys - only 1 reducer is + // needed List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) + if (grpByExprs.isEmpty()) { numReducers = 1; + } // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - 
genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, - grpByExprs.size(), numReducers, true); + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, grpByExprs.size(), numReducers, true); - // This is a 1-stage map-reduce processing of the groupby. Tha map-side aggregates was just used to - // reduce output data. In case of distincts, partial results are not used, and so iterate is again - // invoked on the reducer. In case of non-distincts, partial results are used, and merge is invoked + // This is a 1-stage map-reduce processing of the groupby. Tha map-side + // aggregates was just used to + // reduce output data. In case of distincts, partial results are not used, + // and so iterate is again + // invoked on the reducer. In case of non-distincts, partial results are + // used, and merge is invoked // on the reducer. return genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.MERGEPARTIAL, - genericUDAFEvaluators, false); + genericUDAFEvaluators, false); } /** - * Generate a Group-By plan using a 2 map-reduce jobs. - * However, only 1 group-by plan is generated if the query involves no grouping key and - * no distincts. In that case, the plan is same as generated by genGroupByPlanMapAggr1MR. - * Otherwise, the following plan is generated: - * First perform a map side partial aggregation (to reduce the amount of data). Then - * spray by the grouping key and distinct key (or a random number, if no distinct is - * present) in hope of getting a uniform distribution, and compute partial aggregates - * grouped by the reduction key (grouping key + distinct key). - * Evaluate partial aggregates first, and spray by the grouping key to compute actual - * aggregates in the second phase. - * The agggregation evaluation functions are as follows: - * Mapper: iterate/terminatePartial - * (mode = HASH) - * - * Partitioning Key: - * random() if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminatePartial if DISTINCT - * merge/terminatePartial if NO DISTINCT - * (mode = MERGEPARTIAL) - * - * STAGE 2 - * - * Partitioining Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: merge/terminate - * (mode = FINAL) + * Generate a Group-By plan using a 2 map-reduce jobs. However, only 1 + * group-by plan is generated if the query involves no grouping key and no + * distincts. In that case, the plan is same as generated by + * genGroupByPlanMapAggr1MR. Otherwise, the following plan is generated: First + * perform a map side partial aggregation (to reduce the amount of data). Then + * spray by the grouping key and distinct key (or a random number, if no + * distinct is present) in hope of getting a uniform distribution, and compute + * partial aggregates grouped by the reduction key (grouping key + distinct + * key). Evaluate partial aggregates first, and spray by the grouping key to + * compute actual aggregates in the second phase. 
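A minimal, self-contained sketch of the two-phase idea described above, assuming a single string grouping key and count aggregates; it uses plain Java collections in place of Hive's GroupByOperator and ReduceSinkOperator, and every class and method name in it is invented for the illustration:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** Sketch of map-side partial aggregation (HASH) followed by a final merge (FINAL). */
public class TwoPhaseCountSketch {

  /** HASH mode: each "mapper" pre-aggregates its own rows into partial counts. */
  static Map<String, Long> mapSidePartials(List<String> mapperRows) {
    Map<String, Long> partials = new HashMap<String, Long>();
    for (String key : mapperRows) {
      Long cur = partials.get(key);
      partials.put(key, cur == null ? 1L : cur + 1L);
    }
    return partials;
  }

  /** FINAL mode: the "reducer" merges the partials sprayed to it by grouping key. */
  static Map<String, Long> mergePartials(List<Map<String, Long>> allPartials) {
    Map<String, Long> merged = new HashMap<String, Long>();
    for (Map<String, Long> partial : allPartials) {
      for (Map.Entry<String, Long> e : partial.entrySet()) {
        Long cur = merged.get(e.getKey());
        merged.put(e.getKey(), cur == null ? e.getValue() : cur + e.getValue());
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    // Two "mappers", each holding a few rows of the grouping column.
    Map<String, Long> m1 = mapSidePartials(Arrays.asList("a", "a", "b"));
    Map<String, Long> m2 = mapSidePartials(Arrays.asList("b", "c"));
    // One "reducer" receives both partials and produces the final counts.
    System.out.println(mergePartials(Arrays.asList(m1, m2)));
  }
}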
The agggregation evaluation + * functions are as follows: Mapper: iterate/terminatePartial (mode = HASH) + * + * Partitioning Key: random() if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer: iterate/terminatePartial if DISTINCT merge/terminatePartial if NO + * DISTINCT (mode = MERGEPARTIAL) + * + * STAGE 2 + * + * Partitioining Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer: merge/terminate (mode = FINAL) */ @SuppressWarnings("nls") private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, - Operator inputOperatorInfo) throws SemanticException { + Operator inputOperatorInfo) throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); // ////// Generate GroupbyOperator for a map-side partial aggregation - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, - dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); + Map genericUDAFEvaluators = new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator( + qb, dest, inputOperatorInfo, groupByDesc.Mode.HASH, + genericUDAFEvaluators); - this.groupOpToInputTables.put(groupByOperatorInfo, this.opParseCtx.get( + groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( inputOperatorInfo).getRR().getTableNames()); - // Optimize the scenario when there are no grouping keys and no distinct - 2 map-reduce jobs are not needed + // Optimize the scenario when there are no grouping keys and no distinct - 2 + // map-reduce jobs are not needed // For eg: select count(1) from T where t.ds = .... if (!optimizeMapAggrGroupBy(dest, qb)) { // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, - (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 - : Integer.MAX_VALUE), -1, true); + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, (parseInfo + .getDistinctFuncExprForClause(dest) == null ? 
-1 + : Integer.MAX_VALUE), -1, true); // ////// Generate GroupbyOperator for a partial aggregation Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIALS, - genericUDAFEvaluators, false); + genericUDAFEvaluators, false); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) + if (grpByExprs.isEmpty()) { numReducers = 1; + } - // ////// Generate ReduceSinkOperator2 - Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(parseInfo, dest, groupByOperatorInfo2, - grpByExprs.size(), numReducers); + // ////// Generate ReduceSinkOperator2 + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( + parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers); // ////// Generate GroupbyOperator3 - return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); - } - else { + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, + reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, + genericUDAFEvaluators); + } else { // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest).size(), 1, true); + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest) + .size(), 1, true); - return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.FINAL, genericUDAFEvaluators); + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, + reduceSinkOperatorInfo, groupByDesc.Mode.FINAL, genericUDAFEvaluators); } } @SuppressWarnings("nls") - private Operator genConversionOps(String dest, QB qb, - Operator input) throws SemanticException { + private Operator genConversionOps(String dest, QB qb, Operator input) + throws SemanticException { Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest); - Table dest_tab = null; switch (dest_type.intValue()) { - case QBMetaData.DEST_TABLE: - { - dest_tab = qb.getMetaData().getDestTableForAlias(dest); - break; - } - case QBMetaData.DEST_PARTITION: - { - dest_tab = qb.getMetaData().getDestPartitionForAlias(dest).getTable(); - break; - } - default: - { - return input; - } + case QBMetaData.DEST_TABLE: { + qb.getMetaData().getDestTableForAlias(dest); + break; } + case QBMetaData.DEST_PARTITION: { + qb.getMetaData().getDestPartitionForAlias(dest).getTable(); + break; + } + default: { + return input; + } + } return input; } @SuppressWarnings("nls") - private Operator genFileSinkPlan(String dest, QB qb, - Operator input) throws SemanticException { + private Operator genFileSinkPlan(String dest, QB qb, Operator input) + throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRR(); QBMetaData qbm = qb.getMetaData(); Integer dest_type = qbm.getDestTypeForAlias(dest); - Table dest_tab; // destination table if any + Table dest_tab; // destination table if any String queryTmpdir; // the intermediate destination directory - Path dest_path; // the final destination directory + Path dest_path; // the final destination directory tableDesc table_desc = null; int currentTableId = 0; boolean isLocal = false; @@ -2831,154 +2974,160 @@ switch (dest_type.intValue()) { case QBMetaData.DEST_TABLE: { - dest_tab = qbm.getDestTableForAlias(dest); - //check for partition - List parts = 
dest_tab.getTTable().getPartitionKeys(); - if(parts != null && parts.size() > 0) { - throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); - } - dest_path = dest_tab.getPath(); - queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); - table_desc = Utilities.getTableDesc(dest_tab); + dest_tab = qbm.getDestTableForAlias(dest); + // check for partition + List parts = dest_tab.getTTable().getPartitionKeys(); + if (parts != null && parts.size() > 0) { + throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); + } + dest_path = dest_tab.getPath(); + queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); + table_desc = Utilities.getTableDesc(dest_tab); - this.idToTableNameMap.put( String.valueOf(this.destTableId), dest_tab.getName()); - currentTableId = this.destTableId; - this.destTableId ++; + idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getName()); + currentTableId = destTableId; + destTableId++; - // Create the work for moving the table - this.loadTableWork.add - (new loadTableDesc(queryTmpdir, - ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, - new HashMap())); - if (!outputs.add(new WriteEntity(dest_tab))) { - throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getName())); - } - break; + // Create the work for moving the table + loadTableWork.add(new loadTableDesc(queryTmpdir, ctx + .getExternalTmpFileURI(dest_path.toUri()), table_desc, + new HashMap())); + if (!outputs.add(new WriteEntity(dest_tab))) { + throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES + .getMsg(dest_tab.getName())); } + break; + } case QBMetaData.DEST_PARTITION: { - Partition dest_part = qbm.getDestPartitionForAlias(dest); - dest_tab = dest_part.getTable(); - dest_path = dest_part.getPath()[0]; - queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); - table_desc = Utilities.getTableDesc(dest_tab); + Partition dest_part = qbm.getDestPartitionForAlias(dest); + dest_tab = dest_part.getTable(); + dest_path = dest_part.getPath()[0]; + queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); + table_desc = Utilities.getTableDesc(dest_tab); - this.idToTableNameMap.put(String.valueOf(this.destTableId), dest_tab.getName()); - currentTableId = this.destTableId; - this.destTableId ++; + idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getName()); + currentTableId = destTableId; + destTableId++; - this.loadTableWork.add - (new loadTableDesc(queryTmpdir, - ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, dest_part.getSpec())); - if (!outputs.add(new WriteEntity(dest_part))) { - throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getName() + "@" + dest_part.getName())); - } - break; + loadTableWork.add(new loadTableDesc(queryTmpdir, ctx + .getExternalTmpFileURI(dest_path.toUri()), table_desc, dest_part + .getSpec())); + if (!outputs.add(new WriteEntity(dest_part))) { + throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES + .getMsg(dest_tab.getName() + "@" + dest_part.getName())); } + break; + } case QBMetaData.DEST_LOCAL_FILE: - isLocal = true; - // fall through + isLocal = true; + // fall through case QBMetaData.DEST_DFS_FILE: { - dest_path = new Path(qbm.getDestFileForAlias(dest)); - String destStr = dest_path.toString(); + dest_path = new Path(qbm.getDestFileForAlias(dest)); + String destStr = dest_path.toString(); - if (isLocal) { - // for local directory - we always write to map-red intermediate - // store and then copy to local fs - 
queryTmpdir = ctx.getMRTmpFileURI(); - } else { - // otherwise write to the file system implied by the directory - // no copy is required. we may want to revisit this policy in future + if (isLocal) { + // for local directory - we always write to map-red intermediate + // store and then copy to local fs + queryTmpdir = ctx.getMRTmpFileURI(); + } else { + // otherwise write to the file system implied by the directory + // no copy is required. we may want to revisit this policy in future - try { - Path qPath = FileUtils.makeQualified(dest_path, conf); - queryTmpdir = ctx.getExternalTmpFileURI(qPath.toUri()); - } catch (Exception e) { - throw new SemanticException("Error creating temporary folder on: " - + dest_path, e); - } + try { + Path qPath = FileUtils.makeQualified(dest_path, conf); + queryTmpdir = ctx.getExternalTmpFileURI(qPath.toUri()); + } catch (Exception e) { + throw new SemanticException("Error creating temporary folder on: " + + dest_path, e); } - String cols = new String(); - String colTypes = new String(); - Vector colInfos = inputRR.getColumnInfos(); + } + String cols = new String(); + String colTypes = new String(); + Vector colInfos = inputRR.getColumnInfos(); - // CTAS case: the file output format and serde are defined by the create table command - // rather than taking the default value - List field_schemas = null; - createTableDesc tblDesc = qb.getTableDesc(); - if ( tblDesc != null ) - field_schemas = new ArrayList(); + // CTAS case: the file output format and serde are defined by the create + // table command + // rather than taking the default value + List field_schemas = null; + createTableDesc tblDesc = qb.getTableDesc(); + if (tblDesc != null) { + field_schemas = new ArrayList(); + } - boolean first = true; - for (ColumnInfo colInfo:colInfos) { - String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); + boolean first = true; + for (ColumnInfo colInfo : colInfos) { + String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); - if ( nm[1] != null ) { // non-null column alias - colInfo.setAlias(nm[1]); - } + if (nm[1] != null) { // non-null column alias + colInfo.setAlias(nm[1]); + } - if ( field_schemas != null ) { - FieldSchema col = new FieldSchema(); - if ( nm[1] != null ) { - col.setName(colInfo.getAlias()); - } else { - col.setName(colInfo.getInternalName()); - } - col.setType(colInfo.getType().getTypeName()); - field_schemas.add(col); + if (field_schemas != null) { + FieldSchema col = new FieldSchema(); + if (nm[1] != null) { + col.setName(colInfo.getAlias()); + } else { + col.setName(colInfo.getInternalName()); } + col.setType(colInfo.getType().getTypeName()); + field_schemas.add(col); + } - if (!first) { - cols = cols.concat(","); - colTypes = colTypes.concat(":"); - } + if (!first) { + cols = cols.concat(","); + colTypes = colTypes.concat(":"); + } - first = false; - cols = cols.concat(colInfo.getInternalName()); + first = false; + cols = cols.concat(colInfo.getInternalName()); - // Replace VOID type with string when the output is a temp table or local files. - // A VOID type can be generated under the query: - // - // select NULL from tt; - // or - // insert overwrite local directory "abc" select NULL from tt; - // - // where there is no column type to which the NULL value should be converted. 
- // - String tName = colInfo.getType().getTypeName(); - if ( tName.equals(Constants.VOID_TYPE_NAME) ) - colTypes = colTypes.concat(Constants.STRING_TYPE_NAME); - else - colTypes = colTypes.concat(tName); + // Replace VOID type with string when the output is a temp table or + // local files. + // A VOID type can be generated under the query: + // + // select NULL from tt; + // or + // insert overwrite local directory "abc" select NULL from tt; + // + // where there is no column type to which the NULL value should be + // converted. + // + String tName = colInfo.getType().getTypeName(); + if (tName.equals(Constants.VOID_TYPE_NAME)) { + colTypes = colTypes.concat(Constants.STRING_TYPE_NAME); + } else { + colTypes = colTypes.concat(tName); } + } - // update the create table descriptor with the resulting schema. - if ( tblDesc != null ) - tblDesc.setCols(field_schemas); + // update the create table descriptor with the resulting schema. + if (tblDesc != null) { + tblDesc.setCols(field_schemas); + } - if (!ctx.isMRTmpFileURI(destStr)) { - this.idToTableNameMap.put( String.valueOf(this.destTableId), destStr); - currentTableId = this.destTableId; - this.destTableId ++; - } + if (!ctx.isMRTmpFileURI(destStr)) { + idToTableNameMap.put(String.valueOf(destTableId), destStr); + currentTableId = destTableId; + destTableId++; + } - boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); - this.loadFileWork.add(new loadFileDesc(queryTmpdir, destStr, - isDfsDir, cols, colTypes)); + boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); + loadFileWork.add(new loadFileDesc(queryTmpdir, destStr, isDfsDir, cols, + colTypes)); - if ( tblDesc == null ) { - table_desc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), - cols, colTypes, false); - } else { - table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes); - } + if (tblDesc == null) { + table_desc = PlanUtils.getDefaultTableDesc(Integer + .toString(Utilities.ctrlaCode), cols, colTypes, false); + } else { + table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes); + } - if (!outputs.add(new WriteEntity(destStr, !isDfsDir))) { - throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destStr)); - } - break; + if (!outputs.add(new WriteEntity(destStr, !isDfsDir))) { + throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES + .getMsg(destStr)); + } + break; } default: throw new SemanticException("Unknown destination type: " + dest_type); @@ -2990,41 +3139,42 @@ Vector vecCol = new Vector(); try { - StructObjectInspector rowObjectInspector = (StructObjectInspector)table_desc.getDeserializer().getObjectInspector(); - List fields = rowObjectInspector.getAllStructFieldRefs(); - for (int i=0; i fields = rowObjectInspector + .getAllStructFieldRefs(); + for (int i = 0; i < fields.size(); i++) { + vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils + .getTypeInfoFromObjectInspector(fields.get(i) + .getFieldObjectInspector()), "", false)); + } + } catch (Exception e) { throw new SemanticException(e.getMessage()); } RowSchema fsRS = new RowSchema(vecCol); - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new fileSinkDesc(queryTmpdir, table_desc, - conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId), + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( + new fileSinkDesc(queryTmpdir, table_desc, conf + .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId), fsRS, input), inputRR); LOG.debug("Created 
FileSink Plan for clause: " + dest + "dest_path: " - + dest_path + " row schema: " - + inputRR.toString()); + + dest_path + " row schema: " + inputRR.toString()); return output; } /** - * Generate the conversion SelectOperator that converts the columns into - * the types that are expected by the table_desc. + * Generate the conversion SelectOperator that converts the columns into the + * types that are expected by the table_desc. */ - Operator genConversionSelectOperator(String dest, QB qb, - Operator input, tableDesc table_desc) throws SemanticException { + Operator genConversionSelectOperator(String dest, QB qb, Operator input, + tableDesc table_desc) throws SemanticException { StructObjectInspector oi = null; try { - Deserializer deserializer = table_desc.getDeserializerClass().newInstance(); + Deserializer deserializer = table_desc.getDeserializerClass() + .newInstance(); deserializer.initialize(conf, table_desc.getProperties()); oi = (StructObjectInspector) deserializer.getObjectInspector(); } catch (Exception e) { @@ -3033,10 +3183,11 @@ // Check column number List tableFields = oi.getAllStructFieldRefs(); - Vector rowFields = opParseCtx.get(input).getRR().getColumnInfos(); + Vector rowFields = opParseCtx.get(input).getRR() + .getColumnInfos(); if (tableFields.size() != rowFields.size()) { - String reason = "Table " + dest + " has " + tableFields.size() + " columns but query has " - + rowFields.size() + " columns."; + String reason = "Table " + dest + " has " + tableFields.size() + + " columns but query has " + rowFields.size() + " columns."; throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( qb.getParseInfo().getDestForClause(dest), reason)); } @@ -3044,35 +3195,44 @@ // Check column types boolean converted = false; int columnNumber = tableFields.size(); - ArrayList expressions = new ArrayList(columnNumber); - // MetadataTypedColumnsetSerDe does not need type conversions because it does + ArrayList expressions = new ArrayList( + columnNumber); + // MetadataTypedColumnsetSerDe does not need type conversions because it + // does // the conversion to String by itself. - boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class); - boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class); + boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals( + MetadataTypedColumnsetSerDe.class); + boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals( + LazySimpleSerDe.class); if (!isMetaDataSerDe) { - for (int i=0; i colName = new ArrayList(); - for (int i=0; i colAliases, QB qb, Operator input) - throws SemanticException { + private Operator genUDTFPlan(GenericUDTF genericUDTF, + String outputTableAlias, ArrayList colAliases, QB qb, + Operator input) throws SemanticException { // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY QBParseInfo qbp = qb.getParseInfo(); @@ -3140,7 +3303,8 @@ throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg()); } - LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases); + LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + + colAliases); // Use the RowResolver from the input operator to generate a input // ObjectInspector that can be used to initialize the UDTF. 
Then, the @@ -3152,11 +3316,12 @@ // Create the object inspector for the input columns and initialize the UDTF ArrayList colNames = new ArrayList(); - ObjectInspector [] colOIs = new ObjectInspector[inputCols.size()]; - for (int i=0; i udtf = - putOpInsertMap(OperatorFactory.getAndMakeChild( - new udtfDesc(genericUDTF), - new RowSchema(out_rwsch.getColumnInfos()), - input), out_rwsch); + Operator udtf = putOpInsertMap(OperatorFactory.getAndMakeChild( + new udtfDesc(genericUDTF), new RowSchema(out_rwsch.getColumnInfos()), + input), out_rwsch); return udtf; } @SuppressWarnings("nls") - private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, int limit, boolean extraMRStep) - throws SemanticException { - // A map-only job can be optimized - instead of converting it to a map-reduce job, we can have another map - // job to do the same to avoid the cost of sorting in the map-reduce phase. A better approach would be to + private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, + int limit, boolean extraMRStep) throws SemanticException { + // A map-only job can be optimized - instead of converting it to a + // map-reduce job, we can have another map + // job to do the same to avoid the cost of sorting in the map-reduce phase. + // A better approach would be to // write into a local file and then have a map-only job. // Add the limit operator to get the value fields Operator curr = genLimitPlan(dest, qb, input, limit); // the client requested that an extra map-reduce step be performed - if (!extraMRStep) + if (!extraMRStep) { return curr; + } // Create a reduceSink operator followed by another limit curr = genReduceSinkPlan(dest, qb, curr, 1); @@ -3223,13 +3388,14 @@ } @SuppressWarnings("nls") - private Operator genReduceSinkPlan(String dest, QB qb, - Operator input, int numReducers) throws SemanticException { + private Operator genReduceSinkPlan(String dest, QB qb, Operator input, + int numReducers) throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRR(); // First generate the expression for the partition and sort keys - // The cluster by clause / distribute by clause has the aliases for partition function + // The cluster by clause / distribute by clause has the aliases for + // partition function ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest); if (partitionExprs == null) { partitionExprs = qb.getParseInfo().getDistributeByForClause(dest); @@ -3237,8 +3403,8 @@ ArrayList partitionCols = new ArrayList(); if (partitionExprs != null) { int ccount = partitionExprs.getChildCount(); - for(int i=0; i colExprMap = new HashMap(); ArrayList valueCols = new ArrayList(); - for(ColumnInfo colInfo: inputRR.getColumnInfos()) { - valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol())); - colExprMap.put(colInfo.getInternalName(), valueCols.get(valueCols.size() - 1)); + for (ColumnInfo colInfo : inputRR.getColumnInfos()) { + valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo + .getInternalName(), colInfo.getTabAlias(), colInfo + .getIsPartitionCol())); + colExprMap.put(colInfo.getInternalName(), valueCols + .get(valueCols.size() - 1)); } ArrayList outputColumns = new ArrayList(); - for (int i = 0; i < valueCols.size(); i++) + for (int i = 0; i < valueCols.size(); i++) { outputColumns.add(getColumnInternalName(i)); - Operator interim = putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(sortCols, valueCols, 
outputColumns, false, -1, partitionCols, order.toString(), - numReducers), - new RowSchema(inputRR.getColumnInfos()), - input), inputRR); + } + Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils + .getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1, + partitionCols, order.toString(), numReducers), new RowSchema( + inputRR.getColumnInfos()), input), inputRR); interim.setColumnExprMap(colExprMap); // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); RowResolver interim_rwsch = inputRR; Integer pos = Integer.valueOf(0); - for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { - String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); - out_rwsch.put(info[0], info[1], - new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), info[0], false)); + for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) { + String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); + out_rwsch.put(info[0], info[1], new ColumnInfo( + getColumnInternalName(pos), colInfo.getType(), info[0], false)); pos = Integer.valueOf(pos.intValue() + 1); } - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - Utilities.ReduceField.VALUE.toString(), - "", false)), - new RowSchema(out_rwsch.getColumnInfos()), - interim), out_rwsch); + Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( + out_rwsch.getColumnInfos()), interim), out_rwsch); LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: " + out_rwsch.toString()); return output; } - private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, - HashSet omitOpts) - throws SemanticException { + private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, + Operator[] right, HashSet omitOpts) throws SemanticException { RowResolver outputRS = new RowResolver(); ArrayList outputColumnNames = new ArrayList(); @@ -3343,45 +3510,46 @@ Map colExprMap = new HashMap(); HashMap> posToAliasMap = new HashMap>(); - for ( int pos = 0; pos < right.length; ++pos ) { + for (int pos = 0; pos < right.length; ++pos) { Operator input = right[pos]; - if (input == null) + if (input == null) { input = left; + } ArrayList keyDesc = new ArrayList(); - Byte tag = Byte.valueOf((byte)(((reduceSinkDesc)(input.getConf())).getTag())); + Byte tag = Byte.valueOf((byte) (((reduceSinkDesc) (input.getConf())) + .getTag())); // check whether this input operator produces output - if ( omitOpts == null || !omitOpts.contains(pos) ) { + if (omitOpts == null || !omitOpts.contains(pos)) { // prepare output descriptors for the input opt RowResolver inputRS = opParseCtx.get(input).getRR(); - Iterator keysIter = inputRS.getTableNames().iterator(); + Iterator keysIter = inputRS.getTableNames().iterator(); Set aliases = posToAliasMap.get(pos); - if(aliases == null) { + if (aliases == null) { aliases = new HashSet(); - posToAliasMap.put(pos, aliases); - } - while (keysIter.hasNext()) { + posToAliasMap.put(pos, aliases); + } + while (keysIter.hasNext()) { String key = keysIter.next(); aliases.add(key); HashMap map = inputRS.getFieldMap(key); Iterator fNamesIter = map.keySet().iterator(); - while (fNamesIter.hasNext()) { - String field = fNamesIter.next(); + while (fNamesIter.hasNext()) { + String field = fNamesIter.next(); ColumnInfo valueInfo = inputRS.get(key, field); - 
keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), - valueInfo.getInternalName(), - valueInfo.getTabAlias(), - valueInfo.getIsPartitionCol())); + keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), valueInfo + .getInternalName(), valueInfo.getTabAlias(), valueInfo + .getIsPartitionCol())); if (outputRS.get(key, field) == null) { String colName = getColumnInternalName(outputPos); outputPos++; outputColumnNames.add(colName); colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1)); - outputRS.put(key, field, new ColumnInfo(colName, - valueInfo.getType(), key, false)); + outputRS.put(key, field, new ColumnInfo(colName, valueInfo + .getType(), key, false)); reversedExprs.put(colName, tag); } } @@ -3391,7 +3559,8 @@ rightOps[pos] = input; } - org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join.getJoinCond().length]; + org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join + .getJoinCond().length]; for (int i = 0; i < join.getJoinCond().length; i++) { joinCond condn = join.getJoinCond()[i]; joinCondns[i] = new org.apache.hadoop.hive.ql.plan.joinCond(condn); @@ -3400,7 +3569,7 @@ joinDesc desc = new joinDesc(exprMap, outputColumnNames, joinCondns); desc.setReversedExprs(reversedExprs); JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, - new RowSchema(outputRS.getColumnInfos()), rightOps); + new RowSchema(outputRS.getColumnInfos()), rightOps); joinOp.setColumnExprMap(colExprMap); joinOp.setPosToAliasMap(posToAliasMap); return putOpInsertMap(joinOp, outputRS); @@ -3425,24 +3594,22 @@ ArrayList reduceValues = new ArrayList(); Iterator tblNamesIter = inputRS.getTableNames().iterator(); Map colExprMap = new HashMap(); - while (tblNamesIter.hasNext()) - { + while (tblNamesIter.hasNext()) { String src = tblNamesIter.next(); HashMap fMap = inputRS.getFieldMap(src); for (Map.Entry entry : fMap.entrySet()) { String field = entry.getKey(); ColumnInfo valueInfo = entry.getValue(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo.getType(), - valueInfo.getInternalName(), - valueInfo.getTabAlias(), - valueInfo.getIsPartitionCol()); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo + .getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol()); reduceValues.add(inputExpr); if (outputRS.get(src, field) == null) { String col = getColumnInternalName(reduceValues.size() - 1); outputColumns.add(col); - ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + - col, - valueInfo.getType(), src, false); + ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE + .toString() + + "." 
+ col, valueInfo.getType(), src, false); colExprMap.put(newColInfo.getInternalName(), inputExpr); outputRS.put(src, field, newColInfo); } @@ -3456,15 +3623,17 @@ numReds = 1; // Cartesian product is not supported in strict mode - if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")) + if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase( + "strict")) { throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg()); + } } - ReduceSinkOperator rsOp = (ReduceSinkOperator)putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, joinTree.getNextTag(), reduceKeys.size(), numReds), - new RowSchema(outputRS.getColumnInfos()), - child), outputRS); + ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, + reduceValues, outputColumns, false, joinTree.getNextTag(), + reduceKeys.size(), numReds), new RowSchema(outputRS + .getColumnInfos()), child), outputRS); rsOp.setColumnExprMap(colExprMap); return rsOp; } @@ -3473,29 +3642,31 @@ HashMap map) throws SemanticException { QBJoinTree leftChild = joinTree.getJoinSrc(); Operator joinSrcOp = null; - if (leftChild != null) - { + if (leftChild != null) { Operator joinOp = genJoinOperator(qb, leftChild, map); Vector filter = joinTree.getFilters().get(0); - for (ASTNode cond: filter) + for (ASTNode cond : filter) { joinOp = genFilterPlan(qb, cond, joinOp); + } joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinOp, null, 0); } Operator[] srcOps = new Operator[joinTree.getBaseSrc().length]; - HashSet omitOpts = null; // set of input to the join that should be omitted by the output + HashSet omitOpts = null; // set of input to the join that should be + // omitted by the output int pos = 0; for (String src : joinTree.getBaseSrc()) { if (src != null) { Operator srcOp = map.get(src); - // for left-semi join, generate an additional selection & group-by operator before ReduceSink + // for left-semi join, generate an additional selection & group-by + // operator before ReduceSink ArrayList fields = joinTree.getRHSSemijoinColumns(src); - if ( fields != null ) { + if (fields != null) { // the RHS table columns should be not be output from the join - if ( omitOpts == null ) { + if (omitOpts == null) { omitOpts = new HashSet(); } omitOpts.add(pos); @@ -3503,8 +3674,10 @@ // generate a selection operator for group-by keys only srcOp = insertSelectForSemijoin(fields, srcOp); - // generate a groupby operator (HASH mode) for a map-side partial aggregation for semijoin - srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, groupByDesc.Mode.HASH); + // generate a groupby operator (HASH mode) for a map-side partial + // aggregation for semijoin + srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, + groupByDesc.Mode.HASH); } // generate a ReduceSink operator for the join @@ -3519,151 +3692,110 @@ // Type checking and implicit type conversion for join keys genJoinOperatorTypeCheck(joinSrcOp, srcOps); - JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts); + JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree, + joinSrcOp, srcOps, omitOpts); joinContext.put(joinOp, joinTree); return joinOp; } /** - * Construct a selection operator for semijoin that filter out all fields other than the group by keys. 
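A minimal, self-contained sketch of the left semi join shape that this select/group-by pair feeds, assuming single-column string join keys; plain Java collections stand in for the select, group-by and join operators, and all names are invented for the illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/** Sketch of a left semi join: project RHS join keys, de-duplicate, then probe. */
public class SemiJoinSketch {

  public static void main(String[] args) {
    List<String[]> left = Arrays.asList(
        new String[] { "1", "alice" },
        new String[] { "2", "bob" },
        new String[] { "3", "carol" });
    List<String[]> right = Arrays.asList(
        new String[] { "1", "x" },
        new String[] { "1", "y" },
        new String[] { "3", "z" });

    // "Select" plus map-side "group by": keep only the RHS join key and
    // de-duplicate it, which is all the semijoin needs from the right table.
    Set<String> rightKeys = new HashSet<String>();
    for (String[] row : right) {
      rightKeys.add(row[0]);
    }

    // The join itself only tests key membership; no RHS columns are output.
    List<String[]> result = new ArrayList<String[]>();
    for (String[] row : left) {
      if (rightKeys.contains(row[0])) {
        result.add(row);
      }
    }
    for (String[] row : result) {
      System.out.println(row[0] + "\t" + row[1]);
    }
  }
}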
- * - * @param fields list of fields need to be output - * @param input input operator + * Construct a selection operator for semijoin that filter out all fields + * other than the group by keys. + * + * @param fields + * list of fields need to be output + * @param input + * input operator * @return the selection operator. * @throws SemanticException */ - private Operator insertSelectForSemijoin(ArrayList fields, Operator input) - throws SemanticException { + private Operator insertSelectForSemijoin(ArrayList fields, + Operator input) throws SemanticException { - RowResolver inputRR = opParseCtx.get(input).getRR(); + RowResolver inputRR = opParseCtx.get(input).getRR(); ArrayList colList = new ArrayList(); - ArrayList columnNames = new ArrayList(); + ArrayList columnNames = new ArrayList(); // construct the list of columns that need to be projected - for (ASTNode field: fields) { - exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, inputRR); + for (ASTNode field : fields) { + exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, + inputRR); colList.add(exprNode); columnNames.add(exprNode.getColumn()); } // create selection operator - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new selectDesc(colList, columnNames, false), - new RowSchema(inputRR.getColumnInfos()), - input), - inputRR); + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( + new selectDesc(colList, columnNames, false), new RowSchema(inputRR + .getColumnInfos()), input), inputRR); output.setColumnExprMap(input.getColumnExprMap()); return output; } - private Operator genMapGroupByForSemijoin(QB qb, - ArrayList fields, // the ASTNode of the join key "tab.col" - Operator inputOperatorInfo, - groupByDesc.Mode mode) - throws SemanticException { + private Operator genMapGroupByForSemijoin(QB qb, ArrayList fields, // the + // ASTNode + // of + // the + // join + // key + // "tab.col" + Operator inputOperatorInfo, groupByDesc.Mode mode) + throws SemanticException { - RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - RowResolver groupByOutputRowResolver = new RowResolver(); - ArrayList groupByKeys = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); + RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo) + .getRR(); + RowResolver groupByOutputRowResolver = new RowResolver(); + ArrayList groupByKeys = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); ArrayList aggregations = new ArrayList(); - Map colExprMap = new HashMap(); - QBParseInfo parseInfo = qb.getParseInfo(); + Map colExprMap = new HashMap(); + qb.getParseInfo(); - groupByOutputRowResolver.setIsExprResolver(true); // join keys should only be columns but not be expressions + groupByOutputRowResolver.setIsExprResolver(true); // join keys should only + // be columns but not be + // expressions for (int i = 0; i < fields.size(); ++i) { // get the group by keys to ColumnInfo ASTNode colName = fields.get(i); - exprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver); + exprNodeDesc grpByExprNode = genExprNodeDesc(colName, + groupByInputRowResolver); groupByKeys.add(grpByExprNode); // generate output column names String field = getColumnInternalName(i); outputColumnNames.add(field); - ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false); - groupByOutputRowResolver.put("", colName.toStringTree(), colInfo2); + ColumnInfo colInfo2 = new ColumnInfo(field, 
grpByExprNode.getTypeInfo(), + "", false); + groupByOutputRowResolver.put("", colName.toStringTree(), colInfo2); // establish mapping from the output column to the input column colExprMap.put(field, grpByExprNode); } // Generate group-by operator - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - groupByOutputRowResolver); + Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), + inputOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } - private Operator genReduceSinkForSemijoin(QB qb, - ArrayList fields, // semijoin key for the rhs table - Operator inputOperatorInfo) - throws SemanticException { - - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - QBParseInfo parseInfo = qb.getParseInfo(); - RowResolver reduceSinkOutputRowResolver = new RowResolver(); - Map colExprMap = new HashMap(); - ArrayList reduceKeys = new ArrayList(); - List outputColumnNames = new ArrayList(); - - reduceSinkOutputRowResolver.setIsExprResolver(true); - - // Pre-compute group-by keys and store in reduceKeys - for (int i = 0; i < fields.size(); ++i) { - // based on the input row resolver, resolve the column names and construct expression node descriptors - ASTNode colName = fields.get(i); - exprNodeDesc inputExpr = genExprNodeDesc(colName, reduceSinkInputRowResolver); - - reduceKeys.add(inputExpr); - - // create new ColumnInfos for the groupby columns and put them into the output row resolver - if (reduceSinkOutputRowResolver.get("", colName.toStringTree()) == null) { - outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." 
+ getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo1 = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), - null, false); - reduceSinkOutputRowResolver.put("", colName.toStringTree(), colInfo1); - colExprMap.put(colInfo1.getInternalName(), inputExpr); - } else { - throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg()); - } - } - - // SEMIJOIN HAS NO AGGREGATIONS, and we don't really use reduce values, so leave it as an empty list - ArrayList reduceValues = new ArrayList(); - int numPartitionFields = fields.size(); - - // finally generate the ReduceSink operator - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, -1), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - reduceSinkOutputRowResolver); - rsOp.setColumnExprMap(colExprMap); - - return rsOp; - } - - private void genJoinOperatorTypeCheck(Operator left, Operator[] right) throws SemanticException { + private void genJoinOperatorTypeCheck(Operator left, Operator[] right) + throws SemanticException { // keys[i] -> ArrayList for the i-th join operator key list ArrayList> keys = new ArrayList>(); int keyLength = 0; - for (int i=0; i map) throws SemanticException { + private void pushJoinFilters(QB qb, QBJoinTree joinTree, + HashMap map) throws SemanticException { Vector> filters = joinTree.getFilters(); - if (joinTree.getJoinSrc() != null) + if (joinTree.getJoinSrc() != null) { pushJoinFilters(qb, joinTree.getJoinSrc(), map); + } int pos = 0; for (String src : joinTree.getBaseSrc()) { if (src != null) { Operator srcOp = map.get(src); Vector filter = filters.get(pos); - for (ASTNode cond: filter) + for (ASTNode cond : filter) { srcOp = genFilterPlan(qb, cond, srcOp); + } map.put(src, srcOp); } pos++; @@ -3734,14 +3871,16 @@ List cols = new ArrayList(); ASTNode hints = qb.getParseInfo().getHints(); for (int pos = 0; pos < hints.getChildCount(); pos++) { - ASTNode hint = (ASTNode)hints.getChild(pos); - if (((ASTNode)hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) { - ASTNode hintTblNames = (ASTNode)hint.getChild(1); + ASTNode hint = (ASTNode) hints.getChild(pos); + if (((ASTNode) hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) { + ASTNode hintTblNames = (ASTNode) hint.getChild(1); int numCh = hintTblNames.getChildCount(); for (int tblPos = 0; tblPos < numCh; tblPos++) { - String tblName = ((ASTNode)hintTblNames.getChild(tblPos)).getText().toLowerCase(); - if (!cols.contains(tblName)) + String tblName = ((ASTNode) hintTblNames.getChild(tblPos)).getText() + .toLowerCase(); + if (!cols.contains(tblName)) { cols.add(tblName); + } } } } @@ -3750,7 +3889,7 @@ } private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree) - throws SemanticException { + throws SemanticException { QBJoinTree joinTree = new QBJoinTree(); joinTree.setNoOuterJoin(false); @@ -3759,66 +3898,68 @@ // Create joinTree structures to fill them up later Vector rightAliases = new Vector(); - Vector leftAliases = new Vector(); - Vector baseSrc = new Vector(); - Vector preserved = new Vector(); + Vector leftAliases = new Vector(); + Vector baseSrc = new Vector(); + Vector preserved = new Vector(); boolean lastPreserved = false; int cols = -1; - for(int i = 0; i < joinParseTree.getChildCount(); i++) { + for (int i = 0; i < joinParseTree.getChildCount(); i++) { ASTNode child = (ASTNode) 
joinParseTree.getChild(i); - switch(child.getToken().getType()) { - case HiveParser.TOK_TABREF: - // Handle a table - populate aliases appropriately: - // leftAliases should contain the first table, rightAliases should - // contain all other tables and baseSrc should contain all tables + switch (child.getToken().getType()) { + case HiveParser.TOK_TABREF: + // Handle a table - populate aliases appropriately: + // leftAliases should contain the first table, rightAliases should + // contain all other tables and baseSrc should contain all tables - String table_name = unescapeIdentifier(child.getChild(0).getText()); - String alias = child.getChildCount() == 1 ? table_name : - unescapeIdentifier(child.getChild(child.getChildCount()-1).getText().toLowerCase()); + String table_name = unescapeIdentifier(child.getChild(0).getText()); + String alias = child.getChildCount() == 1 ? table_name + : unescapeIdentifier(child.getChild(child.getChildCount() - 1) + .getText().toLowerCase()); - if (i == 0) { - leftAliases.add(alias); - joinTree.setLeftAlias(alias); - } else { - rightAliases.add(alias); - } - baseSrc.add(alias); + if (i == 0) { + leftAliases.add(alias); + joinTree.setLeftAlias(alias); + } else { + rightAliases.add(alias); + } + baseSrc.add(alias); - preserved.add(lastPreserved); - lastPreserved = false; - break; + preserved.add(lastPreserved); + lastPreserved = false; + break; - case HiveParser.TOK_EXPLIST: - if (cols == -1 && child.getChildCount() != 0) { - cols = child.getChildCount(); - } else if(child.getChildCount() != cols) { - throw new SemanticException("Tables with different or invalid " + - "number of keys in UNIQUEJOIN"); - } + case HiveParser.TOK_EXPLIST: + if (cols == -1 && child.getChildCount() != 0) { + cols = child.getChildCount(); + } else if (child.getChildCount() != cols) { + throw new SemanticException("Tables with different or invalid " + + "number of keys in UNIQUEJOIN"); + } - Vector expressions = new Vector(); - Vector filt = new Vector(); + Vector expressions = new Vector(); + Vector filt = new Vector(); - for (Node exp: child.getChildren()) { - expressions.add((ASTNode)exp); - } + for (Node exp : child.getChildren()) { + expressions.add((ASTNode) exp); + } - joinTree.getExpressions().add(expressions); - joinTree.getFilters().add(filt); - break; + joinTree.getExpressions().add(expressions); + joinTree.getFilters().add(filt); + break; - case HiveParser.KW_PRESERVE: - lastPreserved = true; - break; + case HiveParser.KW_PRESERVE: + lastPreserved = true; + break; - case HiveParser.TOK_SUBQUERY: - throw new SemanticException("Subqueries are not supported in UNIQUEJOIN"); + case HiveParser.TOK_SUBQUERY: + throw new SemanticException( + "Subqueries are not supported in UNIQUEJOIN"); - default: - throw new SemanticException("Unexpected UNIQUEJOIN structure"); + default: + throw new SemanticException("Unexpected UNIQUEJOIN structure"); } } @@ -3844,7 +3985,7 @@ QBJoinTree joinTree = new QBJoinTree(); joinCond[] condn = new joinCond[1]; - switch (joinParseTree.getToken().getType() ) { + switch (joinParseTree.getToken().getType()) { case HiveParser.TOK_LEFTOUTERJOIN: joinTree.setNoOuterJoin(false); condn[0] = new joinCond(0, 1, joinType.LEFTOUTER); @@ -3875,8 +4016,9 @@ if ((left.getToken().getType() == HiveParser.TOK_TABREF) || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)) { String table_name = unescapeIdentifier(left.getChild(0).getText()); - String alias = left.getChildCount() == 1 ? 
table_name : - unescapeIdentifier(left.getChild(left.getChildCount()-1).getText().toLowerCase()); + String alias = left.getChildCount() == 1 ? table_name + : unescapeIdentifier(left.getChild(left.getChildCount() - 1) + .getText().toLowerCase()); joinTree.setLeftAlias(alias); String[] leftAliases = new String[1]; leftAliases[0] = alias; @@ -3884,38 +4026,42 @@ String[] children = new String[2]; children[0] = alias; joinTree.setBaseSrc(children); - } - else if (isJoinToken(left)) { + } else if (isJoinToken(left)) { QBJoinTree leftTree = genJoinTree(qb, left); joinTree.setJoinSrc(leftTree); String[] leftChildAliases = leftTree.getLeftAliases(); String leftAliases[] = new String[leftChildAliases.length + 1]; - for (int i = 0; i < leftChildAliases.length; i++) + for (int i = 0; i < leftChildAliases.length; i++) { leftAliases[i] = leftChildAliases[i]; + } leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; joinTree.setLeftAliases(leftAliases); - } else + } else { assert (false); + } if ((right.getToken().getType() == HiveParser.TOK_TABREF) || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)) { String table_name = unescapeIdentifier(right.getChild(0).getText()); - String alias = right.getChildCount() == 1 ? table_name : - unescapeIdentifier(right.getChild(right.getChildCount()-1).getText().toLowerCase()); + String alias = right.getChildCount() == 1 ? table_name + : unescapeIdentifier(right.getChild(right.getChildCount() - 1) + .getText().toLowerCase()); String[] rightAliases = new String[1]; rightAliases[0] = alias; joinTree.setRightAliases(rightAliases); String[] children = joinTree.getBaseSrc(); - if (children == null) + if (children == null) { children = new String[2]; + } children[1] = alias; joinTree.setBaseSrc(children); // remember rhs table for semijoin if (joinTree.getNoSemiJoin() == false) { joinTree.addRHSSemijoin(alias); } - } else + } else { assert false; + } Vector> expressions = new Vector>(); expressions.add(new Vector()); @@ -3930,24 +4076,28 @@ ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); Vector leftSrc = new Vector(); parseJoinCondition(joinTree, joinCond, leftSrc); - if (leftSrc.size() == 1) + if (leftSrc.size() == 1) { joinTree.setLeftAlias(leftSrc.get(0)); + } - // check the hints to see if the user has specified a map-side join. This will be removed later on, once the cost-based + // check the hints to see if the user has specified a map-side join. 
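For context on what the MAPJOIN hint requests, a minimal, self-contained sketch of a map-side hash join, assuming the hinted table fits in memory and single string join keys; all names are invented for the illustration:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** Sketch of a map-side join: build a hash table on the hinted small table, stream the big one. */
public class MapJoinSketch {

  public static void main(String[] args) {
    // The table named in the MAPJOIN hint is assumed small enough for memory.
    Map<String, String> smallTable = new HashMap<String, String>();
    smallTable.put("1", "engineering");
    smallTable.put("2", "sales");

    // The other input is streamed row by row through the mapper; each row is
    // probed against the in-memory table, so no reduce phase is needed.
    List<String[]> bigTable = Arrays.asList(
        new String[] { "1", "alice" },
        new String[] { "2", "bob" },
        new String[] { "3", "carol" });

    for (String[] row : bigTable) {
      String dept = smallTable.get(row[0]);
      if (dept != null) { // inner-join semantics for the sketch
        System.out.println(row[1] + "\t" + dept);
      }
    }
  }
}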
This + // will be removed later on, once the cost-based // infrastructure is in place if (qb.getParseInfo().getHints() != null) { List mapSideTables = getMapSideJoinTables(qb); - List mapAliases = joinTree.getMapAliases(); + List mapAliases = joinTree.getMapAliases(); for (String mapTbl : mapSideTables) { boolean mapTable = false; for (String leftAlias : joinTree.getLeftAliases()) { - if (mapTbl.equalsIgnoreCase(leftAlias)) + if (mapTbl.equalsIgnoreCase(leftAlias)) { mapTable = true; + } } for (String rightAlias : joinTree.getRightAliases()) { - if (mapTbl.equalsIgnoreCase(rightAlias)) + if (mapTbl.equalsIgnoreCase(rightAlias)) { mapTable = true; + } } if (mapTable) { @@ -3970,8 +4120,8 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { List streamAliases = joinTree.getStreamAliases(); - for (Node hintNode: qb.getParseInfo().getHints().getChildren()) { - ASTNode hint = (ASTNode)hintNode; + for (Node hintNode : qb.getParseInfo().getHints().getChildren()) { + ASTNode hint = (ASTNode) hintNode; if (hint.getChild(0).getType() == HiveParser.TOK_STREAMTABLE) { for (int i = 0; i < hint.getChild(1).getChildCount(); i++) { if (streamAliases == null) { @@ -3992,49 +4142,58 @@ String[] rightAliases = new String[nodeRightAliases.length + trgtRightAliases.length]; - for (int i = 0; i < trgtRightAliases.length; i++) + for (int i = 0; i < trgtRightAliases.length; i++) { rightAliases[i] = trgtRightAliases[i]; - for (int i = 0; i < nodeRightAliases.length; i++) + } + for (int i = 0; i < nodeRightAliases.length; i++) { rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i]; + } target.setRightAliases(rightAliases); String[] nodeBaseSrc = node.getBaseSrc(); String[] trgtBaseSrc = target.getBaseSrc(); String[] baseSrc = new String[nodeBaseSrc.length + trgtBaseSrc.length - 1]; - for (int i = 0; i < trgtBaseSrc.length; i++) + for (int i = 0; i < trgtBaseSrc.length; i++) { baseSrc[i] = trgtBaseSrc[i]; - for (int i = 1; i < nodeBaseSrc.length; i++) + } + for (int i = 1; i < nodeBaseSrc.length; i++) { baseSrc[i + trgtBaseSrc.length - 1] = nodeBaseSrc[i]; + } target.setBaseSrc(baseSrc); Vector> expr = target.getExpressions(); - for (int i = 0; i < nodeRightAliases.length; i++) + for (int i = 0; i < nodeRightAliases.length; i++) { expr.add(node.getExpressions().get(i + 1)); + } Vector> filter = target.getFilters(); - for (int i = 0; i < nodeRightAliases.length; i++) + for (int i = 0; i < nodeRightAliases.length; i++) { filter.add(node.getFilters().get(i + 1)); + } if (node.getFilters().get(0).size() != 0) { Vector filterPos = filter.get(pos); filterPos.addAll(node.getFilters().get(0)); } - if (qb.getQbJoinTree() == node) + if (qb.getQbJoinTree() == node) { qb.setQbJoinTree(node.getJoinSrc()); - else + } else { parent.setJoinSrc(node.getJoinSrc()); + } - if (node.getNoOuterJoin() && target.getNoOuterJoin()) + if (node.getNoOuterJoin() && target.getNoOuterJoin()) { target.setNoOuterJoin(true); - else + } else { target.setNoOuterJoin(false); + } - if (node.getNoSemiJoin() && target.getNoSemiJoin()) + if (node.getNoSemiJoin() && target.getNoSemiJoin()) { target.setNoSemiJoin(true); - else + } else { target.setNoSemiJoin(false); + } target.mergeRHSSemijoin(node); @@ -4043,16 +4202,17 @@ joinCond[] targetCondns = target.getJoinCond(); int targetCondnsSize = targetCondns.length; joinCond[] newCondns = new joinCond[nodeCondnsSize + targetCondnsSize]; - for (int i = 0; i < targetCondnsSize; i++) + for (int i = 0; i < targetCondnsSize; i++) { newCondns[i] = targetCondns[i]; + } - for (int i = 0; i < 
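A minimal standalone sketch of the MAPJOIN hint matching performed above, assuming plain String aliases in place of the AST-derived ones; collectMapAliases and its parameters are illustrative names, not Hive API:

import java.util.ArrayList;
import java.util.List;

class MapJoinHintSketch {
  // Keep only the hinted tables that actually name one of the join's
  // aliases, comparing case-insensitively as the analyzer does above.
  static List<String> collectMapAliases(List<String> hintedTables,
      List<String> leftAliases, List<String> rightAliases) {
    List<String> mapAliases = new ArrayList<String>();
    for (String hinted : hintedTables) {
      boolean found = false;
      for (String alias : leftAliases) {
        if (hinted.equalsIgnoreCase(alias)) {
          found = true;
        }
      }
      for (String alias : rightAliases) {
        if (hinted.equalsIgnoreCase(alias)) {
          found = true;
        }
      }
      if (found) {
        mapAliases.add(hinted);
      }
    }
    return mapAliases;
  }
}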
nodeCondnsSize; i++) - { + for (int i = 0; i < nodeCondnsSize; i++) { joinCond nodeCondn = nodeCondns[i]; - if (nodeCondn.getLeft() == 0) + if (nodeCondn.getLeft() == 0) { nodeCondn.setLeft(pos); - else + } else { nodeCondn.setLeft(nodeCondn.getLeft() + targetCondnsSize); + } nodeCondn.setRight(nodeCondn.getRight() + targetCondnsSize); newCondns[targetCondnsSize + i] = nodeCondn; } @@ -4061,9 +4221,11 @@ if (target.isMapSideJoin()) { assert node.isMapSideJoin(); List mapAliases = target.getMapAliases(); - for (String mapTbl : node.getMapAliases()) - if (!mapAliases.contains(mapTbl)) + for (String mapTbl : node.getMapAliases()) { + if (!mapAliases.contains(mapTbl)) { mapAliases.add(mapTbl); + } + } target.setMapAliases(mapAliases); } } @@ -4071,18 +4233,17 @@ private int findMergePos(QBJoinTree node, QBJoinTree target) { int res = -1; String leftAlias = node.getLeftAlias(); - if (leftAlias == null) + if (leftAlias == null) { return -1; + } Vector nodeCondn = node.getExpressions().get(0); Vector targetCondn = null; - if (leftAlias.equals(target.getLeftAlias())) - { + if (leftAlias.equals(target.getLeftAlias())) { targetCondn = target.getExpressions().get(0); res = 0; - } - else + } else { for (int i = 0; i < target.getRightAliases().length; i++) { if (leftAlias.equals(target.getRightAliases()[i])) { targetCondn = target.getExpressions().get(i + 1); @@ -4090,22 +4251,27 @@ break; } } + } - if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) + if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) { return -1; + } - for (int i = 0; i < nodeCondn.size(); i++) + for (int i = 0; i < nodeCondn.size(); i++) { if (!nodeCondn.get(i).toStringTree().equals( - targetCondn.get(i).toStringTree())) + targetCondn.get(i).toStringTree())) { return -1; + } + } return res; } private boolean mergeJoinNodes(QB qb, QBJoinTree parent, QBJoinTree node, QBJoinTree target) { - if (target == null) + if (target == null) { return false; + } int res = findMergePos(node, target); if (res != -1) { @@ -4123,9 +4289,9 @@ boolean merged = mergeJoinNodes(qb, parent, root, root.getJoinSrc()); if (parent == null) { - if (merged) + if (merged) { root = qb.getQbJoinTree(); - else { + } else { parent = root; root = root.getJoinSrc(); } @@ -4146,11 +4312,12 @@ for (int i = 0; i < columns.size(); i++) { ColumnInfo col = columns.get(i); colList.add(new exprNodeColumnDesc(col.getType(), col.getInternalName(), - col.getTabAlias(), col.getIsPartitionCol())); + col.getTabAlias(), col.getIsPartitionCol())); columnNames.add(col.getInternalName()); } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new selectDesc(colList, columnNames, true), new RowSchema(inputRR.getColumnInfos()), input), inputRR); + new selectDesc(colList, columnNames, true), new RowSchema(inputRR + .getColumnInfos()), input), inputRR); output.setColumnExprMap(input.getColumnExprMap()); return output; } @@ -4165,30 +4332,32 @@ ks.addAll(qbp.getClauseNames()); // Go over all the destination tables - if (ks.size() <= 1) + if (ks.size() <= 1) { return null; + } List oldList = null; - List oldASTList = null; + List oldASTList = null; for (String dest : ks) { - Operator curr = input; - // If a filter is present, common processing is not possible - if (qbp.getWhrForClause(dest) != null) + if (qbp.getWhrForClause(dest) != null) { return null; + } if (qbp.getAggregationExprsForClause(dest).size() == 0 - && getGroupByForClause(qbp, dest).size() == 0) + && getGroupByForClause(qbp, dest).size() == 0) { return null; + } // All 
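findMergePos above decides whether two adjacent join trees can be merged by comparing the string form of their join-key expressions; a condensed sketch of that comparison, assuming the keys have already been rendered as strings (names here are illustrative, not Hive API):

import java.util.List;

class JoinMergeSketch {
  // Mirrors the toStringTree() comparison in findMergePos: both sides must
  // join on exactly the same key expressions, in the same order.
  static boolean sameJoinKeys(List<String> nodeKeys, List<String> targetKeys) {
    if (targetKeys == null || nodeKeys.size() != targetKeys.size()) {
      return false;
    }
    for (int i = 0; i < nodeKeys.size(); i++) {
      if (!nodeKeys.get(i).equals(targetKeys.get(i))) {
        return false;
      }
    }
    return true;
  }
}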
distinct expressions must be the same ASTNode value = qbp.getDistinctFuncExprForClause(dest); - if (value == null) + if (value == null) { return null; + } List currDestList = new ArrayList(); - List currASTList = new ArrayList(); + List currASTList = new ArrayList(); try { // 0 is function name for (int i = 1; i < value.getChildCount(); i++) { @@ -4201,16 +4370,16 @@ } if (oldList == null) { - oldList = currDestList; + oldList = currDestList; oldASTList = currASTList; - } - else { - if (oldList.size() != currDestList.size()) + } else { + if (oldList.size() != currDestList.size()) { return null; - for (int pos = 0; pos < oldList.size(); pos++) - { - if (!oldList.get(pos).isSame(currDestList.get(pos))) + } + for (int pos = 0; pos < oldList.size(); pos++) { + if (!oldList.get(pos).isSame(currDestList.get(pos))) { return null; + } } } } @@ -4218,7 +4387,8 @@ return oldASTList; } - private Operator createCommonReduceSink(QB qb, Operator input) throws SemanticException { + private Operator createCommonReduceSink(QB qb, Operator input) + throws SemanticException { // Go over all the tables and extract the common distinct key List distExprs = getCommonDistinctExprs(qb, input); @@ -4230,7 +4400,7 @@ RowResolver inputRR = opParseCtx.get(input).getRR(); RowResolver reduceSinkOutputRowResolver = new RowResolver(); reduceSinkOutputRowResolver.setIsExprResolver(true); - ArrayList reduceKeys = new ArrayList(); + ArrayList reduceKeys = new ArrayList(); ArrayList reduceValues = new ArrayList(); Map colExprMap = new HashMap(); @@ -4243,9 +4413,10 @@ String text = distn.toStringTree(); if (reduceSinkOutputRowResolver.get("", text) == null) { outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), "", false); + String field = Utilities.ReduceField.KEY.toString() + "." + + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( + reduceKeys.size() - 1).getTypeInfo(), "", false); reduceSinkOutputRowResolver.put("", text, colInfo); colExprMap.put(colInfo.getInternalName(), distExpr); } @@ -4257,69 +4428,79 @@ List grpByExprs = getGroupByForClause(qbp, dest); for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); + String text = grpbyExpr.toStringTree(); if (reduceSinkOutputRowResolver.get("", text) == null) { exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR); reduceValues.add(grpByExprNode); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); + String field = Utilities.ReduceField.VALUE.toString() + "." 
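getCommonDistinctExprs above returns the shared distinct expressions only when every destination has no WHERE clause, has a group-by or aggregation, and uses textually identical distinct arguments; a simplified sketch of that eligibility test, with DestInfo standing in for the per-clause data held by QBParseInfo:

import java.util.List;

class CommonDistinctSketch {
  static class DestInfo {
    boolean hasWhere;
    boolean hasGroupByOrAggregation;
    List<String> distinctExprs; // string form of the distinct arguments
  }

  // Returns the shared distinct expressions, or null when the optimization
  // does not apply (mirroring getCommonDistinctExprs returning null).
  static List<String> commonDistinct(List<DestInfo> dests) {
    if (dests.size() <= 1) {
      return null;
    }
    List<String> common = null;
    for (DestInfo d : dests) {
      if (d.hasWhere || !d.hasGroupByOrAggregation || d.distinctExprs == null) {
        return null;
      }
      if (common == null) {
        common = d.distinctExprs;
      } else if (!common.equals(d.distinctExprs)) {
        return null;
      }
    }
    return common;
  }
}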
+ + getColumnInternalName(reduceValues.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get( + reduceValues.size() - 1).getTypeInfo(), "", false); reduceSinkOutputRowResolver.put("", text, colInfo); outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); } } // For each aggregation - HashMap aggregationTrees = qbp.getAggregationExprsForClause(dest); + HashMap aggregationTrees = qbp + .getAggregationExprsForClause(dest); assert (aggregationTrees != null); for (Map.Entry entry : aggregationTrees.entrySet()) { ASTNode value = entry.getValue(); - String aggName = value.getChild(0).getText(); + value.getChild(0).getText(); // 0 is the function name for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode)value.getChild(i); - String text = paraExpr.toStringTree(); + ASTNode paraExpr = (ASTNode) value.getChild(i); + String text = paraExpr.toStringTree(); if (reduceSinkOutputRowResolver.get("", text) == null) { exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); reduceValues.add(paraExprNode); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); + String field = Utilities.ReduceField.VALUE.toString() + "." + + getColumnInternalName(reduceValues.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get( + reduceValues.size() - 1).getTypeInfo(), "", false); reduceSinkOutputRowResolver.put("", text, colInfo); - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + outputColumnNames + .add(getColumnInternalName(reduceValues.size() - 1)); } } } } - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input), - reduceSinkOutputRowResolver); + ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, + reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), + new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input), + reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); return rsOp; } @SuppressWarnings("nls") - private Operator genBodyPlan(QB qb, Operator input) - throws SemanticException { + private Operator genBodyPlan(QB qb, Operator input) throws SemanticException { QBParseInfo qbp = qb.getParseInfo(); TreeSet ks = new TreeSet(); ks.addAll(qbp.getClauseNames()); - // For multi-group by with the same distinct, we ignore all user hints currently. It doesnt matter whether he has asked to do + // For multi-group by with the same distinct, we ignore all user hints + // currently. It doesnt matter whether he has asked to do // map-side aggregation or not. Map side aggregation is turned off boolean optimizeMultiGroupBy = (getCommonDistinctExprs(qb, input) != null); Operator curr = null; - // If there are multiple group-bys, map-side aggregation is turned off, there are no filters - // and there is a single distinct, optimize that. Spray initially by the distinct key, - // no computation at the mapper. Have multiple group by operators at the reducer - and then + // If there are multiple group-bys, map-side aggregation is turned off, + // there are no filters + // and there is a single distinct, optimize that. 
Spray initially by the + // distinct key, + // no computation at the mapper. Have multiple group by operators at the + // reducer - and then // proceed if (optimizeMultiGroupBy) { curr = createCommonReduceSink(qb, input); @@ -4327,7 +4508,7 @@ RowResolver currRR = opParseCtx.get(curr).getRR(); // create a forward operator input = putOpInsertMap(OperatorFactory.getAndMakeChild(new forwardDesc(), - new RowSchema(currRR.getColumnInfos()), curr), currRR); + new RowSchema(currRR.getColumnInfos()), curr), currRR); for (String dest : ks) { curr = input; @@ -4340,86 +4521,94 @@ } curr = genFileSinkPlan(dest, qb, curr); } - } - else { - // Go over all the destination tables - for (String dest : ks) { - curr = input; + } else { + // Go over all the destination tables + for (String dest : ks) { + curr = input; - if (qbp.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, curr); - } + if (qbp.getWhrForClause(dest) != null) { + curr = genFilterPlan(dest, qb, curr); + } - if (qbp.getAggregationExprsForClause(dest).size() != 0 - || getGroupByForClause(qbp, dest).size() > 0) - { - // insert a select operator here used by the ColumnPruner to reduce the data to shuffle - curr = insertSelectAllPlanForGroupBy(dest, curr); - if (conf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE).equalsIgnoreCase("true")) { - if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("false")) - curr = genGroupByPlanMapAggr1MR(dest, qb, curr); - else - curr = genGroupByPlanMapAggr2MR(dest, qb, curr); + if (qbp.getAggregationExprsForClause(dest).size() != 0 + || getGroupByForClause(qbp, dest).size() > 0) { + // insert a select operator here used by the ColumnPruner to reduce + // the data to shuffle + curr = insertSelectAllPlanForGroupBy(dest, curr); + if (conf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE) + .equalsIgnoreCase("true")) { + if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + .equalsIgnoreCase("false")) { + curr = genGroupByPlanMapAggr1MR(dest, qb, curr); + } else { + curr = genGroupByPlanMapAggr2MR(dest, qb, curr); + } + } else if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + .equalsIgnoreCase("true")) { + curr = genGroupByPlan2MR(dest, qb, curr); + } else { + curr = genGroupByPlan1MR(dest, qb, curr); + } } - else if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("true")) - curr = genGroupByPlan2MR(dest, qb, curr); - else - curr = genGroupByPlan1MR(dest, qb, curr); - } - curr = genSelectPlan(dest, qb, curr); - Integer limit = qbp.getDestLimit(dest); + curr = genSelectPlan(dest, qb, curr); + Integer limit = qbp.getDestLimit(dest); - if (qbp.getClusterByForClause(dest) != null - || qbp.getDistributeByForClause(dest) != null - || qbp.getOrderByForClause(dest) != null - || qbp.getSortByForClause(dest) != null) { + if (qbp.getClusterByForClause(dest) != null + || qbp.getDistributeByForClause(dest) != null + || qbp.getOrderByForClause(dest) != null + || qbp.getSortByForClause(dest) != null) { - int numReducers = -1; + int numReducers = -1; - // Use only 1 reducer if order by is present - if (qbp.getOrderByForClause(dest) != null) - numReducers = 1; + // Use only 1 reducer if order by is present + if (qbp.getOrderByForClause(dest) != null) { + numReducers = 1; + } - curr = genReduceSinkPlan(dest, qb, curr, numReducers); - } - - if (qbp.getIsSubQ()) { - if (limit != null) { - // In case of order by, only 1 reducer is used, so no need of another shuffle - curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp.getOrderByForClause(dest) != null ? 
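The branching above picks one of four group-by strategies from two settings; a compact sketch of that decision, assuming the flags correspond to hive.map.aggr and hive.groupby.skewindata (the enum and method names are illustrative, not Hive API):

class GroupByPlanChoice {
  enum GroupByPlan { MAP_AGGR_1MR, MAP_AGGR_2MR, SKEW_2MR, PLAIN_1MR }

  // Same ordering of checks as the code above: map-side aggregation first,
  // then the skew setting chooses between the one- and two-stage plans.
  static GroupByPlan choose(boolean mapSideAggregate, boolean skewInData) {
    if (mapSideAggregate) {
      return skewInData ? GroupByPlan.MAP_AGGR_2MR : GroupByPlan.MAP_AGGR_1MR;
    }
    return skewInData ? GroupByPlan.SKEW_2MR : GroupByPlan.PLAIN_1MR;
  }
}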
false : true); + curr = genReduceSinkPlan(dest, qb, curr, numReducers); } - } else { - curr = genConversionOps(dest, qb, curr); - // exact limit can be taken care of by the fetch operator - if (limit != null) { - boolean extraMRStep = true; - if (qb.getIsQuery() && - qbp.getClusterByForClause(dest) == null && - qbp.getSortByForClause(dest) == null) - extraMRStep = false; + if (qbp.getIsSubQ()) { + if (limit != null) { + // In case of order by, only 1 reducer is used, so no need of + // another shuffle + curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp + .getOrderByForClause(dest) != null ? false : true); + } + } else { + curr = genConversionOps(dest, qb, curr); + // exact limit can be taken care of by the fetch operator + if (limit != null) { + boolean extraMRStep = true; - curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), extraMRStep); - qb.getParseInfo().setOuterQueryLimit(limit.intValue()); + if (qb.getIsQuery() && qbp.getClusterByForClause(dest) == null + && qbp.getSortByForClause(dest) == null) { + extraMRStep = false; + } + + curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), + extraMRStep); + qb.getParseInfo().setOuterQueryLimit(limit.intValue()); + } + curr = genFileSinkPlan(dest, qb, curr); } - curr = genFileSinkPlan(dest, qb, curr); - } - // change curr ops row resolver's tab aliases to query alias if it exists - if(qb.getParseInfo().getAlias() != null) { - RowResolver rr = opParseCtx.get(curr).getRR(); - RowResolver newRR = new RowResolver(); - String alias = qb.getParseInfo().getAlias(); - for(ColumnInfo colInfo: rr.getColumnInfos()) { - String name = colInfo.getInternalName(); - String [] tmp = rr.reverseLookup(name); - newRR.put(alias, tmp[1], colInfo); + // change curr ops row resolver's tab aliases to query alias if it + // exists + if (qb.getParseInfo().getAlias() != null) { + RowResolver rr = opParseCtx.get(curr).getRR(); + RowResolver newRR = new RowResolver(); + String alias = qb.getParseInfo().getAlias(); + for (ColumnInfo colInfo : rr.getColumnInfos()) { + String name = colInfo.getInternalName(); + String[] tmp = rr.reverseLookup(name); + newRR.put(alias, tmp[1], colInfo); + } + opParseCtx.get(curr).setRR(newRR); } - opParseCtx.get(curr).setRR(newRR); } } - } LOG.debug("Created Body Plan for Query Block " + qb.getId()); return curr; @@ -4430,41 +4619,48 @@ Operator leftOp, String rightalias, Operator rightOp) throws SemanticException { - // Currently, the unions are not merged - each union has only 2 parents. So, a n-way union will lead to (n-1) union operators. + // Currently, the unions are not merged - each union has only 2 parents. So, + // a n-way union will lead to (n-1) union operators. // This can be easily merged into 1 union RowResolver leftRR = opParseCtx.get(leftOp).getRR(); RowResolver rightRR = opParseCtx.get(rightOp).getRR(); HashMap leftmap = leftRR.getFieldMap(leftalias); HashMap rightmap = rightRR.getFieldMap(rightalias); // make sure the schemas of both sides are the same - if (leftmap.size() != rightmap.size()) + if (leftmap.size() != rightmap.size()) { throw new SemanticException("Schema of both sides of union should match."); - for (Map.Entry lEntry: leftmap.entrySet()) { + } + for (Map.Entry lEntry : leftmap.entrySet()) { String field = lEntry.getKey(); ColumnInfo lInfo = lEntry.getValue(); ColumnInfo rInfo = rightmap.get(field); if (rInfo == null) { - throw new SemanticException("Schema of both sides of union should match. 
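genUnionPlan requires both union inputs to expose the same number of fields with matching names and types (the size check above plus the per-field checks that follow); a small sketch of that validation, with Map<String, String> standing in for Hive's field-to-ColumnInfo maps and the method name purely illustrative:

import java.util.Map;

class UnionSchemaCheckSketch {
  static void checkSchemasMatch(Map<String, String> leftFieldTypes,
      Map<String, String> rightFieldTypes) {
    if (leftFieldTypes.size() != rightFieldTypes.size()) {
      throw new IllegalArgumentException(
          "Schema of both sides of union should match.");
    }
    for (Map.Entry<String, String> e : leftFieldTypes.entrySet()) {
      String rightType = rightFieldTypes.get(e.getKey());
      if (rightType == null || !rightType.equals(e.getValue())) {
        throw new IllegalArgumentException(
            "Schema of both sides of union should match: " + e.getKey());
      }
    }
  }
}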
" - + rightalias + " does not have the field " + field); + throw new SemanticException( + "Schema of both sides of union should match. " + rightalias + + " does not have the field " + field); } if (lInfo == null) { - throw new SemanticException("Schema of both sides of union should match. " - + leftalias + " does not have the field " + field); + throw new SemanticException( + "Schema of both sides of union should match. " + leftalias + + " does not have the field " + field); } if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { - throw new SemanticException("Schema of both sides of union should match: " - + field + ":" + lInfo.getInternalName() + " " + rInfo.getInternalName()); + throw new SemanticException( + "Schema of both sides of union should match: " + field + ":" + + lInfo.getInternalName() + " " + rInfo.getInternalName()); } if (!lInfo.getType().getTypeName().equals(rInfo.getType().getTypeName())) { - throw new SemanticException("Schema of both sides of union should match: Column " - + field + " is of type " + lInfo.getType().getTypeName() + - " on first table and type " + rInfo.getType().getTypeName() + " on second table"); + throw new SemanticException( + "Schema of both sides of union should match: Column " + field + + " is of type " + lInfo.getType().getTypeName() + + " on first table and type " + rInfo.getType().getTypeName() + + " on second table"); } } // construct the forward operator RowResolver unionoutRR = new RowResolver(); - for (Map.Entry lEntry: leftmap.entrySet()) { + for (Map.Entry lEntry : leftmap.entrySet()) { String field = lEntry.getKey(); ColumnInfo lInfo = lEntry.getValue(); unionoutRR.put(unionalias, field, lInfo); @@ -4472,39 +4668,40 @@ // If one of the children is a union, merge with it // else create a new one - if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) - { + if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) { if (leftOp instanceof UnionOperator) { // make left a child of right List> child = new ArrayList>(); child.add(leftOp); rightOp.setChildOperators(child); - List> parent = leftOp.getParentOperators(); + List> parent = leftOp + .getParentOperators(); parent.add(rightOp); - unionDesc uDesc = ((UnionOperator)leftOp).getConf(); - uDesc.setNumInputs(uDesc.getNumInputs()+1); + unionDesc uDesc = ((UnionOperator) leftOp).getConf(); + uDesc.setNumInputs(uDesc.getNumInputs() + 1); return putOpInsertMap(leftOp, unionoutRR); - } - else { + } else { // make right a child of left List> child = new ArrayList>(); child.add(rightOp); leftOp.setChildOperators(child); - List> parent = rightOp.getParentOperators(); + List> parent = rightOp + .getParentOperators(); parent.add(leftOp); - unionDesc uDesc = ((UnionOperator)rightOp).getConf(); - uDesc.setNumInputs(uDesc.getNumInputs()+1); + unionDesc uDesc = ((UnionOperator) rightOp).getConf(); + uDesc.setNumInputs(uDesc.getNumInputs() + 1); return putOpInsertMap(rightOp, unionoutRR); } } // Create a new union operator - Operator unionforward = - OperatorFactory.getAndMakeChild(new unionDesc(), new RowSchema(unionoutRR.getColumnInfos())); + Operator unionforward = OperatorFactory + .getAndMakeChild(new unionDesc(), new RowSchema(unionoutRR + .getColumnInfos())); // set union operator as child of each of leftOp and rightOp List> child = new ArrayList>(); @@ -4525,76 +4722,86 @@ } /** - * Generates the sampling predicate from the TABLESAMPLE clause information. 
This function uses the - * bucket column list to decide the expression inputs to the predicate hash function in case useBucketCols - * is set to true, otherwise the expression list stored in the TableSample is used. The bucket columns of - * the table are used to generate this predicate in case no expressions are provided on the TABLESAMPLE - * clause and the table has clustering columns defined in it's metadata. - * The predicate created has the following structure: - * - * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator - * - * @param ts TABLESAMPLE clause information - * @param bucketCols The clustering columns of the table - * @param useBucketCols Flag to indicate whether the bucketCols should be used as input to the hash - * function - * @param alias The alias used for the table in the row resolver - * @param rwsch The row resolver used to resolve column references - * @param qbm The metadata information for the query block which is used to resolve unaliased columns - * @param planExpr The plan tree for the expression. If the user specified this, the parse expressions are not used + * Generates the sampling predicate from the TABLESAMPLE clause information. + * This function uses the bucket column list to decide the expression inputs + * to the predicate hash function in case useBucketCols is set to true, + * otherwise the expression list stored in the TableSample is used. The bucket + * columns of the table are used to generate this predicate in case no + * expressions are provided on the TABLESAMPLE clause and the table has + * clustering columns defined in it's metadata. The predicate created has the + * following structure: + * + * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator + * + * @param ts + * TABLESAMPLE clause information + * @param bucketCols + * The clustering columns of the table + * @param useBucketCols + * Flag to indicate whether the bucketCols should be used as input to + * the hash function + * @param alias + * The alias used for the table in the row resolver + * @param rwsch + * The row resolver used to resolve column references + * @param qbm + * The metadata information for the query block which is used to + * resolve unaliased columns + * @param planExpr + * The plan tree for the expression. 
If the user specified this, the + * parse expressions are not used * @return exprNodeDesc * @exception SemanticException */ - private exprNodeDesc genSamplePredicate(TableSample ts, List bucketCols, - boolean useBucketCols, String alias, - RowResolver rwsch, QBMetaData qbm, exprNodeDesc planExpr) - throws SemanticException { + private exprNodeDesc genSamplePredicate(TableSample ts, + List bucketCols, boolean useBucketCols, String alias, + RowResolver rwsch, QBMetaData qbm, exprNodeDesc planExpr) + throws SemanticException { exprNodeDesc numeratorExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(ts.getNumerator() - 1)); + TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1)); exprNodeDesc denominatorExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(ts.getDenominator())); + TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator())); exprNodeDesc intMaxExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(Integer.MAX_VALUE)); + TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE)); ArrayList args = new ArrayList(); - if (planExpr != null) + if (planExpr != null) { args.add(planExpr); - else if (useBucketCols) { + } else if (useBucketCols) { for (String col : bucketCols) { ColumnInfo ci = rwsch.get(alias, col); // TODO: change type to the one in the table schema - args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), - ci.getTabAlias(), ci.getIsPartitionCol())); + args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci + .getTabAlias(), ci.getIsPartitionCol())); } - } - else { - for(ASTNode expr: ts.getExprs()) { + } else { + for (ASTNode expr : ts.getExprs()) { args.add(genExprNodeDesc(expr, rwsch)); } } exprNodeDesc equalsExpr = null; { - exprNodeDesc hashfnExpr = new exprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, - new GenericUDFHash(), args); - assert(hashfnExpr != null); + exprNodeDesc hashfnExpr = new exprNodeGenericFuncDesc( + TypeInfoFactory.intTypeInfo, new GenericUDFHash(), args); + assert (hashfnExpr != null); LOG.info("hashfnExpr = " + hashfnExpr); - exprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr); - assert(andExpr != null); + exprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr); + assert (andExpr != null); LOG.info("andExpr = " + andExpr); - exprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("%", andExpr, denominatorExpr); - assert(modExpr != null); + exprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("%", andExpr, denominatorExpr); + assert (modExpr != null); LOG.info("modExpr = " + modExpr); LOG.info("numeratorExpr = " + numeratorExpr); - equalsExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", modExpr, numeratorExpr); + equalsExpr = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("==", modExpr, numeratorExpr); LOG.info("equalsExpr = " + equalsExpr); - assert(equalsExpr != null); + assert (equalsExpr != null); } return equalsExpr; } @@ -4607,44 +4814,49 @@ RowResolver rwsch; // is the table already present - Operator top = this.topOps.get(alias_id); - Operator dummySel = this.topSelOps.get(alias_id); - if (dummySel != null) + Operator top = topOps.get(alias_id); + Operator dummySel = topSelOps.get(alias_id); + if (dummySel != null) { top = dummySel; + } if (top == null) { rwsch = new 
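The predicate assembled above evaluates, per row, whether the hash of the sampling expressions lands in the requested bucket; a standalone sketch of the same arithmetic on a plain int hash code. TABLESAMPLE numerators are 1-based, hence the comparison with numerator - 1, matching exprNodeConstantDesc(ts.getNumerator() - 1) above; the class and method names are illustrative only:

class SamplePredicateSketch {
  // ((hash & Integer.MAX_VALUE) % denominator) == numerator - 1
  static boolean inSampleBucket(int hashOfSampleExprs, int numerator,
      int denominator) {
    return ((hashOfSampleExprs & Integer.MAX_VALUE) % denominator)
        == (numerator - 1);
  }
}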
RowResolver(); try { - StructObjectInspector rowObjectInspector = (StructObjectInspector)tab.getDeserializer().getObjectInspector(); - List fields = rowObjectInspector.getAllStructFieldRefs(); - for (int i=0; i fields = rowObjectInspector + .getAllStructFieldRefs(); + for (int i = 0; i < fields.size(); i++) { + rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields + .get(i).getFieldName(), TypeInfoUtils + .getTypeInfoFromObjectInspector(fields.get(i) + .getFieldObjectInspector()), alias, false)); } } catch (SerDeException e) { throw new RuntimeException(e); } // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns - for(FieldSchema part_col: tab.getPartCols()) { + for (FieldSchema part_col : tab.getPartCols()) { LOG.trace("Adding partition col: " + part_col); - // TODO: use the right type by calling part_col.getType() instead of String.class - rwsch.put(alias, part_col.getName(), - new ColumnInfo(part_col.getName(), TypeInfoFactory.stringTypeInfo, alias, true)); + // TODO: use the right type by calling part_col.getType() instead of + // String.class + rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), + TypeInfoFactory.stringTypeInfo, alias, true)); } // Create the root of the operator tree - top = putOpInsertMap(OperatorFactory.get(new tableScanDesc(alias), new RowSchema(rwsch.getColumnInfos())), rwsch); + top = putOpInsertMap(OperatorFactory.get(new tableScanDesc(alias), + new RowSchema(rwsch.getColumnInfos())), rwsch); - // Add this to the list of top operators - we always start from a table scan - this.topOps.put(alias_id, top); + // Add this to the list of top operators - we always start from a table + // scan + topOps.put(alias_id, top); // Add a mapping from the table scan operator to Table - this.topToTable.put((TableScanOperator)top, tab); - } - else { + topToTable.put((TableScanOperator) top, tab); + } else { rwsch = opParseCtx.get(top).getRR(); top.setChildOperators(null); } @@ -4663,11 +4875,14 @@ // If there are no sample cols and no bucket cols then throw an error if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) { - throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getName()); + throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + + tab.getName()); } if (num > den) { - throw new SemanticException(ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getName()); + throw new SemanticException( + ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " " + + tab.getName()); } // check if a predicate is needed @@ -4676,7 +4891,8 @@ // check if the sample columns are the same as the table bucket columns boolean colsEqual = true; - if ( (sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0) ) { + if ((sampleExprs.size() != tabBucketCols.size()) + && (sampleExprs.size() != 0)) { colsEqual = false; } @@ -4687,7 +4903,8 @@ break; } - if (((ASTNode)sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) { + if (((ASTNode) sampleExprs.get(i).getChild(0)).getText() + .equalsIgnoreCase(tabBucketCols.get(j))) { colFound = true; } } @@ -4695,41 +4912,45 @@ } // Check if input can be pruned - ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)); + ts + .setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)); // check if input pruning is enough if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) && (num == den || (den % 
numBuckets == 0 || numBuckets % den == 0))) { - // input pruning is enough; add the filter for the optimizer to use it later + // input pruning is enough; add the filter for the optimizer to use it + // later LOG.info("No need for sample filter"); - exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild( - new filterDesc(samplePredicate, true, new sampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true)), - top); - } - else { + exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, + colsEqual, alias, rwsch, qb.getMetaData(), null); + tableOp = OperatorFactory.getAndMakeChild(new filterDesc( + samplePredicate, true, new sampleDesc(ts.getNumerator(), ts + .getDenominator(), tabBucketCols, true)), top); + } else { // need to add filter // create tableOp to be filterDesc and set as child to 'top' LOG.info("Need sample filter"); - exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild( - new filterDesc(samplePredicate, true), - top); + exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, + colsEqual, alias, rwsch, qb.getMetaData(), null); + tableOp = OperatorFactory.getAndMakeChild(new filterDesc( + samplePredicate, true), top); } - } - else { + } else { boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); if (testMode) { String tabName = tab.getName(); // has the user explicitly asked not to sample this table - String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE); - String[] unSampleTbls = unSampleTblList.split(","); + String unSampleTblList = conf + .getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE); + String[] unSampleTbls = unSampleTblList.split(","); boolean unsample = false; - for (String unSampleTbl : unSampleTbls) - if (tabName.equalsIgnoreCase(unSampleTbl)) + for (String unSampleTbl : unSampleTbls) { + if (tabName.equalsIgnoreCase(unSampleTbl)) { unsample = true; + } + } if (!unsample) { int numBuckets = tab.getNumBuckets(); @@ -4739,11 +4960,12 @@ TableSample tsSample = new TableSample(1, numBuckets); tsSample.setInputPruning(true); qb.getParseInfo().setTabSample(alias, tsSample); - exprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild( - new filterDesc(samplePred, true, - new sampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true)), - top); + exprNodeDesc samplePred = genSamplePredicate(tsSample, tab + .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null); + tableOp = OperatorFactory + .getAndMakeChild(new filterDesc(samplePred, true, + new sampleDesc(tsSample.getNumerator(), tsSample + .getDenominator(), tab.getBucketCols(), true)), top); LOG.info("No need for sample filter"); } // The table is not bucketed, add a dummy filter :: rand() @@ -4753,9 +4975,13 @@ tsSample.setInputPruning(false); qb.getParseInfo().setTabSample(alias, tsSample); LOG.info("Need sample filter"); - exprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new exprNodeConstantDesc(Integer.valueOf(460476415))); - exprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc); - tableOp = OperatorFactory.getAndMakeChild(new filterDesc(samplePred, true), top); + exprNodeDesc randFunc = 
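The test above decides when selecting bucket files alone answers the sample, so the added filter is only a hint for the optimizer; a sketch of that condition (for example, with 32 buckets, TABLESAMPLE(BUCKET 3 OUT OF 16) qualifies because 32 % 16 == 0). Method and parameter names are illustrative:

class SamplePruningSketch {
  static boolean pruningIsEnough(boolean samplesOnBucketCols, int numerator,
      int denominator, int numBuckets) {
    // samplesOnBucketCols corresponds to "no sample expressions, or the
    // expressions equal the table's clustering columns" in the code above.
    return samplesOnBucketCols
        && (numerator == denominator
            || denominator % numBuckets == 0
            || numBuckets % denominator == 0);
  }
}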
TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDesc("rand", new exprNodeConstantDesc(Integer + .valueOf(460476415))); + exprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, + alias, rwsch, qb.getMetaData(), randFunc); + tableOp = OperatorFactory.getAndMakeChild(new filterDesc( + samplePred, true), top); } } } @@ -4818,14 +5044,15 @@ mergeJoinTree(qb); } - // if any filters are present in the join tree, push them on top of the table + // if any filters are present in the join tree, push them on top of the + // table pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo); srcOpInfo = genJoinPlan(qb, aliasToOpInfo); - } - else + } else { // Now if there are more than 1 sources then we have a join case // later we can extend this to the union all case as well srcOpInfo = aliasToOpInfo.values().iterator().next(); + } Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo); LOG.debug("Created Plan for Query Block " + qb.getId()); @@ -4837,17 +5064,18 @@ /** * Generates the operator DAG needed to implement lateral views and attaches * it to the TS operator. - * - * @param aliasToOpInfo A mapping from a table alias to the TS operator. This - * function replaces the operator mapping as necessary + * + * @param aliasToOpInfo + * A mapping from a table alias to the TS operator. This function + * replaces the operator mapping as necessary * @param qb * @throws SemanticException */ void genLateralViewPlans(HashMap aliasToOpInfo, QB qb) - throws SemanticException { - Map> aliasToLateralViews = - qb.getParseInfo().getAliasToLateralViews(); + throws SemanticException { + Map> aliasToLateralViews = qb.getParseInfo() + .getAliasToLateralViews(); for (Entry e : aliasToOpInfo.entrySet()) { String alias = e.getKey(); // See if the alias has a lateral view. If so, chain the lateral view @@ -4861,7 +5089,7 @@ // to the same LateralViewJoinOperator. // TS -> SelectOperator(*) -> LateralViewJoinOperator // TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator0 - // -> LateralViewJoinOperator + // -> LateralViewJoinOperator // The order in which the two paths are added is important. The // lateral view join operator depends on having the select operator @@ -4869,19 +5097,16 @@ // Get the all path by making a select(*) RowResolver allPathRR = opParseCtx.get(op).getRR(); - Operator allPath = - putOpInsertMap(OperatorFactory.getAndMakeChild( - new selectDesc(true), - new RowSchema(allPathRR.getColumnInfos()), - op), allPathRR); + Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( + new selectDesc(true), new RowSchema(allPathRR.getColumnInfos()), + op), allPathRR); // Get the UDTF Path QB blankQb = new QB(null, null, false); - Operator udtfPath = - genSelectPlan((ASTNode)lateralViewTree.getChild(0), blankQb, op); + Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree + .getChild(0), blankQb, op); RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRR(); - // Merge the two into the lateral view join // The cols of the merged result will be the combination of both the // cols of the UDTF path and the cols of the all path. 
The internal @@ -4890,16 +5115,13 @@ RowResolver lateralViewRR = new RowResolver(); ArrayList outputInternalColNames = new ArrayList(); - LVmergeRowResolvers(allPathRR, lateralViewRR, - outputInternalColNames); - LVmergeRowResolvers(udtfPathRR, lateralViewRR, - outputInternalColNames); + LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames); + LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames); - Operator lateralViewJoin = - putOpInsertMap(OperatorFactory.getAndMakeChild( - new lateralViewJoinDesc(outputInternalColNames), - new RowSchema(lateralViewRR.getColumnInfos()), - allPath, udtfPath), lateralViewRR); + Operator lateralViewJoin = putOpInsertMap(OperatorFactory + .getAndMakeChild(new lateralViewJoinDesc(outputInternalColNames), + new RowSchema(lateralViewRR.getColumnInfos()), allPath, + udtfPath), lateralViewRR); op = lateralViewJoin; } e.setValue(op); @@ -4911,56 +5133,32 @@ * A helper function that gets all the columns and respective aliases in the * source and puts them into dest. It renames the internal names of the * columns based on getColumnInternalName(position). - * + * * Note that this helper method relies on RowResolver.getColumnInfos() * returning the columns in the same order as they will be passed in the * operator DAG. - * + * * @param source * @param dest - * @param outputColNames - a list to which the new internal column names will - * be added, in the same order as in the dest row - * resolver + * @param outputColNames + * - a list to which the new internal column names will be added, in + * the same order as in the dest row resolver */ private void LVmergeRowResolvers(RowResolver source, RowResolver dest, ArrayList outputInternalColNames) { Vector cols = source.getColumnInfos(); for (ColumnInfo c : cols) { - String internalName = - getColumnInternalName(outputInternalColNames.size()); + String internalName = getColumnInternalName(outputInternalColNames.size()); outputInternalColNames.add(internalName); - ColumnInfo newCol = new ColumnInfo(internalName, c.getType(), - c.getTabAlias(), c.getIsPartitionCol()); - String [] tableCol = source.reverseLookup(c.getInternalName()); + ColumnInfo newCol = new ColumnInfo(internalName, c.getType(), c + .getTabAlias(), c.getIsPartitionCol()); + String[] tableCol = source.reverseLookup(c.getInternalName()); String tableAlias = tableCol[0]; String colAlias = tableCol[1]; dest.put(tableAlias, colAlias, newCol); } } - private Operator getReduceSink(Operator top) { - if (top.getClass() == ReduceSinkOperator.class) { - // Get the operator following the reduce sink - assert (top.getChildOperators().size() == 1); - - return top; - } - - List> childOps = top.getChildOperators(); - if (childOps == null) { - return null; - } - - for (int i = 0; i < childOps.size(); ++i) { - Operator reducer = getReduceSink(childOps.get(i)); - if (reducer != null) { - return reducer; - } - } - - return null; - } - @SuppressWarnings("nls") private void genMapRedTasks(QB qb) throws SemanticException { fetchWork fetch = null; @@ -4970,40 +5168,46 @@ QBParseInfo qbParseInfo = qb.getParseInfo(); // Does this query need reduce job - if (qb.isSelectStarQuery() - && qbParseInfo.getDestToClusterBy().isEmpty() + if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty() && qbParseInfo.getDestToDistributeBy().isEmpty() && qbParseInfo.getDestToOrderBy().isEmpty() && qbParseInfo.getDestToSortBy().isEmpty()) { boolean noMapRed = false; - Iterator> iter = qb.getMetaData().getAliasToTable().entrySet().iterator(); - Table tab 
= ((Map.Entry)iter.next()).getValue(); + Iterator> iter = qb.getMetaData() + .getAliasToTable().entrySet().iterator(); + Table tab = (iter.next()).getValue(); if (!tab.isPartitioned()) { if (qbParseInfo.getDestToWhereExpr().isEmpty()) { - fetch = new fetchWork(tab.getPath().toString(), Utilities.getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); + fetch = new fetchWork(tab.getPath().toString(), Utilities + .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); noMapRed = true; inputs.add(new ReadEntity(tab)); } - } - else { + } else { if (topOps.size() == 1) { - TableScanOperator ts = (TableScanOperator)topOps.values().toArray()[0]; + TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0]; // check if the pruner only contains partition columns - if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts), opToPartPruner.get(ts))) { + if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts), + opToPartPruner.get(ts))) { PrunedPartitionList partsList = null; try { - partsList = PartitionPruner.prune(topToTable.get(ts), opToPartPruner.get(ts), conf, (String)topOps.keySet().toArray()[0], prunedPartitions); + partsList = PartitionPruner.prune(topToTable.get(ts), + opToPartPruner.get(ts), conf, (String) topOps.keySet() + .toArray()[0], prunedPartitions); } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + LOG.error(org.apache.hadoop.util.StringUtils + .stringifyException(e)); throw new SemanticException(e.getMessage(), e); } - // If there is any unknown partition, create a map-reduce job for the filter to prune correctly + // If there is any unknown partition, create a map-reduce job for + // the filter to prune correctly if (partsList.getUnknownPartns().size() == 0) { List listP = new ArrayList(); List partP = new ArrayList(); @@ -5013,15 +5217,16 @@ while (iterParts.hasNext()) { Partition part = iterParts.next(); listP.add(part.getPartitionPath().toString()); - try{ - partP.add(Utilities.getPartitionDesc(part)); + try { + partP.add(Utilities.getPartitionDesc(part)); } catch (HiveException e) { - throw new SemanticException(e.getMessage(), e); + throw new SemanticException(e.getMessage(), e); } inputs.add(new ReadEntity(part)); } - fetch = new fetchWork(listP, partP, qb.getParseInfo().getOuterQueryLimit()); + fetch = new fetchWork(listP, partP, qb.getParseInfo() + .getOuterQueryLimit()); noMapRed = true; } } @@ -5029,7 +5234,7 @@ } if (noMapRed) { - fetchTask = TaskFactory.get(fetch, this.conf); + fetchTask = TaskFactory.get(fetch, conf); setFetchTask(fetchTask); // remove root tasks if any @@ -5040,40 +5245,43 @@ // In case of a select, use a fetch task instead of a move task if (qb.getIsQuery()) { - if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) + if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) { throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); + } String cols = loadFileWork.get(0).getColumns(); String colTypes = loadFileWork.get(0).getColumnTypes(); - fetch = new fetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), - new tableDesc(LazySimpleSerDe.class, TextInputFormat.class, - IgnoreKeyTextOutputFormat.class, - Utilities.makeProperties( - org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, - 
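genMapRedTasks short-circuits to a fetch task when no MapReduce job is needed; a condensed sketch of that decision, assuming a single source table, with boolean parameters standing in for the QBParseInfo and partition-pruner lookups above:

class FetchTaskDecisionSketch {
  static boolean canUseFetchTask(boolean selectStarOnly, boolean hasClusterBy,
      boolean hasDistributeBy, boolean hasOrderBy, boolean hasSortBy,
      boolean partitioned, boolean hasWhere, boolean whereOnPartitionColsOnly,
      boolean hasUnknownPartitions) {
    if (!selectStarOnly || hasClusterBy || hasDistributeBy || hasOrderBy
        || hasSortBy) {
      return false;
    }
    if (!partitioned) {
      // unpartitioned table: only a bare SELECT * can be fetched directly
      return !hasWhere;
    }
    // partitioned table: pruning must fully answer the predicate
    return whereOnPartitionColsOnly && !hasUnknownPartitions;
  }
}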
org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cols, - org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes)), - qb.getParseInfo().getOuterQueryLimit()); + fetch = new fetchWork(new Path(loadFileWork.get(0).getSourceDir()) + .toString(), new tableDesc(LazySimpleSerDe.class, + TextInputFormat.class, IgnoreKeyTextOutputFormat.class, Utilities + .makeProperties( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, + "" + Utilities.ctrlaCode, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cols, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, + colTypes)), qb.getParseInfo().getOuterQueryLimit()); - fetchTask = TaskFactory.get(fetch, this.conf); + fetchTask = TaskFactory.get(fetch, conf); setFetchTask(fetchTask); } else { - // First we generate the move work as this needs to be made dependent on all - // the tasks that have a file sink operation - List mv = new ArrayList(); - for (loadTableDesc ltd : loadTableWork) - mvTask.add(TaskFactory.get(new moveWork(null, null, ltd, null, false), this.conf)); + new ArrayList(); + for (loadTableDesc ltd : loadTableWork) { + mvTask.add(TaskFactory.get(new moveWork(null, null, ltd, null, false), + conf)); + } boolean oneLoadFile = true; for (loadFileDesc lfd : loadFileWork) { - if ( qb.isCTAS() ) { - assert(oneLoadFile); // should not have more than 1 load file for CTAS + if (qb.isCTAS()) { + assert (oneLoadFile); // should not have more than 1 load file for + // CTAS // make the movetask's destination directory the table's destination. String location = qb.getTableDesc().getLocation(); - if ( location == null ) { + if (location == null) { // get the table's default location location = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); - assert(location.length() > 0 ); - if ( location.charAt(location.length()-1) != '/' ) { + assert (location.length() > 0); + if (location.charAt(location.length() - 1) != '/') { location += '/'; } location += qb.getTableDesc().getTableName().toLowerCase(); @@ -5081,100 +5289,122 @@ lfd.setTargetDir(location); oneLoadFile = false; } - mvTask.add(TaskFactory.get(new moveWork(null, null, null, lfd, false), this.conf)); + mvTask.add(TaskFactory.get(new moveWork(null, null, null, lfd, false), + conf)); } } // generate map reduce plans - GenMRProcContext procCtx = - new GenMRProcContext( - conf, new HashMap, Task>(), - new ArrayList>(), - getParseContext(), mvTask, this.rootTasks, + GenMRProcContext procCtx = new GenMRProcContext( + conf, + new HashMap, Task>(), + new ArrayList>(), getParseContext(), + mvTask, rootTasks, new LinkedHashMap, GenMapRedCtx>(), inputs, outputs); - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
// The dispatcher generates the plan from the operator tree Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp(new String("R1"), "TS%"), new GenMRTableScan1()); - opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"), new GenMRRedSink1()); - opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"), new GenMRRedSink2()); + opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"), + new GenMRRedSink1()); + opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"), + new GenMRRedSink2()); opRules.put(new RuleRegExp(new String("R4"), "FS%"), new GenMRFileSink1()); opRules.put(new RuleRegExp(new String("R5"), "UNION%"), new GenMRUnion1()); - opRules.put(new RuleRegExp(new String("R6"), "UNION%.*RS%"), new GenMRRedSink3()); - opRules.put(new RuleRegExp(new String("R6"), "MAPJOIN%.*RS%"), new GenMRRedSink4()); - opRules.put(new RuleRegExp(new String("R7"), "TS%.*MAPJOIN%"), MapJoinFactory.getTableScanMapJoin()); - opRules.put(new RuleRegExp(new String("R8"), "RS%.*MAPJOIN%"), MapJoinFactory.getReduceSinkMapJoin()); - opRules.put(new RuleRegExp(new String("R9"), "UNION%.*MAPJOIN%"), MapJoinFactory.getUnionMapJoin()); - opRules.put(new RuleRegExp(new String("R10"), "MAPJOIN%.*MAPJOIN%"), MapJoinFactory.getMapJoinMapJoin()); - opRules.put(new RuleRegExp(new String("R11"), "MAPJOIN%SEL%"), MapJoinFactory.getMapJoin()); - - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx); + opRules.put(new RuleRegExp(new String("R6"), "UNION%.*RS%"), + new GenMRRedSink3()); + opRules.put(new RuleRegExp(new String("R6"), "MAPJOIN%.*RS%"), + new GenMRRedSink4()); + opRules.put(new RuleRegExp(new String("R7"), "TS%.*MAPJOIN%"), + MapJoinFactory.getTableScanMapJoin()); + opRules.put(new RuleRegExp(new String("R8"), "RS%.*MAPJOIN%"), + MapJoinFactory.getReduceSinkMapJoin()); + opRules.put(new RuleRegExp(new String("R9"), "UNION%.*MAPJOIN%"), + MapJoinFactory.getUnionMapJoin()); + opRules.put(new RuleRegExp(new String("R10"), "MAPJOIN%.*MAPJOIN%"), + MapJoinFactory.getMapJoinMapJoin()); + opRules.put(new RuleRegExp(new String("R11"), "MAPJOIN%SEL%"), + MapJoinFactory.getMapJoin()); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, + procCtx); + GraphWalker ogw = new GenMapRedWalker(disp); ArrayList topNodes = new ArrayList(); - topNodes.addAll(this.topOps.values()); + topNodes.addAll(topOps.values()); ogw.startWalking(topNodes, null); - // reduce sink does not have any kids - since the plan by now has been broken up into multiple + // reduce sink does not have any kids - since the plan by now has been + // broken up into multiple // tasks, iterate over all tasks. 
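The opRules table above keys processors by patterns such as "TS%.*RS%" that are matched against the operator stack during the walk; a simplified sketch of that matching idea, rendering the stack as a string of operator tags and applying the rule as a regular expression ('%' is not a regex metacharacter, so the rule string can be used directly). This mirrors the intent of RuleRegExp but is not Hive's implementation:

import java.util.List;
import java.util.regex.Pattern;

class RuleMatchSketch {
  // e.g. tags ["TS%", "FIL%", "RS%"] match the rule "TS%.*RS%".
  static boolean matches(String rulePattern, List<String> operatorTags) {
    StringBuilder stack = new StringBuilder();
    for (String tag : operatorTags) {
      stack.append(tag);
    }
    return Pattern.compile(rulePattern).matcher(stack).find();
  }
}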
// For each task, go over all operators recursively - for (Task rootTask: rootTasks) + for (Task rootTask : rootTasks) { breakTaskTree(rootTask); - + } + // For each task, set the key descriptor for the reducer - for (Task rootTask: rootTasks) + for (Task rootTask : rootTasks) { setKeyDescTaskTree(rootTask); - - PhysicalContext physicalContext = new PhysicalContext(conf, getParseContext(), ctx, rootTasks, fetchTask); - PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer(physicalContext, conf); + } + + PhysicalContext physicalContext = new PhysicalContext(conf, + getParseContext(), ctx, rootTasks, fetchTask); + PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer( + physicalContext, conf); physicalOptimizer.optimize(); // For each operator, generate the counters if needed - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) - for (Task rootTask: rootTasks) + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) { + for (Task rootTask : rootTasks) { generateCountersTask(rootTask); + } + } - if ( qb.isCTAS() ) { + if (qb.isCTAS()) { // generate a DDL task and make it a dependent task of the leaf createTableDesc crtTblDesc = qb.getTableDesc(); validateCreateTable(crtTblDesc); - // Clear the output for CTAS since we don't need the output from the mapredWork, the + // Clear the output for CTAS since we don't need the output from the + // mapredWork, the // DDLWork at the tail of the chain will have the output getOutputs().clear(); - Task crtTblTask = - TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc), this.conf); + Task crtTblTask = TaskFactory.get(new DDLWork( + getInputs(), getOutputs(), crtTblDesc), conf); - // find all leaf tasks and make the DDLTask as a dependent task of all of them + // find all leaf tasks and make the DDLTask as a dependent task of all of + // them HashSet> leaves = new HashSet>(); getLeafTasks(rootTasks, leaves); - assert(leaves.size() > 0); - for ( Task task: leaves ) { + assert (leaves.size() > 0); + for (Task task : leaves) { task.addDependentTask(crtTblTask); } } } - + /** * Find all leaf tasks of the list of root tasks. */ - private void getLeafTasks( List> rootTasks, - HashSet> leaves) { + private void getLeafTasks(List> rootTasks, + HashSet> leaves) { - for ( Task root : rootTasks ) { + for (Task root : rootTasks) { getLeafTasks(root, leaves); } } - private void getLeafTasks( Task task, - HashSet> leaves) { - if ( task.getChildTasks() == null ) { - if ( ! 
leaves.contains(task) ) { + private void getLeafTasks(Task task, + HashSet> leaves) { + if (task.getChildTasks() == null) { + if (!leaves.contains(task)) { leaves.add(task); } } else { @@ -5182,106 +5412,126 @@ } } - // loop over all the tasks recursviely private void generateCountersTask(Task task) { if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); + HashMap> opMap = ((mapredWork) task + .getWork()).getAliasToWork(); if (!opMap.isEmpty()) { - for (Operator op: opMap.values()) { + for (Operator op : opMap.values()) { generateCountersOperator(op); } } - Operator reducer = ((mapredWork)task.getWork()).getReducer(); + Operator reducer = ((mapredWork) task.getWork()) + .getReducer(); if (reducer != null) { LOG.info("Generating counters for operator " + reducer); generateCountersOperator(reducer); } - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) + } else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask) task) + .getListTasks(); + for (Task tsk : listTasks) { generateCountersTask(tsk); + } } // Start the counters from scratch - a hack for hadoop 17. Operator.resetLastEnumUsed(); - if (task.getChildTasks() == null) + if (task.getChildTasks() == null) { return; + } - for (Task childTask : task.getChildTasks()) + for (Task childTask : task.getChildTasks()) { generateCountersTask(childTask); + } } private void generateCountersOperator(Operator op) { op.assignCounterNameToEnum(); - if (op.getChildOperators() == null) + if (op.getChildOperators() == null) { return; + } - for (Operator child: op.getChildOperators()) + for (Operator child : op.getChildOperators()) { generateCountersOperator(child); + } } - + // loop over all the tasks recursviely private void breakTaskTree(Task task) { if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); - if (!opMap.isEmpty()) - for (Operator op: opMap.values()) { + HashMap> opMap = ((mapredWork) task + .getWork()).getAliasToWork(); + if (!opMap.isEmpty()) { + for (Operator op : opMap.values()) { breakOperatorTree(op); } - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) + } + } else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask) task) + .getListTasks(); + for (Task tsk : listTasks) { breakTaskTree(tsk); + } } - if (task.getChildTasks() == null) + if (task.getChildTasks() == null) { return; + } - for (Task childTask : task.getChildTasks()) + for (Task childTask : task.getChildTasks()) { breakTaskTree(childTask); + } } // loop over all the operators recursviely private void breakOperatorTree(Operator topOp) { - if (topOp instanceof ReduceSinkOperator) + if (topOp instanceof ReduceSinkOperator) { topOp.setChildOperators(null); + } - if (topOp.getChildOperators() == null) + if (topOp.getChildOperators() == null) { return; + } - for (Operator op: topOp.getChildOperators()) + for (Operator op : topOp.getChildOperators()) { breakOperatorTree(op); + } } // loop over all the tasks recursviely private void setKeyDescTaskTree(Task task) { if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - mapredWork work = (mapredWork)task.getWork(); + mapredWork work = (mapredWork) task.getWork(); work.deriveExplainAttributes(); - HashMap> opMap = work.getAliasToWork(); - if 
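getLeafTasks above walks the task DAG so that a CTAS DDL task can be made dependent on every leaf; a minimal sketch of that traversal, with TaskNode standing in for Task<? extends Serializable>:

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

class LeafTaskSketch {
  static class TaskNode {
    List<TaskNode> children = new ArrayList<TaskNode>();
  }

  // Collect every task with no children into the supplied set.
  static void collectLeaves(TaskNode task, Set<TaskNode> leaves) {
    if (task.children.isEmpty()) {
      leaves.add(task);
      return;
    }
    for (TaskNode child : task.children) {
      collectLeaves(child, leaves);
    }
  }
}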
(!opMap.isEmpty()) - for (Operator op: opMap.values()) + HashMap> opMap = work + .getAliasToWork(); + if (!opMap.isEmpty()) { + for (Operator op : opMap.values()) { GenMapRedUtils.setKeyAndValueDesc(work, op); - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) + } + } + } else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask) task) + .getListTasks(); + for (Task tsk : listTasks) { setKeyDescTaskTree(tsk); + } } - if (task.getChildTasks() == null) + if (task.getChildTasks() == null) { return; + } - for (Task childTask : task.getChildTasks()) + for (Task childTask : task.getChildTasks()) { setKeyDescTaskTree(childTask); + } } @SuppressWarnings("nls") @@ -5309,8 +5559,9 @@ // analyze create table command if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { // if it is not CTAS, we don't need to go further and just return - if ( (child = analyzeCreateTable(ast, qb)) == null ) - return; + if ((child = analyzeCreateTable(ast, qb)) == null) { + return; + } } // analyze create view command @@ -5335,21 +5586,18 @@ saveViewDefinition(sinkOp); // Since we're only creating a view (not executing it), we // don't need to optimize or translate the plan (and in fact, those - // procedures can interfere with the view creation). So + // procedures can interfere with the view creation). So // skip the rest of this method. ctx.setResDir(null); ctx.setResFile(null); return; } - ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, topOps, - topSelOps, opParseCtx, joinContext, topToTable, - loadTableWork, loadFileWork, - ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, - groupOpToInputTables, - prunedPartitions, - opToSamplePruner); + ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, + topOps, topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, + loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, + opToSamplePruner); Optimizer optm = new Optimizer(); optm.setPctx(pCtx); @@ -5367,13 +5615,12 @@ return; } - private void saveViewDefinition(Operator sinkOp) - throws SemanticException { + private void saveViewDefinition(Operator sinkOp) throws SemanticException { // Save the view schema derived from the sink operator produced // by genPlan. - List derivedSchema = - convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRR()); + List derivedSchema = convertRowSchemaToViewSchema(opParseCtx + .get(sinkOp).getRR()); validateColumnNameUniqueness(derivedSchema); List imposedSchema = createVwDesc.getSchema(); @@ -5381,15 +5628,14 @@ int explicitColCount = imposedSchema.size(); int derivedColCount = derivedSchema.size(); if (explicitColCount != derivedColCount) { - throw new SemanticException( - ErrorMsg.VIEW_COL_MISMATCH.getMsg(viewSelect)); + throw new SemanticException(ErrorMsg.VIEW_COL_MISMATCH + .getMsg(viewSelect)); } } // Preserve the original view definition as specified by the user. String originalText = ctx.getTokenRewriteStream().toString( - viewSelect.getTokenStartIndex(), - viewSelect.getTokenStopIndex()); + viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex()); createVwDesc.setViewOriginalText(originalText); // Now expand the view definition with extras such as explicit column @@ -5397,8 +5643,7 @@ // referenced later. 
unparseTranslator.applyTranslation(ctx.getTokenRewriteStream()); String expandedText = ctx.getTokenRewriteStream().toString( - viewSelect.getTokenStartIndex(), - viewSelect.getTokenStopIndex()); + viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex()); if (imposedSchema != null) { // Merge the names from the imposed schema into the types @@ -5428,7 +5673,7 @@ sb.append(HiveUtils.unparseIdentifier(createVwDesc.getViewName())); expandedText = sb.toString(); } - + createVwDesc.setSchema(derivedSchema); createVwDesc.setViewExpandedText(expandedText); } @@ -5437,59 +5682,71 @@ List fieldSchemas = new ArrayList(); for (ColumnInfo colInfo : rr.getColumnInfos()) { String colName = rr.reverseLookup(colInfo.getInternalName())[1]; - fieldSchemas.add( - new FieldSchema(colName, colInfo.getType().getTypeName(), null)); + fieldSchemas.add(new FieldSchema(colName, + colInfo.getType().getTypeName(), null)); } return fieldSchemas; } /** - * Generates an expression node descriptor for the expression passed in the arguments. This - * function uses the row resolver and the metadata information that are passed as arguments - * to resolve the column names to internal names. - * @param expr The expression - * @param input The row resolver + * Generates an expression node descriptor for the expression passed in the + * arguments. This function uses the row resolver and the metadata information + * that are passed as arguments to resolve the column names to internal names. + * + * @param expr + * The expression + * @param input + * The row resolver * @return exprNodeDesc * @throws SemanticException */ @SuppressWarnings("nls") public exprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) - throws SemanticException { - // We recursively create the exprNodeDesc. Base cases: when we encounter - // a column ref, we convert that into an exprNodeColumnDesc; when we encounter - // a constant, we convert that into an exprNodeConstantDesc. For others we just - // build the exprNodeFuncDesc with recursively built children. + throws SemanticException { + // We recursively create the exprNodeDesc. Base cases: when we encounter + // a column ref, we convert that into an exprNodeColumnDesc; when we + // encounter + // a constant, we convert that into an exprNodeConstantDesc. For others we + // just + // build the exprNodeFuncDesc with recursively built children. - // If the current subExpression is pre-calculated, as in Group-By etc. + // If the current subExpression is pre-calculated, as in Group-By etc. ColumnInfo colInfo = input.get("", expr.toStringTree()); if (colInfo != null) { - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol()); + return new exprNodeColumnDesc(colInfo.getType(), colInfo + .getInternalName(), colInfo.getTabAlias(), colInfo + .getIsPartitionCol()); } // Create the walker, the rules dispatcher and the context. TypeCheckCtx tcCtx = new TypeCheckCtx(input); tcCtx.setUnparseTranslator(unparseTranslator); - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
The dispatcher // generates the plan from the operator tree Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), TypeCheckProcFactory.getNullExprProcessor()); - opRules.put(new RuleRegExp("R2", HiveParser.Number + "%"), TypeCheckProcFactory.getNumExprProcessor()); - opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + - HiveParser.StringLiteral + "%|" + - HiveParser.TOK_CHARSETLITERAL + "%|" + - HiveParser.KW_IF + "%|" + - HiveParser.KW_CASE + "%|" + - HiveParser.KW_WHEN + "%"), - TypeCheckProcFactory.getStrExprProcessor()); - opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" + HiveParser.KW_FALSE + "%"), - TypeCheckProcFactory.getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", HiveParser.TOK_TABLE_OR_COL + "%"), TypeCheckProcFactory.getColumnExprProcessor()); + opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), + TypeCheckProcFactory.getNullExprProcessor()); + opRules.put(new RuleRegExp("R2", HiveParser.Number + "%"), + TypeCheckProcFactory.getNumExprProcessor()); + opRules + .put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + + "%|" + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|" + + HiveParser.KW_WHEN + "%"), TypeCheckProcFactory + .getStrExprProcessor()); + opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" + + HiveParser.KW_FALSE + "%"), TypeCheckProcFactory + .getBoolExprProcessor()); + opRules.put(new RuleRegExp("R5", HiveParser.TOK_TABLE_OR_COL + "%"), + TypeCheckProcFactory.getColumnExprProcessor()); - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(TypeCheckProcFactory.getDefaultExprProcessor(), opRules, tcCtx); + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(TypeCheckProcFactory + .getDefaultExprProcessor(), opRules, tcCtx); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of topop nodes @@ -5497,7 +5754,7 @@ topNodes.add(expr); HashMap nodeOutputs = new HashMap(); ogw.startWalking(topNodes, nodeOutputs); - exprNodeDesc desc = (exprNodeDesc)nodeOutputs.get(expr); + exprNodeDesc desc = (exprNodeDesc) nodeOutputs.get(expr); if (desc == null) { throw new SemanticException(tcCtx.getError()); } @@ -5517,78 +5774,86 @@ ASTNode node = (ASTNode) entry.getKey(); exprNodeColumnDesc columnDesc = (exprNodeColumnDesc) entry.getValue(); if ((columnDesc.getTabAlias() == null) - || (columnDesc.getTabAlias().length() == 0)) { + || (columnDesc.getTabAlias().length() == 0)) { // These aren't real column refs; instead, they are special // internal expressions used in the representation of aggregation. continue; } - String [] tmp = input.reverseLookup(columnDesc.getColumn()); + String[] tmp = input.reverseLookup(columnDesc.getColumn()); StringBuilder replacementText = new StringBuilder(); replacementText.append(HiveUtils.unparseIdentifier(tmp[0])); replacementText.append("."); replacementText.append(HiveUtils.unparseIdentifier(tmp[1])); unparseTranslator.addTranslation(node, replacementText.toString()); } - + return desc; } /** - * Gets the table Alias for the column from the column name. This function throws - * and exception in case the same column name is present in multiple table. The exception - * message indicates that the ambiguity could not be resolved. 
- * - * @param qbm The metadata where the function looks for the table alias - * @param colName The name of the non aliased column - * @param pt The parse tree corresponding to the column(this is used for error reporting) + * Gets the table Alias for the column from the column name. This function + * throws an exception in case the same column name is present in multiple + * tables. The exception message indicates that the ambiguity could not be + * resolved. + * + * @param qbm + * The metadata where the function looks for the table alias + * @param colName + * The name of the non aliased column + * @param pt + * The parse tree corresponding to the column(this is used for error + * reporting) * @return String * @throws SemanticException */ static String getTabAliasForCol(QBMetaData qbm, String colName, ASTNode pt) - throws SemanticException { + throws SemanticException { String tabAlias = null; boolean found = false; - for(Map.Entry ent: qbm.getAliasToTable().entrySet()) { - for(FieldSchema field: ent.getValue().getAllCols()) { + for (Map.Entry ent : qbm.getAliasToTable().entrySet()) { + for (FieldSchema field : ent.getValue().getAllCols()) { if (colName.equalsIgnoreCase(field.getName())) { - if (found) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(pt)); - } + if (found) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(pt)); + } - found = true; - tabAlias = ent.getKey(); + found = true; + tabAlias = ent.getKey(); } } } return tabAlias; } - + @Override public void validate() throws SemanticException { // Check if the plan contains at least one path. // validate all tasks - for(Task rootTask: rootTasks) + for (Task rootTask : rootTasks) { validate(rootTask); + } } - private void validate(Task task) throws SemanticException { + private void validate(Task task) + throws SemanticException { if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - mapredWork work = (mapredWork)task.getWork(); + task.getWork(); // If the plan does not contain any path, an empty file // will be added by ExecDriver at execute time } - if (task.getChildTasks() == null) + if (task.getChildTasks() == null) { return; + } - for (Task childTask : task.getChildTasks()) + for (Task childTask : task.getChildTasks()) { validate(childTask); + } } - /** * Get the row resolver given an operator. */ @@ -5597,190 +5862,201 @@ } /** - * Analyze the create table command. If it is a regular create-table or create-table-like - * statements, we create a DDLWork and return true. If it is a create-table-as-select, we get the - * necessary info such as the SerDe and Storage Format and put it in QB, and return false, indicating - * the rest of the semantic analyzer need to deal with the select statement with respect to the - * SerDe and Storage Format. + * Analyze the create table command. If it is a regular create-table or + * create-table-like statements, we create a DDLWork and return true. If it is + * a create-table-as-select, we get the necessary info such as the SerDe and + * Storage Format and put it in QB, and return false, indicating the rest of + * the semantic analyzer needs to deal with the select statement with respect + * to the SerDe and Storage Format.
*/ private ASTNode analyzeCreateTable(ASTNode ast, QB qb) - throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - String likeTableName = null; - List cols = null; - List partCols = null; - List bucketCols = null; - List sortCols = null; - int numBuckets = -1; - String fieldDelim = null; - String fieldEscape = null; - String collItemDelim = null; - String mapKeyDelim = null; - String lineDelim = null; - String comment = null; - String inputFormat = TEXTFILE_INPUT; - String outputFormat = TEXTFILE_OUTPUT; - String location = null; - String serde = null; - Map mapProp = null; - boolean ifNotExists = false; - boolean isExt = false; - ASTNode selectStmt = null; - final int CREATE_TABLE = 0; // regular CREATE TABLE - final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT) - final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS) - int command_type = CREATE_TABLE; + throws SemanticException { + String tableName = unescapeIdentifier(ast.getChild(0).getText()); + String likeTableName = null; + List cols = null; + List partCols = null; + List bucketCols = null; + List sortCols = null; + int numBuckets = -1; + String fieldDelim = null; + String fieldEscape = null; + String collItemDelim = null; + String mapKeyDelim = null; + String lineDelim = null; + String comment = null; + String inputFormat = TEXTFILE_INPUT; + String outputFormat = TEXTFILE_OUTPUT; + String location = null; + String serde = null; + Map mapProp = null; + boolean ifNotExists = false; + boolean isExt = false; + ASTNode selectStmt = null; + final int CREATE_TABLE = 0; // regular CREATE TABLE + final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT) + final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS) + int command_type = CREATE_TABLE; - if ("SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + if ("SequenceFile".equalsIgnoreCase(conf + .getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { inputFormat = SEQUENCEFILE_INPUT; outputFormat = SEQUENCEFILE_OUTPUT; - } else if ("RCFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + } else if ("RCFile".equalsIgnoreCase(conf + .getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { inputFormat = RCFILE_INPUT; outputFormat = RCFILE_OUTPUT; serde = COLUMNAR_SERDE; } - LOG.info("Creating table " + tableName + " position=" + ast.getCharPositionInLine()); + LOG.info("Creating table " + tableName + " position=" + + ast.getCharPositionInLine()); int numCh = ast.getChildCount(); - /* Check the 1st-level children and do simple semantic checks: - * 1) CTLT and CTAS should not coexists. - * 2) CTLT or CTAS should not coexists with column list (target table schema). - * 3) CTAS does not support partitioning (for now). + /* + * Check the 1st-level children and do simple semantic checks: 1) CTLT and + * CTAS should not coexists. 2) CTLT or CTAS should not coexists with column + * list (target table schema). 3) CTAS does not support partitioning (for + * now). 
*/ - for (int num = 1; num < numCh; num++) - { - ASTNode child = (ASTNode)ast.getChild(num); + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); switch (child.getToken().getType()) { - case HiveParser.TOK_IFNOTEXISTS: - ifNotExists = true; - break; - case HiveParser.KW_EXTERNAL: - isExt = true; - break; - case HiveParser.TOK_LIKETABLE: - if (child.getChildCount() > 0) { - likeTableName = unescapeIdentifier(child.getChild(0).getText()); - if ( likeTableName != null ) { - if ( command_type == CTAS ) { - throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); - } - if ( cols != null ) { - throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg()); - } + case HiveParser.TOK_IFNOTEXISTS: + ifNotExists = true; + break; + case HiveParser.KW_EXTERNAL: + isExt = true; + break; + case HiveParser.TOK_LIKETABLE: + if (child.getChildCount() > 0) { + likeTableName = unescapeIdentifier(child.getChild(0).getText()); + if (likeTableName != null) { + if (command_type == CTAS) { + throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE + .getMsg()); } - command_type = CTLT; + if (cols != null) { + throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE + .getMsg()); + } } - break; - case HiveParser.TOK_QUERY: // CTAS - if ( command_type == CTLT ) { - throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); - } - if ( cols != null ) { - throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg()); - } - // TODO: support partition for CTAS? - if ( partCols != null || bucketCols != null ) { - throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg()); - } - if ( isExt ) { - throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); - } - command_type = CTAS; - selectStmt = child; - break; - case HiveParser.TOK_TABCOLLIST: - cols = getColumns(child); - break; - case HiveParser.TOK_TABLECOMMENT: - comment = unescapeSQLString(child.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEPARTCOLS: - partCols = getColumns((ASTNode)child.getChild(0)); - break; - case HiveParser.TOK_TABLEBUCKETS: - bucketCols = getColumnNames((ASTNode)child.getChild(0)); - if (child.getChildCount() == 2) - numBuckets = (Integer.valueOf(child.getChild(1).getText())).intValue(); - else - { - sortCols = getColumnNamesOrder((ASTNode)child.getChild(1)); - numBuckets = (Integer.valueOf(child.getChild(2).getText())).intValue(); - } - break; - case HiveParser.TOK_TABLEROWFORMAT: + command_type = CTLT; + } + break; + case HiveParser.TOK_QUERY: // CTAS + if (command_type == CTLT) { + throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); + } + if (cols != null) { + throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg()); + } + // TODO: support partition for CTAS? 
+ if (partCols != null || bucketCols != null) { + throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg()); + } + if (isExt) { + throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); + } + command_type = CTAS; + selectStmt = child; + break; + case HiveParser.TOK_TABCOLLIST: + cols = getColumns(child); + break; + case HiveParser.TOK_TABLECOMMENT: + comment = unescapeSQLString(child.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEPARTCOLS: + partCols = getColumns((ASTNode) child.getChild(0)); + break; + case HiveParser.TOK_TABLEBUCKETS: + bucketCols = getColumnNames((ASTNode) child.getChild(0)); + if (child.getChildCount() == 2) { + numBuckets = (Integer.valueOf(child.getChild(1).getText())) + .intValue(); + } else { + sortCols = getColumnNamesOrder((ASTNode) child.getChild(1)); + numBuckets = (Integer.valueOf(child.getChild(2).getText())) + .intValue(); + } + break; + case HiveParser.TOK_TABLEROWFORMAT: - child = (ASTNode)child.getChild(0); - int numChildRowFormat = child.getChildCount(); - for (int numC = 0; numC < numChildRowFormat; numC++) - { - ASTNode rowChild = (ASTNode)child.getChild(numC); - switch (rowChild.getToken().getType()) { - case HiveParser.TOK_TABLEROWFORMATFIELD: - fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); - if (rowChild.getChildCount()>=2) { - fieldEscape = unescapeSQLString(rowChild.getChild(1).getText()); - } - break; - case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: - collItemDelim = unescapeSQLString(rowChild.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEROWFORMATMAPKEYS: - mapKeyDelim = unescapeSQLString(rowChild.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEROWFORMATLINES: - lineDelim = unescapeSQLString(rowChild.getChild(0).getText()); - if (!lineDelim.equals("\n") && !lineDelim.equals("10")) { - throw new SemanticException(ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()); - } - break; - default: assert false; + child = (ASTNode) child.getChild(0); + int numChildRowFormat = child.getChildCount(); + for (int numC = 0; numC < numChildRowFormat; numC++) { + ASTNode rowChild = (ASTNode) child.getChild(numC); + switch (rowChild.getToken().getType()) { + case HiveParser.TOK_TABLEROWFORMATFIELD: + fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); + if (rowChild.getChildCount() >= 2) { + fieldEscape = unescapeSQLString(rowChild.getChild(1).getText()); } + break; + case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: + collItemDelim = unescapeSQLString(rowChild.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEROWFORMATMAPKEYS: + mapKeyDelim = unescapeSQLString(rowChild.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEROWFORMATLINES: + lineDelim = unescapeSQLString(rowChild.getChild(0).getText()); + if (!lineDelim.equals("\n") && !lineDelim.equals("10")) { + throw new SemanticException( + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()); + } + break; + default: + assert false; } - break; - case HiveParser.TOK_TABLESERIALIZER: + } + break; + case HiveParser.TOK_TABLESERIALIZER: - child = (ASTNode)child.getChild(0); - serde = unescapeSQLString(child.getChild(0).getText()); - if (child.getChildCount() == 2) { - mapProp = new HashMap(); - ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); - for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); - 
mapProp.put(key,value); - } + child = (ASTNode) child.getChild(0); + serde = unescapeSQLString(child.getChild(0).getText()); + if (child.getChildCount() == 2) { + mapProp = new HashMap(); + ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0); + for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { + String key = unescapeSQLString(prop.getChild(propChild).getChild(0) + .getText()); + String value = unescapeSQLString(prop.getChild(propChild).getChild( + 1).getText()); + mapProp.put(key, value); } - break; - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - break; - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; - break; - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - serde = COLUMNAR_SERDE; - break; - case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(child.getChild(0).getText()); - outputFormat = unescapeSQLString(child.getChild(1).getText()); - break; - case HiveParser.TOK_TABLELOCATION: - location = unescapeSQLString(child.getChild(0).getText()); - break; - default: assert false; + } + break; + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; + break; + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = TEXTFILE_INPUT; + outputFormat = TEXTFILE_OUTPUT; + break; + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFILE_INPUT; + outputFormat = RCFILE_OUTPUT; + serde = COLUMNAR_SERDE; + break; + case HiveParser.TOK_TABLEFILEFORMAT: + inputFormat = unescapeSQLString(child.getChild(0).getText()); + outputFormat = unescapeSQLString(child.getChild(1).getText()); + break; + case HiveParser.TOK_TABLELOCATION: + location = unescapeSQLString(child.getChild(0).getText()); + break; + default: + assert false; } } // check for existence of table - if ( ifNotExists ) { + if (ifNotExists) { try { - List tables = this.db.getTablesByPattern(tableName); - if ( tables != null && tables.size() > 0 ) { // table exists + List tables = db.getTablesByPattern(tableName); + if (tables != null && tables.size() > 0) { // table exists return null; } } catch (HiveException e) { @@ -5790,58 +6066,56 @@ // Handle different types of CREATE TABLE command createTableDesc crtTblDesc = null; - switch ( command_type ) { + switch (command_type) { - case CREATE_TABLE: // REGULAR CREATE TABLE DDL - crtTblDesc = - new createTableDesc(tableName, isExt, cols, partCols, bucketCols, - sortCols, numBuckets, - fieldDelim, fieldEscape, - collItemDelim, mapKeyDelim, lineDelim, - comment, inputFormat, outputFormat, location, serde, - mapProp, ifNotExists); + case CREATE_TABLE: // REGULAR CREATE TABLE DDL + crtTblDesc = new createTableDesc(tableName, isExt, cols, partCols, + bucketCols, sortCols, numBuckets, fieldDelim, fieldEscape, + collItemDelim, mapKeyDelim, lineDelim, comment, inputFormat, + outputFormat, location, serde, mapProp, ifNotExists); - validateCreateTable(crtTblDesc); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc), conf)); - break; + validateCreateTable(crtTblDesc); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + crtTblDesc), conf)); + break; - case CTLT: // create table like - createTableLikeDesc crtTblLikeDesc = - new createTableLikeDesc(tableName, isExt, location, ifNotExists, likeTableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc), conf)); - 
break; + case CTLT: // create table like + createTableLikeDesc crtTblLikeDesc = new createTableLikeDesc(tableName, + isExt, location, ifNotExists, likeTableName); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + crtTblLikeDesc), conf)); + break; - case CTAS: // create table as select + case CTAS: // create table as select - // check for existence of table. Throw an exception if it exists. - try { - Table tab = this.db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, - false); // do not throw exception if table does not exist + // check for existence of table. Throw an exception if it exists. + try { + Table tab = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, + tableName, false); // do not throw exception if table does not exist - if ( tab != null ) { - throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(tableName)); - } - } catch (HiveException e) { // may be unable to get meta data - throw new SemanticException(e); + if (tab != null) { + throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS + .getMsg(tableName)); } + } catch (HiveException e) { // may be unable to get meta data + throw new SemanticException(e); + } - crtTblDesc = - new createTableDesc(tableName, isExt, cols, partCols, bucketCols, - sortCols, numBuckets, - fieldDelim, fieldEscape, - collItemDelim, mapKeyDelim, lineDelim, - comment, inputFormat, outputFormat, location, serde, - mapProp, ifNotExists); - qb.setTableDesc(crtTblDesc); + crtTblDesc = new createTableDesc(tableName, isExt, cols, partCols, + bucketCols, sortCols, numBuckets, fieldDelim, fieldEscape, + collItemDelim, mapKeyDelim, lineDelim, comment, inputFormat, + outputFormat, location, serde, mapProp, ifNotExists); + qb.setTableDesc(crtTblDesc); - return selectStmt; - default: assert false; // should never be unknown command type + return selectStmt; + default: + assert false; // should never be unknown command type } return null; } private ASTNode analyzeCreateView(ASTNode ast, QB qb) - throws SemanticException { + throws SemanticException { String tableName = unescapeIdentifier(ast.getChild(0).getText()); List cols = null; boolean ifNotExists = false; @@ -5849,10 +6123,10 @@ ASTNode selectStmt = null; LOG.info("Creating view " + tableName + " position=" - + ast.getCharPositionInLine()); + + ast.getCharPositionInLine()); int numCh = ast.getChildCount(); for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode)ast.getChild(num); + ASTNode child = (ASTNode) ast.getChild(num); switch (child.getToken().getType()) { case HiveParser.TOK_IFNOTEXISTS: ifNotExists = true; @@ -5866,24 +6140,26 @@ case HiveParser.TOK_TABLECOMMENT: comment = unescapeSQLString(child.getChild(0).getText()); break; - default: assert false; + default: + assert false; } } createVwDesc = new createViewDesc(tableName, cols, comment, ifNotExists); unparseTranslator.enable(); - rootTasks.add( - TaskFactory.get( - new DDLWork(getInputs(), getOutputs(), createVwDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + createVwDesc), conf)); return selectStmt; } private List validateColumnNameUniqueness( - List fieldSchemas) throws SemanticException { + List fieldSchemas) throws SemanticException { // no duplicate column names - // currently, it is a simple n*n algorithm - this can be optimized later if need be - // but it should not be a major bottleneck as the number of columns are anyway not so big + // currently, it is a simple n*n algorithm - this can be optimized later if + // need be + // but it should 
not be a major bottleneck as the number of columns are + // anyway not so big Iterator iterCols = fieldSchemas.iterator(); List colNames = new ArrayList(); while (iterCols.hasNext()) { @@ -5891,38 +6167,44 @@ Iterator iter = colNames.iterator(); while (iter.hasNext()) { String oldColName = iter.next(); - if (colName.equalsIgnoreCase(oldColName)) - throw new SemanticException( - ErrorMsg.DUPLICATE_COLUMN_NAMES.getMsg(oldColName)); + if (colName.equalsIgnoreCase(oldColName)) { + throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES + .getMsg(oldColName)); + } } colNames.add(colName); } return colNames; } - private void validateCreateTable(createTableDesc crtTblDesc) throws SemanticException { + private void validateCreateTable(createTableDesc crtTblDesc) + throws SemanticException { - if((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) { + if ((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) { // for now make sure that serde exists - if(StringUtils.isEmpty(crtTblDesc.getSerName()) || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) { + if (StringUtils.isEmpty(crtTblDesc.getSerName()) + || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) { throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg()); } return; } try { - Class origin = Class.forName(crtTblDesc.getOutputFormat(), true, JavaUtils.getClassLoader()); - Class replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin); - if(replaced == null) - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + Class origin = Class.forName(crtTblDesc.getOutputFormat(), true, + JavaUtils.getClassLoader()); + Class replaced = HiveFileFormatUtils + .getOutputFormatSubstitute(origin); + if (replaced == null) { + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE + .getMsg()); + } } catch (ClassNotFoundException e) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); } List colNames = validateColumnNameUniqueness(crtTblDesc.getCols()); - if (crtTblDesc.getBucketCols() != null) - { + if (crtTblDesc.getBucketCols() != null) { // all columns in cluster and sort are valid columns Iterator bucketCols = crtTblDesc.getBucketCols().iterator(); while (bucketCols.hasNext()) { @@ -5936,13 +6218,13 @@ break; } } - if (!found) + if (!found) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } } } - if (crtTblDesc.getSortCols() != null) - { + if (crtTblDesc.getSortCols() != null) { // all columns in cluster and sort are valid columns Iterator sortCols = crtTblDesc.getSortCols().iterator(); while (sortCols.hasNext()) { @@ -5956,13 +6238,13 @@ break; } } - if (!found) + if (!found) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } } } - if (crtTblDesc.getPartCols() != null) - { + if (crtTblDesc.getPartCols() != null) { // there is no overlap between columns and partitioning columns Iterator partColsIter = crtTblDesc.getPartCols().iterator(); while (partColsIter.hasNext()) { @@ -5970,8 +6252,10 @@ Iterator colNamesIter = colNames.iterator(); while (colNamesIter.hasNext()) { String colName = unescapeIdentifier(colNamesIter.next()); - if (partCol.equalsIgnoreCase(colName)) - throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); + if (partCol.equalsIgnoreCase(colName)) { + throw new SemanticException( + ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java (working copy) @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.parse; - /** * Implementation of the Operator Parse Context. It maintains the parse context * that may be needed by an operator. Currently, it only maintains the row @@ -26,13 +25,14 @@ **/ public class OpParseContext { - private RowResolver rr; // row resolver for the operator + private RowResolver rr; // row resolver for the operator - public OpParseContext() { + public OpParseContext() { } - + /** - * @param rr row resolver + * @param rr + * row resolver */ public OpParseContext(RowResolver rr) { this.rr = rr; @@ -46,7 +46,8 @@ } /** - * @param rr the row resolver to set + * @param rr + * the row resolver to set */ public void setRR(RowResolver rr) { this.rr = rr; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -25,18 +25,18 @@ import java.util.Map; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.plan.exprNodeDesc; import org.apache.hadoop.hive.ql.plan.loadFileDesc; import org.apache.hadoop.hive.ql.plan.loadTableDesc; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc; /** @@ -46,7 +46,7 @@ * populated. Note that since the parse context contains the operator tree, it * can be easily retrieved by the next optimization step or finally for task * generation after the plan has been completely optimized. - * + * **/ public class ParseContext { @@ -66,13 +66,18 @@ private HashMap idToTableNameMap; private int destTableId; private UnionProcContext uCtx; - private List listMapJoinOpsNoReducer; // list of map join operators with no reducer + private List listMapJoinOpsNoReducer; // list of map join + // operators with no + // reducer private Map> groupOpToInputTables; private Map prunedPartitions; - // is set to true if the expression only contains partitioning columns and not any other column reference. - // This is used to optimize select * from table where ... scenario, when the where condition only references - // partitioning columns - the partitions are identified and streamed directly to the client without requiring + // is set to true if the expression only contains partitioning columns and not + // any other column reference. + // This is used to optimize select * from table where ... 
scenario, when the + // where condition only references + // partitioning columns - the partitions are identified and streamed directly + // to the client without requiring // a map-reduce job private boolean hasNonPartCols; @@ -93,21 +98,28 @@ * @param opParseCtx * operator parse context - contains a mapping from operator to * operator parse state (row resolver etc.) - * @param joinContext context needed join processing (map join specifically) - * @param topToTable the top tables being processed + * @param joinContext + * context needed join processing (map join specifically) + * @param topToTable + * the top tables being processed * @param loadTableWork * list of destination tables being loaded * @param loadFileWork * list of destination files being loaded - * @param ctx parse context + * @param ctx + * parse context * @param idToTableNameMap * @param destTableId * @param uCtx * @param listMapJoinOpsNoReducer * list of map join operators with no reducer - * @param opToSamplePruner operator to sample pruner map + * @param opToSamplePruner + * operator to sample pruner map */ - public ParseContext(HiveConf conf, QB qb, ASTNode ast, + public ParseContext( + HiveConf conf, + QB qb, + ASTNode ast, HashMap opToPartPruner, HashMap> topOps, HashMap> topSelOps, @@ -115,8 +127,8 @@ Map joinContext, HashMap topToTable, List loadTableWork, List loadFileWork, - Context ctx, HashMap idToTableNameMap, int destTableId, UnionProcContext uCtx, - List listMapJoinOpsNoReducer, + Context ctx, HashMap idToTableNameMap, int destTableId, + UnionProcContext uCtx, List listMapJoinOpsNoReducer, Map> groupOpToInputTables, Map prunedPartitions, HashMap opToSamplePruner) { @@ -136,7 +148,7 @@ this.destTableId = destTableId; this.uCtx = uCtx; this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; - this.hasNonPartCols = false; + hasNonPartCols = false; this.groupOpToInputTables = new HashMap>(); this.groupOpToInputTables = groupOpToInputTables; this.prunedPartitions = prunedPartitions; @@ -214,7 +226,8 @@ * @param opToPartPruner * the opToPartPruner to set */ - public void setOpToPartPruner(HashMap opToPartPruner) { + public void setOpToPartPruner( + HashMap opToPartPruner) { this.opToPartPruner = opToPartPruner; } @@ -342,7 +355,8 @@ } /** - * @param joinContext the joinContext to set + * @param joinContext + * the joinContext to set */ public void setJoinContext(Map joinContext) { this.joinContext = joinContext; @@ -356,7 +370,8 @@ } /** - * @param listMapJoinOpsNoReducer the listMapJoinOpsNoReducer to set + * @param listMapJoinOpsNoReducer + * the listMapJoinOpsNoReducer to set */ public void setListMapJoinOpsNoReducer( List listMapJoinOpsNoReducer) { @@ -365,17 +380,18 @@ /** * Sets the hasNonPartCols flag + * * @param val */ public void setHasNonPartCols(boolean val) { - this.hasNonPartCols = val; + hasNonPartCols = val; } /** * Gets the value of the hasNonPartCols flag */ public boolean getHasNonPartCols() { - return this.hasNonPartCols; + return hasNonPartCols; } /** @@ -389,7 +405,8 @@ * @param opToSamplePruner * the opToSamplePruner to set */ - public void setOpToSamplePruner(HashMap opToSamplePruner) { + public void setOpToSamplePruner( + HashMap opToSamplePruner) { this.opToSamplePruner = opToSamplePruner; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticException.java 
(working copy) @@ -26,22 +26,22 @@ public class SemanticException extends HiveException { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - public SemanticException() { - super(); - } - - public SemanticException(String message) { - super(message); - } - - public SemanticException(Throwable cause) { - super(cause); - } - - public SemanticException(String message, Throwable cause) { - super(message, cause); - } - + public SemanticException() { + super(); + } + + public SemanticException(String message) { + super(message); + } + + public SemanticException(Throwable cause) { + super(cause); + } + + public SemanticException(String message, Throwable cause) { + super(message, cause); + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (working copy) @@ -27,51 +27,59 @@ */ public class PrunedPartitionList { // confirmed partitions - satisfy the partition criteria - private Set confirmedPartns; + private Set confirmedPartns; // unknown partitions - may/may not satisfy the partition criteria - private Set unknownPartns; + private Set unknownPartns; // denied partitions - do not satisfy the partition criteria - private Set deniedPartns; + private final Set deniedPartns; /** - * @param confirmedPartns confirmed paritions - * @param unknownPartns unknown partitions + * @param confirmedPartns + * confirmed partitions + * @param unknownPartns + * unknown partitions */ - public PrunedPartitionList(Set confirmedPartns, Set unknownPartns, Set deniedPartns) { - this.confirmedPartns = confirmedPartns; - this.unknownPartns = unknownPartns; - this.deniedPartns = deniedPartns; + public PrunedPartitionList(Set confirmedPartns, + Set unknownPartns, Set deniedPartns) { + this.confirmedPartns = confirmedPartns; + this.unknownPartns = unknownPartns; + this.deniedPartns = deniedPartns; } /** * get confirmed partitions - * @return confirmedPartns confirmed paritions + * + * @return confirmedPartns confirmed partitions */ - public Set getConfirmedPartns() { + public Set getConfirmedPartns() { return confirmedPartns; } /** * get unknown partitions - * @return unknownPartns unknown paritions + * + * @return unknownPartns unknown partitions */ - public Set getUnknownPartns() { + public Set getUnknownPartns() { return unknownPartns; } /** * get denied partitions - * @return deniedPartns denied paritions + * + * @return deniedPartns denied partitions */ - public Set getDeniedPartns() { + public Set getDeniedPartns() { return deniedPartns; } /** * set confirmed partitions - * @param confirmedPartns confirmed paritions + * + * @param confirmedPartns + * confirmed partitions */ public void setConfirmedPartns(Set confirmedPartns) { this.confirmedPartns = confirmedPartns; @@ -79,9 +87,11 @@ /** * set unknown partitions - * @param unknownPartns unknown partitions + * + * @param unknownPartns + * unknown partitions */ public void setUnknownPartns(Set unknownPartns) { - this.unknownPartns = unknownPartns; + this.unknownPartns = unknownPartns; } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseError.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseError.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseError.java
(working copy) @@ -18,7 +18,8 @@ package org.apache.hadoop.hive.ql.parse; -import org.antlr.runtime.*; +import org.antlr.runtime.BaseRecognizer; +import org.antlr.runtime.RecognitionException; /* * SemanticException.java @@ -33,16 +34,16 @@ * */ public class ParseError { - private BaseRecognizer br; - private RecognitionException re; - private String[] tokenNames; - + private final BaseRecognizer br; + private final RecognitionException re; + private final String[] tokenNames; + ParseError(BaseRecognizer br, RecognitionException re, String[] tokenNames) { this.br = br; this.re = re; this.tokenNames = tokenNames; - } - + } + BaseRecognizer getBaseRecognizer() { return br; } @@ -50,7 +51,7 @@ RecognitionException getRecognitionException() { return re; } - + String[] getTokenNames() { return tokenNames; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (working copy) @@ -18,7 +18,12 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.*; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.Vector; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -26,29 +31,29 @@ /** * Implementation of the Row Resolver - * + * **/ public class RowResolver { - private RowSchema rowSchema; - private HashMap> rslvMap; + private final RowSchema rowSchema; + private final HashMap> rslvMap; - private HashMap invRslvMap; + private final HashMap invRslvMap; // TODO: Refactor this and do in a more object oriented manner private boolean isExprResolver; @SuppressWarnings("unused") private static final Log LOG = LogFactory.getLog(RowResolver.class.getName()); - + public RowResolver() { rowSchema = new RowSchema(); rslvMap = new HashMap>(); invRslvMap = new HashMap(); isExprResolver = false; } - + public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { if (tab_alias != null) { tab_alias = tab_alias.toLowerCase(); @@ -67,7 +72,7 @@ } f_map.put(col_alias, colInfo); - String [] qualifiedAlias = new String[2]; + String[] qualifiedAlias = new String[2]; qualifiedAlias[0] = tab_alias; qualifiedAlias[1] = col_alias; invRslvMap.put(colInfo.getInternalName(), qualifiedAlias); @@ -78,21 +83,27 @@ } /** - * Gets the column Info to tab_alias.col_alias type of a column reference. I the tab_alias is not - * provided as can be the case with an non aliased column, this function looks up the column in all - * the table aliases in this row resolver and returns the match. It also throws an exception if - * the column is found in multiple table aliases. If no match is found a null values is returned. + * Gets the column Info to tab_alias.col_alias type of a column reference. I + * the tab_alias is not provided as can be the case with an non aliased + * column, this function looks up the column in all the table aliases in this + * row resolver and returns the match. It also throws an exception if the + * column is found in multiple table aliases. If no match is found a null + * values is returned. * - * This allows us to interpret both select t.c1 type of references and select c1 kind of refereneces. - * The later kind are what we call non aliased column references in the query. 
+ * This allows us to interpret both select t.c1 type of references and select + * c1 kind of refereneces. The later kind are what we call non aliased column + * references in the query. * - * @param tab_alias The table alias to match (this is null if the column reference is non aliased) - * @param col_alias The column name that is being searched for + * @param tab_alias + * The table alias to match (this is null if the column reference is + * non aliased) + * @param col_alias + * The column name that is being searched for * @return ColumnInfo * @throws SemanticException */ - public ColumnInfo get(String tab_alias, String col_alias) - throws SemanticException { + public ColumnInfo get(String tab_alias, String col_alias) + throws SemanticException { col_alias = col_alias.toLowerCase(); ColumnInfo ret = null; @@ -103,29 +114,29 @@ return null; } ret = f_map.get(col_alias); - } - else { + } else { boolean found = false; - for(LinkedHashMap cmap: rslvMap.values()) { - for(Map.Entry cmapEnt: cmap.entrySet()) { - if (col_alias.equalsIgnoreCase((String)cmapEnt.getKey())) { + for (LinkedHashMap cmap : rslvMap.values()) { + for (Map.Entry cmapEnt : cmap.entrySet()) { + if (col_alias.equalsIgnoreCase(cmapEnt.getKey())) { if (found) { - throw new SemanticException("Column " + col_alias + " Found in more than One Tables/Subqueries"); + throw new SemanticException("Column " + col_alias + + " Found in more than One Tables/Subqueries"); } found = true; - ret = (ColumnInfo)cmapEnt.getValue(); + ret = cmapEnt.getValue(); } } } } - return ret; + return ret; } public Vector getColumnInfos() { return rowSchema.getSignature(); } - + public HashMap getFieldMap(String tab_alias) { if (tab_alias == null) { return rslvMap.get(null); @@ -137,10 +148,10 @@ public int getPosition(String internalName) { int pos = -1; - for(ColumnInfo var: rowSchema.getSignature()) { + for (ColumnInfo var : rowSchema.getSignature()) { ++pos; if (var.getInternalName().equals(internalName)) { - return pos; + return pos; } } @@ -154,7 +165,7 @@ public String[] reverseLookup(String internalName) { return invRslvMap.get(internalName); } - + public void setIsExprResolver(boolean isExprResolver) { this.isExprResolver = isExprResolver; } @@ -163,17 +174,21 @@ return isExprResolver; } + @Override public String toString() { StringBuffer sb = new StringBuffer(); - - for(Map.Entry> e: rslvMap.entrySet()) { - String tab = (String)e.getKey(); + + for (Map.Entry> e : rslvMap + .entrySet()) { + String tab = e.getKey(); sb.append(tab + "{"); - HashMap f_map = (HashMap)e.getValue(); - if (f_map != null) - for(Map.Entry entry: f_map.entrySet()) { - sb.append("(" + (String)entry.getKey() + "," + entry.getValue().toString() + ")"); + HashMap f_map = e.getValue(); + if (f_map != null) { + for (Map.Entry entry : f_map.entrySet()) { + sb.append("(" + entry.getKey() + "," + entry.getValue().toString() + + ")"); } + } sb.append("} "); } return sb.toString(); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableSample.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TableSample.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableSample.java (working copy) @@ -22,35 +22,35 @@ /** * - * This class stores all the information specified in the TABLESAMPLE clause. e.g. - * for the clause "FROM t TABLESAMPLE(1 OUT OF 2 ON c1) it will store the numerator - * 1, the denominator 2 and the list of expressions(in this case c1) in the appropriate - * fields. 
The afore-mentioned sampling clause causes the 1st bucket to be picked out of - * the 2 buckets created by hashing on c1. - * + * This class stores all the information specified in the TABLESAMPLE clause. + * e.g. for the clause "FROM t TABLESAMPLE(1 OUT OF 2 ON c1) it will store the + * numerator 1, the denominator 2 and the list of expressions(in this case c1) + * in the appropriate fields. The afore-mentioned sampling clause causes the 1st + * bucket to be picked out of the 2 buckets created by hashing on c1. + * */ public class TableSample { - + /** * The numerator of the TABLESAMPLE clause */ private int numerator; - + /** * The denominator of the TABLESAMPLE clause */ private int denominator; - + /** - * The list of expressions following ON part of the TABLESAMPLE clause. This list is - * empty in case there are no expressions such as in the clause + * The list of expressions following ON part of the TABLESAMPLE clause. This + * list is empty in case there are no expressions such as in the clause * "FROM t TABLESAMPLE(1 OUT OF 2)". For this expression the sampling is done - * on the tables clustering column(as specified when the table was created). In case - * the table does not have any clustering column, the usage of a table sample clause - * without an ON part is disallowed by the compiler + * on the tables clustering column(as specified when the table was created). + * In case the table does not have any clustering column, the usage of a table + * sample clause without an ON part is disallowed by the compiler */ private ArrayList exprs; - + /** * Flag to indicate that input files can be pruned */ @@ -60,71 +60,77 @@ * Constructs the TableSample given the numerator, denominator and the list of * ON clause expressions * - * @param num The numerator - * @param den The denominator - * @param exprs The list of expressions in the ON part of the TABLESAMPLE clause + * @param num + * The numerator + * @param den + * The denominator + * @param exprs + * The list of expressions in the ON part of the TABLESAMPLE clause */ public TableSample(String num, String den, ArrayList exprs) { - this.numerator = Integer.valueOf(num).intValue(); - this.denominator = Integer.valueOf(den).intValue(); + numerator = Integer.valueOf(num).intValue(); + denominator = Integer.valueOf(den).intValue(); this.exprs = exprs; } - + public TableSample(int num, int den) { - this.numerator = num; - this.denominator = den; - this.exprs = null; + numerator = num; + denominator = den; + exprs = null; } - + /** * Gets the numerator * * @return int */ public int getNumerator() { - return this.numerator; + return numerator; } - + /** * Sets the numerator * - * @param num The numerator + * @param num + * The numerator */ public void setNumerator(int num) { - this.numerator = num; + numerator = num; } - + /** * Gets the denominator * * @return int */ public int getDenominator() { - return this.denominator; + return denominator; } - + /** * Sets the denominator * - * @param den The denominator + * @param den + * The denominator */ public void setDenominator(int den) { - this.denominator = den; + denominator = den; } - + /** * Gets the ON part's expression list * * @return ArrayList */ public ArrayList getExprs() { - return this.exprs; + return exprs; } - + /** * Sets the expression list * - * @param exprs The expression list + * @param exprs + * The expression list */ public void setExprs(ArrayList exprs) { this.exprs = exprs; @@ -136,15 +142,16 @@ * @return boolean */ public boolean getInputPruning() { - return 
this.inputPruning; + return inputPruning; } - + /** * Sets the flag that indicates whether input pruning is possible or not * - * @param inputPruning true if input pruning is possible + * @param inputPruning + * true if input pruning is possible */ public void setInputPruning(boolean inputPruning) { - this.inputPruning = inputPruning; + this.inputPruning = inputPruning; } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (working copy) @@ -24,44 +24,43 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.plan.explainWork; public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { - public ExplainSemanticAnalyzer(HiveConf conf) throws SemanticException { super(conf); } + @Override public void analyzeInternal(ASTNode ast) throws SemanticException { ctx.setExplain(true); // Create a semantic analyzer for the query - BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode)ast.getChild(0)); - sem.analyze((ASTNode)ast.getChild(0), ctx); - + BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast + .getChild(0)); + sem.analyze((ASTNode) ast.getChild(0), ctx); + boolean extended = false; if (ast.getChildCount() > 1) { extended = true; } - + ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); List> tasks = sem.getRootTasks(); Task fetchTask = sem.getFetchTask(); if (tasks == null) { - if (fetchTask != null) { - tasks = new ArrayList>(); - tasks.add(fetchTask); - } + if (fetchTask != null) { + tasks = new ArrayList>(); + tasks.add(fetchTask); + } + } else if (fetchTask != null) { + tasks.add(fetchTask); } - else if (fetchTask != null) - tasks.add(fetchTask); - + rootTasks.add(TaskFactory.get(new explainWork(ctx.getResFile(), tasks, - ((ASTNode)ast.getChild(0)).toStringTree(), - extended), this.conf)); + ((ASTNode) ast.getChild(0)).toStringTree(), extended), conf)); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (working copy) @@ -20,39 +20,41 @@ /** * Library of utility functions used in the parse code - * + * */ public class ParseUtils { - + /** * Tests whether the parse tree node is a join token * - * @param node The parse tree node + * @param node + * The parse tree node * @return boolean */ public static boolean isJoinToken(ASTNode node) { if ((node.getToken().getType() == HiveParser.TOK_JOIN) - || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)) + || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) + || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)) { return true; + } return false; } /** - * Performs a descent of the leftmost branch of a tree, stopping - * when either a node with a non-null token is found or the leaf - * level 
is encountered. - * - * @param tree candidate node from which to start searching - * + * Performs a descent of the leftmost branch of a tree, stopping when either a + * node with a non-null token is found or the leaf level is encountered. + * + * @param tree + * candidate node from which to start searching + * * @return node at which descent stopped */ public static ASTNode findRootNonNullToken(ASTNode tree) { - while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { - tree = (ASTNode) tree.getChild(0); - } - return tree; + while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { + tree = (ASTNode) tree.getChild(0); + } + return tree; } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -29,18 +29,15 @@ import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.Tree; -import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -61,7 +58,8 @@ import org.apache.hadoop.mapred.TextInputFormat; public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { - private static final Log LOG = LogFactory.getLog("hive.ql.parse.DDLSemanticAnalyzer"); + private static final Log LOG = LogFactory + .getLog("hive.ql.parse.DDLSemanticAnalyzer"); public static final Map TokenToTypeName = new HashMap(); static { TokenToTypeName.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME); @@ -74,7 +72,8 @@ TokenToTypeName.put(HiveParser.TOK_STRING, Constants.STRING_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATE, Constants.DATE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATETIME, Constants.DATETIME_TYPE_NAME); - TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, Constants.TIMESTAMP_TYPE_NAME); + TokenToTypeName + .put(HiveParser.TOK_TIMESTAMP, Constants.TIMESTAMP_TYPE_NAME); } public static String getTypeName(int token) { @@ -87,15 +86,12 @@ @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - if (ast.getToken().getType() == HiveParser.TOK_DROPTABLE) - analyzeDropTable(ast, false); - else if (ast.getToken().getType() == HiveParser.TOK_DESCTABLE) - { + if (ast.getToken().getType() == HiveParser.TOK_DROPTABLE) { + analyzeDropTable(ast, false); + } else if (ast.getToken().getType() == HiveParser.TOK_DESCTABLE) { ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); analyzeDescribeTable(ast); - } - else if (ast.getToken().getType() == HiveParser.TOK_SHOWTABLES) - { + } else if (ast.getToken().getType() == HiveParser.TOK_SHOWTABLES) { ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); analyzeShowTables(ast); } else if (ast.getToken().getType() == HiveParser.TOK_SHOW_TABLESTATUS) { @@ -110,149 +106,166 @@ } else if 
(ast.getToken().getType() == HiveParser.TOK_MSCK) { ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); analyzeMetastoreCheck(ast); - } else if (ast.getToken().getType() == HiveParser.TOK_DROPVIEW) + } else if (ast.getToken().getType() == HiveParser.TOK_DROPVIEW) { analyzeDropTable(ast, true); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAME) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAME) { analyzeAlterTableRename(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) { analyzeAlterTableModifyCols(ast, alterTableTypes.ADDCOLS); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) { analyzeAlterTableModifyCols(ast, alterTableTypes.REPLACECOLS); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAMECOL) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAMECOL) { analyzeAlterTableRenameCol(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDPARTS) { + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDPARTS) { analyzeAlterTableAddParts(ast); - } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_DROPPARTS) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_DROPPARTS) { analyzeAlterTableDropParts(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_PROPERTIES) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_PROPERTIES) { analyzeAlterTableProps(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES) { analyzeAlterTableSerdeProps(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERIALIZER) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERIALIZER) { analyzeAlterTableSerde(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { analyzeAlterTableFileFormat(ast); - else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_CLUSTER_SORT) - analyzeAlterTableClusterSort(ast); - else if (ast.getToken().getType() == HiveParser.TOK_SHOWPARTITIONS) - { + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_CLUSTER_SORT) { + analyzeAlterTableClusterSort(ast); + } else if (ast.getToken().getType() == HiveParser.TOK_SHOWPARTITIONS) { ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); analyzeShowPartitions(ast); - } - else { + } else { throw new SemanticException("Unsupported command."); } } - private void analyzeDropTable(ASTNode ast, boolean expectView) - throws SemanticException { + private void analyzeDropTable(ASTNode ast, boolean expectView) + throws SemanticException { String tableName = unescapeIdentifier(ast.getChild(0).getText()); dropTableDesc dropTblDesc = new dropTableDesc(tableName, expectView); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + dropTblDesc), conf)); } private void analyzeAlterTableProps(ASTNode ast) throws SemanticException { String tableName = unescapeIdentifier(ast.getChild(0).getText()); - HashMap mapProp = getProps((ASTNode)(ast.getChild(1)).getChild(0)); + HashMap mapProp = getProps((ASTNode) 
(ast.getChild(1)) + .getChild(0)); alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDPROPS); alterTblDesc.setProps(mapProp); alterTblDesc.setOldName(tableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } - private void analyzeAlterTableSerdeProps(ASTNode ast) throws SemanticException { + private void analyzeAlterTableSerdeProps(ASTNode ast) + throws SemanticException { String tableName = unescapeIdentifier(ast.getChild(0).getText()); - HashMap mapProp = getProps((ASTNode)(ast.getChild(1)).getChild(0)); - alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDEPROPS); + HashMap mapProp = getProps((ASTNode) (ast.getChild(1)) + .getChild(0)); + alterTableDesc alterTblDesc = new alterTableDesc( + alterTableTypes.ADDSERDEPROPS); alterTblDesc.setProps(mapProp); alterTblDesc.setOldName(tableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } private void analyzeAlterTableSerde(ASTNode ast) throws SemanticException { String tableName = unescapeIdentifier(ast.getChild(0).getText()); String serdeName = unescapeSQLString(ast.getChild(1).getText()); alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDE); - if(ast.getChildCount() > 2) { - HashMap mapProp = getProps((ASTNode)(ast.getChild(2)).getChild(0)); + if (ast.getChildCount() > 2) { + HashMap mapProp = getProps((ASTNode) (ast.getChild(2)) + .getChild(0)); alterTblDesc.setProps(mapProp); } alterTblDesc.setOldName(tableName); alterTblDesc.setSerdeName(serdeName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } - - private void analyzeAlterTableFileFormat(ASTNode ast) throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - String inputFormat = null; - String outputFormat = null; + + private void analyzeAlterTableFileFormat(ASTNode ast) + throws SemanticException { + String tableName = unescapeIdentifier(ast.getChild(0).getText()); + String inputFormat = null; + String outputFormat = null; String serde = null; - ASTNode child = (ASTNode)ast.getChild(1); - - switch (child.getToken().getType()) { - case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(((ASTNode) child.getChild(0)).getToken() - .getText()); - outputFormat = unescapeSQLString(((ASTNode) child.getChild(1)).getToken() - .getText()); - try { - Class.forName(inputFormat); - Class.forName(outputFormat); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - break; - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - break; - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; - break; - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - serde = COLUMNAR_SERDE; - break; - } - alterTableDesc alterTblDesc = new alterTableDesc(tableName, inputFormat, outputFormat, serde); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + ASTNode child = (ASTNode) ast.getChild(1); + + switch (child.getToken().getType()) { + case 
HiveParser.TOK_TABLEFILEFORMAT: + inputFormat = unescapeSQLString(((ASTNode) child.getChild(0)).getToken() + .getText()); + outputFormat = unescapeSQLString(((ASTNode) child.getChild(1)).getToken() + .getText()); + try { + Class.forName(inputFormat); + Class.forName(outputFormat); + } catch (ClassNotFoundException e) { + throw new SemanticException(e); + } + break; + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; + break; + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = TEXTFILE_INPUT; + outputFormat = TEXTFILE_OUTPUT; + break; + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFILE_INPUT; + outputFormat = RCFILE_OUTPUT; + serde = COLUMNAR_SERDE; + break; + } + alterTableDesc alterTblDesc = new alterTableDesc(tableName, inputFormat, + outputFormat, serde); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } - - private void analyzeAlterTableClusterSort(ASTNode ast) throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - ASTNode buckets = (ASTNode)ast.getChild(1); - List bucketCols = getColumnNames((ASTNode) buckets.getChild(0)); - List sortCols = null; - int numBuckets = -1; - if (buckets.getChildCount() == 2) - numBuckets = (Integer.valueOf(buckets.getChild(1).getText())).intValue(); - else { - sortCols = getColumnNamesOrder((ASTNode) buckets.getChild(1)); - numBuckets = (Integer.valueOf(buckets.getChild(2).getText())).intValue(); - } - if(numBuckets <=0 ) - throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg()); - alterTableDesc alterTblDesc = new alterTableDesc(tableName, numBuckets, bucketCols, sortCols); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + + private void analyzeAlterTableClusterSort(ASTNode ast) + throws SemanticException { + String tableName = unescapeIdentifier(ast.getChild(0).getText()); + ASTNode buckets = (ASTNode) ast.getChild(1); + List bucketCols = getColumnNames((ASTNode) buckets.getChild(0)); + List sortCols = null; + int numBuckets = -1; + if (buckets.getChildCount() == 2) { + numBuckets = (Integer.valueOf(buckets.getChild(1).getText())).intValue(); + } else { + sortCols = getColumnNamesOrder((ASTNode) buckets.getChild(1)); + numBuckets = (Integer.valueOf(buckets.getChild(2).getText())).intValue(); + } + if (numBuckets <= 0) { + throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg()); + } + alterTableDesc alterTblDesc = new alterTableDesc(tableName, numBuckets, + bucketCols, sortCols); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } private HashMap getProps(ASTNode prop) { HashMap mapProp = new HashMap(); for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); - mapProp.put(key,value); + String key = unescapeSQLString(prop.getChild(propChild).getChild(0) + .getText()); + String value = unescapeSQLString(prop.getChild(propChild).getChild(1) + .getText()); + mapProp.put(key, value); } return mapProp; } /** - * Get the fully qualified name in the ast. e.g. the ast of the form ^(DOT ^(DOT a b) c) - * will generate a name of the form a.b.c - * - * @param ast The AST from which the qualified name has to be extracted + * Get the fully qualified name in the ast. e.g. 
the ast of the form ^(DOT + * ^(DOT a b) c) will generate a name of the form a.b.c + * + * @param ast + * The AST from which the qualified name has to be extracted * @return String */ private String getFullyQualifiedName(ASTNode ast) { @@ -260,14 +273,17 @@ return ast.getText(); } - return getFullyQualifiedName((ASTNode)ast.getChild(0)) + "." + - getFullyQualifiedName((ASTNode)ast.getChild(1)); + return getFullyQualifiedName((ASTNode) ast.getChild(0)) + "." + + getFullyQualifiedName((ASTNode) ast.getChild(1)); } /** * Create a FetchTask for a given table and thrift ddl schema - * @param tablename tablename - * @param schema thrift ddl + * + * @param tablename + * tablename + * @param schema + * thrift ddl */ private Task createFetchTask(String schema) { Properties prop = new Properties(); @@ -278,19 +294,17 @@ prop.setProperty("columns", colTypes[0]); prop.setProperty("columns.types", colTypes[1]); - fetchWork fetch = new fetchWork( - ctx.getResFile().toString(), - new tableDesc(LazySimpleSerDe.class, TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), - -1 - ); + fetchWork fetch = new fetchWork(ctx.getResFile().toString(), new tableDesc( + LazySimpleSerDe.class, TextInputFormat.class, + IgnoreKeyTextOutputFormat.class, prop), -1); fetch.setSerializationNullFormat(" "); - return TaskFactory.get(fetch, this.conf); + return TaskFactory.get(fetch, conf); } - private void analyzeDescribeTable(ASTNode ast) - throws SemanticException { - ASTNode tableTypeExpr = (ASTNode)ast.getChild(0); - String tableName = getFullyQualifiedName((ASTNode)tableTypeExpr.getChild(0)); + private void analyzeDescribeTable(ASTNode ast) throws SemanticException { + ASTNode tableTypeExpr = (ASTNode) ast.getChild(0); + String tableName = getFullyQualifiedName((ASTNode) tableTypeExpr + .getChild(0)); HashMap partSpec = null; // get partition metadata if partition specified @@ -300,13 +314,14 @@ } boolean isExt = ast.getChildCount() > 1; - descTableDesc descTblDesc = new descTableDesc(ctx.getResFile(), tableName, partSpec, isExt); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), descTblDesc), conf)); + descTableDesc descTblDesc = new descTableDesc(ctx.getResFile(), tableName, + partSpec, isExt); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + descTblDesc), conf)); setFetchTask(createFetchTask(descTblDesc.getSchema())); LOG.info("analyzeDescribeTable done"); } - private HashMap getPartSpec(ASTNode partspec) throws SemanticException { HashMap partSpec = new LinkedHashMap(); @@ -318,90 +333,95 @@ return partSpec; } - private void analyzeShowPartitions(ASTNode ast) - throws SemanticException { + private void analyzeShowPartitions(ASTNode ast) throws SemanticException { showPartitionsDesc showPartsDesc; String tableName = unescapeIdentifier(ast.getChild(0).getText()); showPartsDesc = new showPartitionsDesc(tableName, ctx.getResFile()); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showPartsDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + showPartsDesc), conf)); setFetchTask(createFetchTask(showPartsDesc.getSchema())); } - private void analyzeShowTables(ASTNode ast) - throws SemanticException { + private void analyzeShowTables(ASTNode ast) throws SemanticException { showTablesDesc showTblsDesc; - if (ast.getChildCount() == 1) - { + if (ast.getChildCount() == 1) { String tableNames = unescapeSQLString(ast.getChild(0).getText()); showTblsDesc = new showTablesDesc(ctx.getResFile(), tableNames); - } - else { + } 
else { showTblsDesc = new showTablesDesc(ctx.getResFile()); } - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showTblsDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + showTblsDesc), conf)); setFetchTask(createFetchTask(showTblsDesc.getSchema())); } - private void analyzeShowTableStatus(ASTNode ast) - throws SemanticException { + private void analyzeShowTableStatus(ASTNode ast) throws SemanticException { showTableStatusDesc showTblStatusDesc; String tableNames = unescapeIdentifier(ast.getChild(0).getText()); String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; int children = ast.getChildCount(); HashMap partSpec = null; if (children >= 2) { - if(children > 3) + if (children > 3) { throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); + } for (int i = 1; i < children; i++) { ASTNode child = (ASTNode) ast.getChild(i); - if(child.getToken().getType() == HiveParser.Identifier) + if (child.getToken().getType() == HiveParser.Identifier) { dbName = unescapeIdentifier(child.getText()); - else if (child.getToken().getType() == HiveParser.TOK_PARTSPEC) + } else if (child.getToken().getType() == HiveParser.TOK_PARTSPEC) { partSpec = getPartSpec(child); - else + } else { throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); + } } } - showTblStatusDesc = new showTableStatusDesc(ctx.getResFile(), dbName, tableNames, partSpec); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showTblStatusDesc), conf)); + showTblStatusDesc = new showTableStatusDesc(ctx.getResFile(), dbName, + tableNames, partSpec); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + showTblStatusDesc), conf)); setFetchTask(createFetchTask(showTblStatusDesc.getSchema())); } /** - * Add the task according to the parsed command tree. - * This is used for the CLI command "SHOW FUNCTIONS;". - * @param ast The parsed command tree. - * @throws SemanticException Parsin failed + * Add the task according to the parsed command tree. This is used for the CLI + * command "SHOW FUNCTIONS;". + * + * @param ast + * The parsed command tree. + * @throws SemanticException + * Parsin failed */ - private void analyzeShowFunctions(ASTNode ast) - throws SemanticException { + private void analyzeShowFunctions(ASTNode ast) throws SemanticException { showFunctionsDesc showFuncsDesc; if (ast.getChildCount() == 1) { String funcNames = stripQuotes(ast.getChild(0).getText()); showFuncsDesc = new showFunctionsDesc(ctx.getResFile(), funcNames); - } - else { + } else { showFuncsDesc = new showFunctionsDesc(ctx.getResFile()); } - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showFuncsDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + showFuncsDesc), conf)); setFetchTask(createFetchTask(showFuncsDesc.getSchema())); } /** - * Add the task according to the parsed command tree. - * This is used for the CLI command "DESCRIBE FUNCTION;". - * @param ast The parsed command tree. - * @throws SemanticException Parsing failed + * Add the task according to the parsed command tree. This is used for the CLI + * command "DESCRIBE FUNCTION;". + * + * @param ast + * The parsed command tree. 
+ * @throws SemanticException + * Parsing failed */ - private void analyzeDescFunction(ASTNode ast) - throws SemanticException { + private void analyzeDescFunction(ASTNode ast) throws SemanticException { String funcName; boolean isExtended; - if(ast.getChildCount() == 1) { + if (ast.getChildCount() == 1) { funcName = stripQuotes(ast.getChild(0).getText()); isExtended = false; - } else if(ast.getChildCount() == 2) { + } else if (ast.getChildCount() == 2) { funcName = stripQuotes(ast.getChild(0).getText()); isExtended = true; } else { @@ -409,57 +429,61 @@ } descFunctionDesc descFuncDesc = new descFunctionDesc(ctx.getResFile(), - funcName, isExtended); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), descFuncDesc), conf)); + funcName, isExtended); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + descFuncDesc), conf)); setFetchTask(createFetchTask(descFuncDesc.getSchema())); } - private void analyzeAlterTableRename(ASTNode ast) - throws SemanticException { - alterTableDesc alterTblDesc = new alterTableDesc( - unescapeIdentifier(ast.getChild(0).getText()), - unescapeIdentifier(ast.getChild(1).getText())); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + private void analyzeAlterTableRename(ASTNode ast) throws SemanticException { + alterTableDesc alterTblDesc = new alterTableDesc(unescapeIdentifier(ast + .getChild(0).getText()), unescapeIdentifier(ast.getChild(1).getText())); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } - - private void analyzeAlterTableRenameCol(ASTNode ast) - throws SemanticException { + + private void analyzeAlterTableRenameCol(ASTNode ast) throws SemanticException { String tblName = unescapeIdentifier(ast.getChild(0).getText()); - String newComment =null; - String newType =null; + String newComment = null; + String newType = null; newType = getTypeStringFromAST((ASTNode) ast.getChild(3)); - boolean first =false; + boolean first = false; String flagCol = null; ASTNode positionNode = null; - if(ast.getChildCount() == 6) { + if (ast.getChildCount() == 6) { newComment = unescapeSQLString(ast.getChild(4).getText()); positionNode = (ASTNode) ast.getChild(5); } else if (ast.getChildCount() == 5) { - if(ast.getChild(4).getType()==HiveParser.StringLiteral) + if (ast.getChild(4).getType() == HiveParser.StringLiteral) { newComment = unescapeSQLString(ast.getChild(4).getText()); - else + } else { positionNode = (ASTNode) ast.getChild(4); + } } - - if(positionNode!= null) { - if (positionNode.getChildCount() == 0) + + if (positionNode != null) { + if (positionNode.getChildCount() == 0) { first = true; - else + } else { flagCol = unescapeIdentifier(positionNode.getChild(0).getText()); + } } - + alterTableDesc alterTblDesc = new alterTableDesc(tblName, - unescapeIdentifier(ast.getChild(1).getText()), - unescapeIdentifier(ast.getChild(2).getText()), newType, newComment, first, flagCol); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + unescapeIdentifier(ast.getChild(1).getText()), unescapeIdentifier(ast + .getChild(2).getText()), newType, newComment, first, flagCol); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } - private void analyzeAlterTableModifyCols(ASTNode ast, alterTableTypes alterType) - throws SemanticException { + private void analyzeAlterTableModifyCols(ASTNode ast, + alterTableTypes alterType) throws SemanticException { String tblName = 
unescapeIdentifier(ast.getChild(0).getText()); - List newCols = getColumns((ASTNode)ast.getChild(1)); - alterTableDesc alterTblDesc = new alterTableDesc(tblName, newCols, alterType); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + List newCols = getColumns((ASTNode) ast.getChild(1)); + alterTableDesc alterTblDesc = new alterTableDesc(tblName, newCols, + alterType); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + alterTblDesc), conf)); } private void analyzeAlterTableDropParts(ASTNode ast) throws SemanticException { @@ -467,21 +491,25 @@ // get table metadata List> partSpecs = getPartitionSpecs(ast); dropTableDesc dropTblDesc = new dropTableDesc(tblName, partSpecs); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + dropTblDesc), conf)); } /** - * Add one or more partitions to a table. Useful - * when the data has been copied to the right location - * by some other process. - * @param ast The parsed command tree. - * @throws SemanticException Parsin failed + * Add one or more partitions to a table. Useful when the data has been copied + * to the right location by some other process. + * + * @param ast + * The parsed command tree. + * @throws SemanticException + * Parsin failed */ private void analyzeAlterTableAddParts(CommonTree ast) - throws SemanticException { + throws SemanticException { - String tblName = unescapeIdentifier(ast.getChild(0).getText());; - //partition name to value + String tblName = unescapeIdentifier(ast.getChild(0).getText()); + ; + // partition name to value List> partSpecs = getPartitionSpecs(ast); Iterator> partIter = partSpecs.iterator(); @@ -491,21 +519,22 @@ int numCh = ast.getChildCount(); for (int num = 1; num < numCh; num++) { - CommonTree child = (CommonTree)ast.getChild(num); + CommonTree child = (CommonTree) ast.getChild(num); switch (child.getToken().getType()) { case HiveParser.TOK_PARTSPEC: - if(currentPart != null) { - AddPartitionDesc addPartitionDesc = - new AddPartitionDesc(MetaStoreUtils.DEFAULT_DATABASE_NAME, - tblName, currentPart, currentLocation); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc), conf)); + if (currentPart != null) { + AddPartitionDesc addPartitionDesc = new AddPartitionDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, + currentLocation); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + addPartitionDesc), conf)); } - //create new partition, set values + // create new partition, set values currentLocation = null; currentPart = partIter.next(); break; case HiveParser.TOK_PARTITIONLOCATION: - //if location specified, set in partition + // if location specified, set in partition currentLocation = unescapeSQLString(child.getChild(0).getText()); break; default: @@ -513,25 +542,28 @@ } } - //add the last one - if(currentPart != null) { - AddPartitionDesc addPartitionDesc = - new AddPartitionDesc(MetaStoreUtils.DEFAULT_DATABASE_NAME, - tblName, currentPart, currentLocation); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc), conf)); + // add the last one + if (currentPart != null) { + AddPartitionDesc addPartitionDesc = new AddPartitionDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, + currentLocation); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + addPartitionDesc), conf)); } } /** - * 
Verify that the information in the metastore matches up - * with the data on the fs. - * @param ast Query tree. + * Verify that the information in the metastore matches up with the data on + * the fs. + * + * @param ast + * Query tree. * @throws SemanticException */ private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException { String tableName = null; boolean repair = false; - if(ast.getChildCount() > 0) { + if (ast.getChildCount() > 0) { repair = ast.getChild(0).getType() == HiveParser.KW_REPAIR; if (!repair) { tableName = unescapeIdentifier(ast.getChild(0).getText()); @@ -540,24 +572,29 @@ } } List> specs = getPartitionSpecs(ast); - MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), repair); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), checkDesc), conf)); + MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), + repair); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + checkDesc), conf)); } /** * Get the partition specs from the tree. - * @param ast Tree to extract partitions from. + * + * @param ast + * Tree to extract partitions from. * @return A list of partition name to value mappings. * @throws SemanticException */ - private List> getPartitionSpecs(CommonTree ast) throws SemanticException { + private List> getPartitionSpecs(CommonTree ast) + throws SemanticException { List> partSpecs = new ArrayList>(); int childIndex = 0; // get partition metadata if partition specified for (childIndex = 1; childIndex < ast.getChildCount(); childIndex++) { Tree partspec = ast.getChild(childIndex); - //sanity check - if(partspec.getType() == HiveParser.TOK_PARTSPEC) { + // sanity check + if (partspec.getType() == HiveParser.TOK_PARTSPEC) { Map partSpec = new LinkedHashMap(); for (int i = 0; i < partspec.getChildCount(); ++i) { CommonTree partspec_val = (CommonTree) partspec.getChild(i); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (working copy) @@ -21,14 +21,14 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; /** - * This class implements the context information that is used for typechecking phase - * in query compilation. + * This class implements the context information that is used for typechecking + * phase in query compilation. */ public class TypeCheckCtx implements NodeProcessorCtx { - + /** - * The row resolver of the previous operator. This field is used to generate expression - * descriptors from the expression ASTs. + * The row resolver of the previous operator. This field is used to generate + * expression descriptors from the expression ASTs. */ private RowResolver inputRR; @@ -41,19 +41,21 @@ * Potential typecheck error reason. */ private String error; - + /** * Constructor. * - * @param inputRR The input row resolver of the previous operator. + * @param inputRR + * The input row resolver of the previous operator. 
*/ public TypeCheckCtx(RowResolver inputRR) { - this.setInputRR(inputRR); - this.error = null; + setInputRR(inputRR); + error = null; } /** - * @param inputRR the inputRR to set + * @param inputRR + * the inputRR to set */ public void setInputRR(RowResolver inputRR) { this.inputRR = inputRR; @@ -67,7 +69,8 @@ } /** - * @param unparseTranslator the unparseTranslator to set + * @param unparseTranslator + * the unparseTranslator to set */ public void setUnparseTranslator(UnparseTranslator unparseTranslator) { this.unparseTranslator = unparseTranslator; @@ -79,9 +82,10 @@ public UnparseTranslator getUnparseTranslator() { return unparseTranslator; } - + /** - * @param error the error to set + * @param error + * the error to set */ public void setError(String error) { this.error = error; Index: ql/src/java/org/apache/hadoop/hive/ql/Driver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java (working copy) @@ -21,50 +21,54 @@ import java.io.DataInput; import java.io.IOException; import java.io.Serializable; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.Vector; -import org.apache.hadoop.hive.ql.parse.ASTNode; - import org.apache.commons.lang.StringUtils; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.ParseException; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; -import org.apache.hadoop.hive.ql.parse.ErrorMsg; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.ExecDriver; +import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.TaskResult; import org.apache.hadoop.hive.ql.exec.TaskRunner; -import org.apache.hadoop.hive.ql.exec.TaskResult; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; +import org.apache.hadoop.hive.ql.hooks.PostExecute; import org.apache.hadoop.hive.ql.hooks.PreExecute; -import org.apache.hadoop.hive.ql.hooks.PostExecute; - -import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ErrorMsg; +import org.apache.hadoop.hive.ql.parse.ParseDriver; +import org.apache.hadoop.hive.ql.parse.ParseException; +import 
org.apache.hadoop.hive.ql.parse.ParseUtils; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.tableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessor; -import org.apache.hadoop.hive.ql.plan.tableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.security.UnixUserGroupInformation; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - public class Driver implements CommandProcessor { static final private Log LOG = LogFactory.getLog(Driver.class.getName()); @@ -81,31 +85,35 @@ private String SQLState; // A limit on the number of threads that can be launched - private int maxthreads = 8; - private int sleeptime = 2000; + private final int maxthreads = 8; + private final int sleeptime = 2000; public void init() { Operator.resetId(); } - + public int countJobs(List> tasks) { return countJobs(tasks, new ArrayList>()); } - public int countJobs(List> tasks, List> seenTasks) { - if (tasks == null) + public int countJobs(List> tasks, + List> seenTasks) { + if (tasks == null) { return 0; + } int jobs = 0; for (Task task : tasks) { if (!seenTasks.contains(task)) { seenTasks.add(task); - - if(task instanceof ConditionalTask) - jobs +=countJobs(((ConditionalTask)task).getListTasks(), seenTasks); - else if (task.isMapRedTask()) { //this may be true for conditional task, but we will not inc the counter + + if (task instanceof ConditionalTask) { + jobs += countJobs(((ConditionalTask) task).getListTasks(), seenTasks); + } else if (task.isMapRedTask()) { // this may be true for conditional + // task, but we will not inc the + // counter jobs++; } - + jobs += countJobs(task.getChildTasks(), seenTasks); } } @@ -121,8 +129,7 @@ JobConf job = new JobConf(conf, ExecDriver.class); JobClient jc = new JobClient(job); cs = jc.getClusterStatus(); - } - catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); throw e; } @@ -147,28 +154,30 @@ tableDesc td = ft.getTblDesc(); // partitioned tables don't have tableDesc set on the FetchTask. Instead - // they have a list of PartitionDesc objects, each with a table desc. Let's - // try to fetch the desc for the first partition and use it's deserializer. - if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) { + // they have a list of PartitionDesc objects, each with a table desc. + // Let's + // try to fetch the desc for the first partition and use it's + // deserializer. 
+ if (td == null && ft.getWork() != null + && ft.getWork().getPartDesc() != null) { if (ft.getWork().getPartDesc().size() > 0) { td = ft.getWork().getPartDesc().get(0).getTableDesc(); } } if (td == null) { - throw new Exception("No table description found for fetch task: " + ft); + throw new Exception("No table description found for fetch task: " + + ft); } String tableName = "result"; List lst = MetaStoreUtils.getFieldsFromDeserializer( tableName, td.getDeserializer()); schema = new Schema(lst, null); - } - else { + } else { schema = new Schema(); } - } - catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); throw e; } @@ -182,18 +191,17 @@ public Schema getThriftSchema() throws Exception { Schema schema; try { - schema = this.getSchema(); + schema = getSchema(); if (schema != null) { - List lst = schema.getFieldSchemas(); - // Go over the schema and convert type to thrift type - if (lst != null) { - for (FieldSchema f : lst) { - f.setType(MetaStoreUtils.typeToThriftType(f.getType())); + List lst = schema.getFieldSchemas(); + // Go over the schema and convert type to thrift type + if (lst != null) { + for (FieldSchema f : lst) { + f.setType(MetaStoreUtils.typeToThriftType(f.getType())); } - } + } } - } - catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); throw e; } @@ -216,8 +224,9 @@ } public boolean hasReduceTasks(List> tasks) { - if (tasks == null) + if (tasks == null) { return false; + } boolean hasReduce = false; for (Task task : tasks) { @@ -254,9 +263,11 @@ } /** - * Compile a new query. Any currently-planned query associated with this Driver is discarded. - * - * @param command The SQL query to compile. + * Compile a new query. Any currently-planned query associated with this + * Driver is discarded. + * + * @param command + * The SQL query to compile. */ public int compile(String command) { if (plan != null) { @@ -267,7 +278,7 @@ TaskFactory.resetId(); try { - ctx = new Context (conf); + ctx = new Context(conf); ParseDriver pd = new ParseDriver(); ASTNode tree = pd.parse(command, ctx); @@ -322,25 +333,27 @@ SQLState = null; int ret = compile(command); - if (ret != 0) + if (ret != 0) { return new DriverResponse(ret, errorMessage, SQLState); + } ret = execute(); - if (ret != 0) + if (ret != 0) { return new DriverResponse(ret, errorMessage, SQLState); + } return new DriverResponse(ret); } /** * Encapsulates the basic response info returned by the Driver. Typically - * errorMessage and SQLState will only be set if - * the responseCode is not 0. + * errorMessage and SQLState will only be set if the + * responseCode is not 0. 
*/ public class DriverResponse { - private int responseCode; - private String errorMessage; - private String SQLState; + private final int responseCode; + private final String errorMessage; + private final String SQLState; public DriverResponse(int responseCode) { this(responseCode, null, null); @@ -352,23 +365,33 @@ this.SQLState = SQLState; } - public int getResponseCode() { return responseCode; } - public String getErrorMessage() { return errorMessage; } - public String getSQLState() { return SQLState; } + public int getResponseCode() { + return responseCode; + } + + public String getErrorMessage() { + return errorMessage; + } + + public String getSQLState() { + return SQLState; + } } private List getPreExecHooks() throws Exception { ArrayList pehooks = new ArrayList(); String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS); pestr = pestr.trim(); - if (pestr.equals("")) + if (pestr.equals("")) { return pehooks; + } String[] peClasses = pestr.split(","); - for(String peClass: peClasses) { + for (String peClass : peClasses) { try { - pehooks.add((PreExecute)Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance()); + pehooks.add((PreExecute) Class.forName(peClass.trim(), true, + JavaUtils.getClassLoader()).newInstance()); } catch (ClassNotFoundException e) { console.printError("Pre Exec Hook Class not found:" + e.getMessage()); throw e; @@ -382,14 +405,16 @@ ArrayList pehooks = new ArrayList(); String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS); pestr = pestr.trim(); - if (pestr.equals("")) + if (pestr.equals("")) { return pehooks; + } String[] peClasses = pestr.split(","); - for(String peClass: peClasses) { + for (String peClass : peClasses) { try { - pehooks.add((PostExecute)Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance()); + pehooks.add((PostExecute) Class.forName(peClass.trim(), true, + JavaUtils.getClassLoader()).newInstance()); } catch (ClassNotFoundException e) { console.printError("Post Exec Hook Class not found:" + e.getMessage()); throw e; @@ -404,7 +429,7 @@ .getVar(HiveConf.ConfVars.HADOOPJOBNAME)); int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); - int curJobNo=0; + int curJobNo = 0; String queryId = plan.getQueryId(); String queryStr = plan.getQueryStr(); @@ -418,7 +443,8 @@ plan.setStarted(); if (SessionState.get() != null) { - SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID) ); + SessionState.get().getHiveHistory().startQuery(queryStr, + conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); SessionState.get().getHiveHistory().logPlanProgress(plan); } resStream = null; @@ -426,34 +452,36 @@ BaseSemanticAnalyzer sem = plan.getPlan(); // Get all the pre execution hooks and execute them. 
- for(PreExecute peh: getPreExecHooks()) { - peh.run(SessionState.get(), - sem.getInputs(), sem.getOutputs(), - UnixUserGroupInformation.readFromConf(conf, UnixUserGroupInformation.UGI_PROPERTY_NAME)); + for (PreExecute peh : getPreExecHooks()) { + peh.run(SessionState.get(), sem.getInputs(), sem.getOutputs(), + UnixUserGroupInformation.readFromConf(conf, + UnixUserGroupInformation.UGI_PROPERTY_NAME)); } int jobs = countJobs(sem.getRootTasks()); if (jobs > 0) { console.printInfo("Total MapReduce jobs = " + jobs); } - if (SessionState.get() != null){ + if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs)); - SessionState.get().getHiveHistory().setIdToTableMap(sem.getIdToTableNameMap()); + SessionState.get().getHiveHistory().setIdToTableMap( + sem.getIdToTableNameMap()); } String jobname = Utilities.abbreviate(queryStr, maxlen - 6); - // A runtime that launches runnable tasks as separate Threads through TaskRunners + // A runtime that launches runnable tasks as separate Threads through + // TaskRunners // As soon as a task isRunnable, it is put in a queue // At any time, at most maxthreads tasks can be running // The main thread polls the TaskRunners to check if they have finished. Queue> runnable = new LinkedList>(); - Map running = new HashMap (); + Map running = new HashMap(); - DriverContext driverCxt = new DriverContext(runnable); + DriverContext driverCxt = new DriverContext(runnable); - //Add root Tasks to runnable + // Add root Tasks to runnable for (Task tsk : sem.getRootTasks()) { driverCxt.addToRunnable(tsk); @@ -461,11 +489,12 @@ // Loop while you either have tasks running, or tasks queued up - while (running.size() != 0 || runnable.peek()!=null) { + while (running.size() != 0 || runnable.peek() != null) { // Launch upto maxthreads tasks - while(runnable.peek() != null && running.size() < maxthreads) { + while (runnable.peek() != null && running.size() < maxthreads) { Task tsk = runnable.remove(); - curJobNo = launchTask(tsk, queryId, noName,running, jobname, jobs, curJobNo, driverCxt); + curJobNo = launchTask(tsk, queryId, noName, running, jobname, jobs, + curJobNo, driverCxt); } // poll the Tasks to see which one completed @@ -474,13 +503,13 @@ Task tsk = tskRun.getTask(); int exitVal = tskRes.getExitVal(); - if(exitVal != 0) { - //TODO: This error messaging is not very informative. Fix that. + if (exitVal != 0) { + // TODO: This error messaging is not very informative. Fix that. errorMessage = "FAILED: Execution Error, return code " + exitVal - + " from " + tsk.getClass().getName(); + + " from " + tsk.getClass().getName(); SQLState = "08S01"; console.printError(errorMessage); - if(running.size() !=0) { + if (running.size() != 0) { taskCleanup(); } return 9; @@ -494,7 +523,7 @@ if (tsk.getChildTasks() != null) { for (Task child : tsk.getChildTasks()) { - if(DriverContext.isLaunchable(child)) { + if (DriverContext.isLaunchable(child)) { driverCxt.addToRunnable(child); } } @@ -502,22 +531,23 @@ } // Get all the post execution hooks and execute them. 
- for(PostExecute peh: getPostExecHooks()) { - peh.run(SessionState.get(), - sem.getInputs(), sem.getOutputs(), - UnixUserGroupInformation.readFromConf(conf, UnixUserGroupInformation.UGI_PROPERTY_NAME)); + for (PostExecute peh : getPostExecHooks()) { + peh.run(SessionState.get(), sem.getInputs(), sem.getOutputs(), + UnixUserGroupInformation.readFromConf(conf, + UnixUserGroupInformation.UGI_PROPERTY_NAME)); } - if (SessionState.get() != null){ + if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0)); SessionState.get().getHiveHistory().printRowCount(queryId); } } catch (Exception e) { - if (SessionState.get() != null) + if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12)); - //TODO: do better with handling types of Exception here + } + // TODO: do better with handling types of Exception here errorMessage = "FAILED: Unknown exception : " + e.getMessage(); SQLState = "08S01"; console.printError(errorMessage, "\n" @@ -546,59 +576,62 @@ /** * Launches a new task * - * @param tsk task being launched - * @param queryId Id of the query containing the task - * @param noName whether the task has a name set - * @param running map from taskresults to taskrunners - * @param jobname name of the task, if it is a map-reduce job - * @param jobs number of map-reduce jobs - * @param curJobNo the sequential number of the next map-reduce job - * @return the updated number of last the map-reduce job launched + * @param tsk + * task being launched + * @param queryId + * Id of the query containing the task + * @param noName + * whether the task has a name set + * @param running + * map from taskresults to taskrunners + * @param jobname + * name of the task, if it is a map-reduce job + * @param jobs + * number of map-reduce jobs + * @param curJobNo + * the sequential number of the next map-reduce job + * @return the updated number of last the map-reduce job launched */ + public int launchTask(Task tsk, String queryId, + boolean noName, Map running, String jobname, + int jobs, int curJobNo, DriverContext cxt) { - - public int launchTask(Task tsk, String queryId, - boolean noName, Map running, String jobname, - int jobs, int curJobNo, DriverContext cxt) { - if (SessionState.get() != null) { SessionState.get().getHiveHistory().startTask(queryId, tsk, - tsk.getClass().getName()); + tsk.getClass().getName()); } if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) { if (noName) { - conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" - + tsk.getId() + ")"); + conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + + tsk.getId() + ")"); } curJobNo++; - console.printInfo("Launching Job " + curJobNo + " out of "+jobs); + console.printInfo("Launching Job " + curJobNo + " out of " + jobs); } tsk.initialize(conf, plan, cxt); TaskResult tskRes = new TaskResult(); - TaskRunner tskRun = new TaskRunner(tsk,tskRes); + TaskRunner tskRun = new TaskRunner(tsk, tskRes); - //Launch Task - if(HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) { + // Launch Task + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) + && tsk.isMapRedTask()) { // Launch it in the parallel mode, as a separate thread only for MR tasks tskRun.start(); - } - else - { + } else { tskRun.runSequential(); } - running.put(tskRes,tskRun); + running.put(tskRes, tskRun); return curJobNo; } - /** * Cleans up remaining tasks in case of failure */ - + 
public void taskCleanup() { - // The currently existing Shutdown hooks will be automatically called, - // killing the map-reduce processes. + // The currently existing Shutdown hooks will be automatically called, + // killing the map-reduce processes. // The non MR processes will be killed as well. System.exit(9); } @@ -606,16 +639,17 @@ /** * Polls running tasks to see if a task has ended. * - * @param results Set of result objects for running tasks - * @return The result object for any completed/failed task + * @param results + * Set of result objects for running tasks + * @return The result object for any completed/failed task */ public TaskResult pollTasks(Set results) { Iterator resultIterator = results.iterator(); - while(true) { - while(resultIterator.hasNext()) { + while (true) { + while (resultIterator.hasNext()) { TaskResult tskRes = resultIterator.next(); - if(tskRes.isRunning() == false) { + if (tskRes.isRunning() == false) { return tskRes; } } @@ -624,9 +658,8 @@ // Sleep 10 seconds and restart try { Thread.sleep(sleeptime); - } - catch (InterruptedException ie) { - //Do Nothing + } catch (InterruptedException ie) { + // Do Nothing ; } resultIterator = results.iterator(); @@ -645,30 +678,34 @@ return ft.fetch(res); } - if (resStream == null) + if (resStream == null) { resStream = ctx.getStream(); - if (resStream == null) + } + if (resStream == null) { return false; + } int numRows = 0; String row = null; while (numRows < maxRows) { if (resStream == null) { - if (numRows > 0) + if (numRows > 0) { return true; - else + } else { return false; + } } bos.reset(); Utilities.streamStatus ss; try { ss = Utilities.readColumn(resStream, bos); - if (bos.getCount() > 0) + if (bos.getCount() > 0) { row = new String(bos.getData(), 0, bos.getCount(), "UTF-8"); - else if (ss == Utilities.streamStatus.TERMINATED) + } else if (ss == Utilities.streamStatus.TERMINATED) { row = new String(); + } if (row != null) { numRows++; @@ -681,8 +718,9 @@ return false; } - if (ss == Utilities.streamStatus.EOF) + if (ss == Utilities.streamStatus.EOF) { resStream = ctx.getStream(); + } } return true; } @@ -699,7 +737,8 @@ return (0); } - public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException { + public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() + throws IOException { return plan.getQueryPlan(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/SoftCache.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/SoftCache.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/SoftCache.java (working copy) @@ -64,249 +64,265 @@ */ package org.apache.hadoop.hive.ql.util.jdbm.helper; +import java.lang.ref.Reference; import java.lang.ref.ReferenceQueue; import java.lang.ref.SoftReference; -import java.lang.ref.Reference; import java.util.Enumeration; +import java.util.HashMap; import java.util.Map; -import java.util.HashMap; /** * Wraps a deterministic cache policy with a Level-2 cache based on - * J2SE's {@link SoftReference soft references}. Soft references allow - * this cache to keep references to objects until the memory they occupy - * is required elsewhere. + * J2SE's {@link SoftReference soft references}. Soft references allow this + * cache to keep references to objects until the memory they occupy is required + * elsewhere. *

- * Since the {@link CachePolicy} interface requires an event be fired - * when an object is evicted, and the event contains the actual object, - * this class cannot be a stand-alone implementation of - * CachePolicy. This limitation arises because Java References - * does not support notification before references are cleared; nor do - * they support reaching soft referents. Therefore, this wrapper cache - * aggressively notifies evictions: events are fired when the objects are - * evicted from the internal cache. Consequently, the soft cache may return - * a non-null object when get( ) is called, even if that - * object was said to have been evicted. + * Since the {@link CachePolicy} interface requires an event be fired when an + * object is evicted, and the event contains the actual object, this class + * cannot be a stand-alone implementation of CachePolicy. This + * limitation arises because Java References does not support notification + * before references are cleared; nor do they support reaching soft referents. + * Therefore, this wrapper cache aggressively notifies evictions: events are + * fired when the objects are evicted from the internal cache. Consequently, the + * soft cache may return a non-null object when get( ) is called, + * even if that object was said to have been evicted. *

- * The current implementation uses a hash structure for its internal key - * to value mappings. + * The current implementation uses a hash structure for its internal key to + * value mappings. * <p>

* Note: this component's publicly exposed methods are not threadsafe; * potentially concurrent code should synchronize on the cache instance. - * + * * @author Dilum Ranatunga * @version $Id: SoftCache.java,v 1.1 2003/11/01 13:29:27 dranatunga Exp $ */ public class SoftCache implements CachePolicy { - private static final int INITIAL_CAPACITY = 128; - private static final float DEFAULT_LOAD_FACTOR = 1.5f; + private static final int INITIAL_CAPACITY = 128; + private static final float DEFAULT_LOAD_FACTOR = 1.5f; - private final ReferenceQueue _clearQueue = new ReferenceQueue(); - private final CachePolicy _internal; - private final Map _cacheMap; + private final ReferenceQueue _clearQueue = new ReferenceQueue(); + private final CachePolicy _internal; + private final Map _cacheMap; - /** - * Creates a soft-reference based L2 cache with a {@link MRU} cache as - * the internal (L1) cache. The soft reference cache uses the - * default load capacity of 1.5f, which is intended to sacrifice some - * performance for space. This compromise is reasonable, since all - * {@link #get(Object) get( )s} first try the L1 cache anyway. The - * internal MRU is given a capacity of 128 elements. - */ - public SoftCache() { - this(new MRU(INITIAL_CAPACITY)); - } + /** + * Creates a soft-reference based L2 cache with a {@link MRU} cache as the + * internal (L1) cache. The soft reference cache uses the default load + * capacity of 1.5f, which is intended to sacrifice some performance for + * space. This compromise is reasonable, since all {@link #get(Object) get( + * )s} first try the L1 cache anyway. The internal MRU is given a capacity of + * 128 elements. + */ + public SoftCache() { + this(new MRU(INITIAL_CAPACITY)); + } - /** - * Creates a soft-reference based L2 cache wrapping the specified - * L1 cache. - * - * @param internal non null internal cache. - * @throws NullPointerException if the internal cache is null. - */ - public SoftCache(CachePolicy internal) throws NullPointerException { - this(DEFAULT_LOAD_FACTOR, internal); - } + /** + * Creates a soft-reference based L2 cache wrapping the specified L1 cache. + * + * @param internal + * non null internal cache. + * @throws NullPointerException + * if the internal cache is null. + */ + public SoftCache(CachePolicy internal) throws NullPointerException { + this(DEFAULT_LOAD_FACTOR, internal); + } - /** - * Creates a soft-reference based L2 cache wrapping the specified - * L1 cache. This constructor is somewhat implementation-specific, - * so users are encouraged to use {@link #SoftCache(CachePolicy)} - * instead. - * - * @param loadFactor load factor that the soft cache's hash structure - * should use. - * @param internal non null internal cache. - * @throws IllegalArgumentException if the load factor is nonpositive. - * @throws NullPointerException if the internal cache is null. - */ - public SoftCache(float loadFactor, CachePolicy internal) throws IllegalArgumentException, NullPointerException { - if (internal == null) { - throw new NullPointerException("Internal cache cannot be null."); - } - _internal = internal; - _cacheMap = new HashMap(INITIAL_CAPACITY, loadFactor); + /** + * Creates a soft-reference based L2 cache wrapping the specified L1 cache. + * This constructor is somewhat implementation-specific, so users are + * encouraged to use {@link #SoftCache(CachePolicy)} instead. + * + * @param loadFactor + * load factor that the soft cache's hash structure should use. + * @param internal + * non null internal cache. 
+ * @throws IllegalArgumentException + * if the load factor is nonpositive. + * @throws NullPointerException + * if the internal cache is null. + */ + public SoftCache(float loadFactor, CachePolicy internal) + throws IllegalArgumentException, NullPointerException { + if (internal == null) { + throw new NullPointerException("Internal cache cannot be null."); } + _internal = internal; + _cacheMap = new HashMap(INITIAL_CAPACITY, loadFactor); + } - /** - * Adds the specified value to the cache under the specified key. Note - * that the object is added to both this and the internal cache. - * @param key the (non-null) key to store the object under - * @param value the (non-null) object to place in the cache - * @throws CacheEvictionException exception that the internal cache - * would have experienced while evicting an object it currently - * cached. - */ - public void put(Object key, Object value) throws CacheEvictionException { - if (key == null) { - throw new IllegalArgumentException("key cannot be null."); - } else if (value == null) { - throw new IllegalArgumentException("value cannot be null."); - } - _internal.put(key, value); - removeClearedEntries(); - _cacheMap.put(key, new Entry(key, value, _clearQueue)); + /** + * Adds the specified value to the cache under the specified key. Note that + * the object is added to both this and the internal cache. + * + * @param key + * the (non-null) key to store the object under + * @param value + * the (non-null) object to place in the cache + * @throws CacheEvictionException + * exception that the internal cache would have experienced while + * evicting an object it currently cached. + */ + public void put(Object key, Object value) throws CacheEvictionException { + if (key == null) { + throw new IllegalArgumentException("key cannot be null."); + } else if (value == null) { + throw new IllegalArgumentException("value cannot be null."); } + _internal.put(key, value); + removeClearedEntries(); + _cacheMap.put(key, new Entry(key, value, _clearQueue)); + } - /** - * Gets the object cached under the specified key. - *

- * The cache is looked up in the following manner: - *

- * <ol>
- * <li>The internal (L1) cache is checked. If the object is found, it is
- * returned.</li>
- * <li>This (L2) cache is checked. If the object is not found, then
- * the caller is informed that the object is inaccessible.</li>
- * <li>Since the object exists in L2, but not in L1, the object is
- * readded to L1 using {@link CachePolicy#put(Object, Object)}.</li>
- * <li>If the readding succeeds, the value is returned to caller.</li>
- * <li>If a cache eviction exception is encountered instead, we
- * remove the object from L2 and behave as if the object was
- * inaccessible.</li>
- * </ol>
- * @param key the key that the object was stored under. - * @return the object stored under the key specified; null if the - * object is not (nolonger) accessible via this cache. - */ - public Object get(Object key) { - // first try the internal cache. - Object value = _internal.get(key); - if (value != null) { - return value; - } - // poll and remove cleared references. - removeClearedEntries(); - Entry entry = (Entry)_cacheMap.get(key); - if (entry == null) { // object is not in cache. - return null; - } - value = entry.getValue(); - if (value == null) { // object was in cache, but it was cleared. - return null; - } - // we have the object. so we try to re-insert it into internal cache - try { - _internal.put(key, value); - } catch (CacheEvictionException e) { - // if the internal cache causes a fuss, we kick the object out. - _cacheMap.remove(key); - return null; - } - return value; + /** + * Gets the object cached under the specified key. + *

+ * The cache is looked up in the following manner: + *

+ * <ol>
+ * <li>The internal (L1) cache is checked. If the object is found, it is
+ * returned.</li>
+ * <li>This (L2) cache is checked. If the object is not found, then the caller
+ * is informed that the object is inaccessible.</li>
+ * <li>Since the object exists in L2, but not in L1, the object is readded to
+ * L1 using {@link CachePolicy#put(Object, Object)}.</li>
+ * <li>If the readding succeeds, the value is returned to caller.</li>
+ * <li>If a cache eviction exception is encountered instead, we remove the
+ * object from L2 and behave as if the object was inaccessible.</li>
+ * </ol>
+ * + * @param key + * the key that the object was stored under. + * @return the object stored under the key specified; null if the object is + * not (nolonger) accessible via this cache. + */ + public Object get(Object key) { + // first try the internal cache. + Object value = _internal.get(key); + if (value != null) { + return value; } - - /** - * Removes any object stored under the key specified. Note that the - * object is removed from both this (L2) and the internal (L1) - * cache. - * @param key the key whose object should be removed - */ - public void remove(Object key) { - _cacheMap.remove(key); - _internal.remove(key); + // poll and remove cleared references. + removeClearedEntries(); + Entry entry = (Entry) _cacheMap.get(key); + if (entry == null) { // object is not in cache. + return null; } - - /** - * Removes all objects in this (L2) and its internal (L1) cache. - */ - public void removeAll() { - _cacheMap.clear(); - _internal.removeAll(); + value = entry.getValue(); + if (value == null) { // object was in cache, but it was cleared. + return null; } - - /** - * Gets all the objects stored by the internal (L1) cache. - * @return an enumeration of objects in internal cache. - */ - public Enumeration elements() { - return _internal.elements(); + // we have the object. so we try to re-insert it into internal cache + try { + _internal.put(key, value); + } catch (CacheEvictionException e) { + // if the internal cache causes a fuss, we kick the object out. + _cacheMap.remove(key); + return null; } + return value; + } - /** - * Adds the specified listener to this cache. Note that the events - * fired by this correspond to the internal cache's events. - * @param listener the (non-null) listener to add to this policy - * @throws IllegalArgumentException if listener is null. - */ - public void addListener(CachePolicyListener listener) - throws IllegalArgumentException { - _internal.addListener(listener); + /** + * Removes any object stored under the key specified. Note that the object is + * removed from both this (L2) and the internal (L1) cache. + * + * @param key + * the key whose object should be removed + */ + public void remove(Object key) { + _cacheMap.remove(key); + _internal.remove(key); + } + + /** + * Removes all objects in this (L2) and its internal (L1) cache. + */ + public void removeAll() { + _cacheMap.clear(); + _internal.removeAll(); + } + + /** + * Gets all the objects stored by the internal (L1) cache. + * + * @return an enumeration of objects in internal cache. + */ + public Enumeration elements() { + return _internal.elements(); + } + + /** + * Adds the specified listener to this cache. Note that the events fired by + * this correspond to the internal cache's events. + * + * @param listener + * the (non-null) listener to add to this policy + * @throws IllegalArgumentException + * if listener is null. + */ + public void addListener(CachePolicyListener listener) + throws IllegalArgumentException { + _internal.addListener(listener); + } + + /** + * Removes a listener that was added earlier. + * + * @param listener + * the listener to remove. + */ + public void removeListener(CachePolicyListener listener) { + _internal.removeListener(listener); + } + + /** + * Cleans the mapping structure of any obsolete entries. This is usually + * called before insertions and lookups on the mapping structure. The runtime + * of this is usually very small, but it can be as expensive as n * log(n) if + * a large number of soft references were recently cleared. 
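For reference, the two-level lookup that the get() Javadoc above describes can be exercised as in the following sketch. This is illustrative only: the SoftCacheExample driver class is hypothetical, and SoftCache is assumed to live in the same jdbm helper package as the other classes in this patch.

    import org.apache.hadoop.hive.ql.util.jdbm.helper.CacheEvictionException;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.MRU;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.SoftCache;

    public class SoftCacheExample {
      public static void main(String[] args) throws CacheEvictionException {
        // L1 is a small MRU; the SoftCache adds a soft-reference L2 layer on top.
        SoftCache cache = new SoftCache(new MRU(16));

        // put() stores the value in both the internal MRU (L1) and the soft map (L2).
        cache.put("key", "value");

        // get() checks L1 first, then L2, re-adding an L2-only hit back into L1.
        System.out.println(cache.get("key"));   // "value", unless the GC cleared it
        System.out.println(cache.get("other")); // null: never cached
      }
    }

Because put() can propagate a CacheEvictionException from the internal policy, callers either handle or declare it, as main() does here.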
+ */ + private final void removeClearedEntries() { + for (Reference r = _clearQueue.poll(); r != null; r = _clearQueue.poll()) { + Object key = ((Entry) r).getKey(); + _cacheMap.remove(key); } + } + /** + * Value objects we keep in the internal map. This contains the key in + * addition to the value, because polling for cleared references returns these + * instances, and having access to their corresponding keys drastically + * improves the performance of removing the pair from the map (see + * {@link SoftCache#removeClearedEntries()}.) + */ + private static class Entry extends SoftReference { + private final Object _key; + /** - * Removes a listener that was added earlier. - * @param listener the listener to remove. + * Constructor that uses value as the soft reference's + * referent. */ - public void removeListener(CachePolicyListener listener) { - _internal.removeListener(listener); + public Entry(Object key, Object value, ReferenceQueue queue) { + super(value, queue); + _key = key; } /** - * Cleans the mapping structure of any obsolete entries. This is usually - * called before insertions and lookups on the mapping structure. The - * runtime of this is usually very small, but it can be as expensive as - * n * log(n) if a large number of soft references were recently cleared. + * Gets the key + * + * @return the key associated with this value. */ - private final void removeClearedEntries() { - for (Reference r = _clearQueue.poll(); r != null; r = _clearQueue.poll()) { - Object key = ((Entry)r).getKey(); - _cacheMap.remove(key); - } + final Object getKey() { + return _key; } /** - * Value objects we keep in the internal map. This contains the key in - * addition to the value, because polling for cleared references - * returns these instances, and having access to their corresponding - * keys drastically improves the performance of removing the pair - * from the map (see {@link SoftCache#removeClearedEntries()}.) + * Gets the value + * + * @return the value; null if it is no longer accessible */ - private static class Entry extends SoftReference { - private final Object _key; - - /** - * Constructor that uses value as the soft - * reference's referent. - */ - public Entry(Object key, Object value, ReferenceQueue queue) { - super(value, queue); - _key = key; - } - - /** - * Gets the key - * @return the key associated with this value. - */ - final Object getKey() { - return _key; - } - - /** - * Gets the value - * @return the value; null if it is no longer accessible - */ - final Object getValue() { - return this.get(); - } + final Object getValue() { + return this.get(); } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ObjectBAComparator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ObjectBAComparator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ObjectBAComparator.java (working copy) @@ -69,116 +69,111 @@ import java.util.Comparator; /** - * Comparator for objects which have been serialized into byte arrays. - * In effect, it wraps another Comparator which compares object and provides + * Comparator for objects which have been serialized into byte arrays. In + * effect, it wraps another Comparator which compares object and provides * transparent deserialization from byte array to object. 
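To make the wrapping behaviour described above concrete: the comparator deserializes both byte-array arguments and then delegates to the wrapped object comparator. A rough sketch, using IntegerComparator and Serialization from elsewhere in this patch (the driver class name is made up):

    import java.io.IOException;
    import java.util.Comparator;

    import org.apache.hadoop.hive.ql.util.jdbm.helper.IntegerComparator;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.ObjectBAComparator;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.Serialization;

    public class ObjectBAComparatorExample {
      public static void main(String[] args) throws IOException {
        // The wrapped comparator sees Integers; the wrapper sees serialized bytes.
        Comparator byteComparator = new ObjectBAComparator(new IntegerComparator());

        byte[] one = Serialization.serialize(new Integer(1));
        byte[] two = Serialization.serialize(new Integer(2));

        // compare() deserializes both arguments, then delegates to IntegerComparator.
        System.out.println(byteComparator.compare(one, two)); // -1: 1 < 2
      }
    }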
- * + * * @author Alex Boisvert - * @version $Id: ObjectBAComparator.java,v 1.1 2002/05/31 06:33:20 boisvert Exp $ + * @version $Id: ObjectBAComparator.java,v 1.1 2002/05/31 06:33:20 boisvert Exp + * $ */ -public final class ObjectBAComparator - implements Comparator, Serializable -{ +public final class ObjectBAComparator implements Comparator, Serializable { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Wrapped comparator. + */ + private final Comparator _comparator; - /** - * Wrapped comparator. - */ - private Comparator _comparator; + /** + * Construct an ObjectByteArrayComparator which wraps an Object Comparator. + * + * @param comparator + * Object comparator. + */ + public ObjectBAComparator(Comparator comparator) { + if (comparator == null) { + throw new IllegalArgumentException("Argument 'comparator' is null"); + } + _comparator = comparator; + } - /** - * Construct an ObjectByteArrayComparator which wraps an Object Comparator. - * - * @param comparator Object comparator. - */ - public ObjectBAComparator( Comparator comparator ) - { - if ( comparator == null ) { - throw new IllegalArgumentException( "Argument 'comparator' is null" ); - } + /** + * Compare two objects. + * + * @param obj1 + * First object + * @param obj2 + * Second object + * @return 1 if obj1 > obj2, 0 if obj1 == obj2, -1 if obj1 < obj2 + */ + public int compare(Object obj1, Object obj2) { + if (obj1 == null) { + throw new IllegalArgumentException("Argument 'obj1' is null"); + } - _comparator = comparator; + if (obj2 == null) { + throw new IllegalArgumentException("Argument 'obj2' is null"); } + try { + obj1 = Serialization.deserialize((byte[]) obj1); + obj2 = Serialization.deserialize((byte[]) obj2); - /** - * Compare two objects. - * - * @param obj1 First object - * @param obj2 Second object - * @return 1 if obj1 > obj2, 0 if obj1 == obj2, -1 if obj1 < obj2 - */ - public int compare( Object obj1, Object obj2 ) - { - if ( obj1 == null ) { - throw new IllegalArgumentException( "Argument 'obj1' is null" ); - } + return _comparator.compare(obj1, obj2); + } catch (IOException except) { + throw new WrappedRuntimeException(except); + } catch (ClassNotFoundException except) { + throw new WrappedRuntimeException(except); + } + } - if ( obj2 == null ) { - throw new IllegalArgumentException( "Argument 'obj2' is null" ); - } + /** + * Compare two byte arrays. + */ + public static int compareByteArray(byte[] thisKey, byte[] otherKey) { + int len = Math.min(thisKey.length, otherKey.length); - try { - obj1 = Serialization.deserialize( (byte[]) obj1 ); - obj2 = Serialization.deserialize( (byte[]) obj2 ); - - return _comparator.compare( obj1, obj2 ); - } catch ( IOException except ) { - throw new WrappedRuntimeException( except ); - } catch ( ClassNotFoundException except ) { - throw new WrappedRuntimeException( except ); + // compare the byte arrays + for (int i = 0; i < len; i++) { + if (thisKey[i] >= 0) { + if (otherKey[i] >= 0) { + // both positive + if (thisKey[i] < otherKey[i]) { + return -1; + } else if (thisKey[i] > otherKey[i]) { + return 1; + } + } else { + // otherKey is negative => greater (because MSB is 1) + return -1; } - } - - - /** - * Compare two byte arrays. 
- */ - public static int compareByteArray( byte[] thisKey, byte[] otherKey ) - { - int len = Math.min( thisKey.length, otherKey.length ); - - // compare the byte arrays - for ( int i=0; i= 0 ) { - if ( otherKey[i] >= 0 ) { - // both positive - if ( thisKey[i] < otherKey[i] ) { - return -1; - } else if ( thisKey[i] > otherKey[i] ) { - return 1; - } - } else { - // otherKey is negative => greater (because MSB is 1) - return -1; - } - } else { - if ( otherKey[i] >= 0 ) { - // thisKey is negative => greater (because MSB is 1) - return 1; - } else { - // both negative - if ( thisKey[i] < otherKey[i] ) { - return -1; - } else if ( thisKey[i] > otherKey[i] ) { - return 1; - } - } - } - } - if ( thisKey.length == otherKey.length) { - return 0; - } - if ( thisKey.length < otherKey.length ) { + } else { + if (otherKey[i] >= 0) { + // thisKey is negative => greater (because MSB is 1) + return 1; + } else { + // both negative + if (thisKey[i] < otherKey[i]) { return -1; + } else if (thisKey[i] > otherKey[i]) { + return 1; + } } - return 1; + } } + if (thisKey.length == otherKey.length) { + return 0; + } + if (thisKey.length < otherKey.length) { + return -1; + } + return 1; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/MRU.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/MRU.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/MRU.java (working copy) @@ -69,216 +69,210 @@ import java.util.Hashtable; import java.util.Vector; - /** - * MRU - Most Recently Used cache policy. - * - * Methods are *not* synchronized, so no concurrent access is allowed. - * + * MRU - Most Recently Used cache policy. + * + * Methods are *not* synchronized, so no concurrent access is allowed. + * * @author Alex Boisvert * @version $Id: MRU.java,v 1.8 2005/06/25 23:12:31 doomdark Exp $ */ public class MRU implements CachePolicy { - /** Cached object hashtable */ - Hashtable _hash = new Hashtable(); + /** Cached object hashtable */ + Hashtable _hash = new Hashtable(); - /** - * Maximum number of objects in the cache. - */ - int _max; + /** + * Maximum number of objects in the cache. + */ + int _max; - /** - * Beginning of linked-list of cache elements. First entry is element - * which has been used least recently. - */ - CacheEntry _first; + /** + * Beginning of linked-list of cache elements. First entry is element which + * has been used least recently. + */ + CacheEntry _first; - /** - * End of linked-list of cache elements. Last entry is element - * which has been used most recently. - */ - CacheEntry _last; + /** + * End of linked-list of cache elements. Last entry is element which has been + * used most recently. + */ + CacheEntry _last; + /** + * Cache eviction listeners + */ + Vector listeners = new Vector(); - /** - * Cache eviction listeners - */ - Vector listeners = new Vector(); - - - /** - * Construct an MRU with a given maximum number of objects. - */ - public MRU(int max) { - if (max <= 0) { - throw new IllegalArgumentException("MRU cache must contain at least one entry"); - } - _max = max; + /** + * Construct an MRU with a given maximum number of objects. + */ + public MRU(int max) { + if (max <= 0) { + throw new IllegalArgumentException( + "MRU cache must contain at least one entry"); } + _max = max; + } + /** + * Place an object in the cache. 
+ */ + public void put(Object key, Object value) throws CacheEvictionException { + CacheEntry entry = (CacheEntry) _hash.get(key); + if (entry != null) { + entry.setValue(value); + touchEntry(entry); + } else { - /** - * Place an object in the cache. - */ - public void put(Object key, Object value) throws CacheEvictionException { - CacheEntry entry = (CacheEntry)_hash.get(key); - if (entry != null) { - entry.setValue(value); - touchEntry(entry); - } else { - - if (_hash.size() == _max) { - // purge and recycle entry - entry = purgeEntry(); - entry.setKey(key); - entry.setValue(value); - } else { - entry = new CacheEntry(key, value); - } - addEntry(entry); - _hash.put(entry.getKey(), entry); - } + if (_hash.size() == _max) { + // purge and recycle entry + entry = purgeEntry(); + entry.setKey(key); + entry.setValue(value); + } else { + entry = new CacheEntry(key, value); + } + addEntry(entry); + _hash.put(entry.getKey(), entry); } + } - - /** - * Obtain an object in the cache - */ - public Object get(Object key) { - CacheEntry entry = (CacheEntry)_hash.get(key); - if (entry != null) { - touchEntry(entry); - return entry.getValue(); - } else { - return null; - } + /** + * Obtain an object in the cache + */ + public Object get(Object key) { + CacheEntry entry = (CacheEntry) _hash.get(key); + if (entry != null) { + touchEntry(entry); + return entry.getValue(); + } else { + return null; } + } - - /** - * Remove an object from the cache - */ - public void remove(Object key) { - CacheEntry entry = (CacheEntry)_hash.get(key); - if (entry != null) { - removeEntry(entry); - _hash.remove(entry.getKey()); - } + /** + * Remove an object from the cache + */ + public void remove(Object key) { + CacheEntry entry = (CacheEntry) _hash.get(key); + if (entry != null) { + removeEntry(entry); + _hash.remove(entry.getKey()); } + } + /** + * Remove all objects from the cache + */ + public void removeAll() { + _hash = new Hashtable(); + _first = null; + _last = null; + } - /** - * Remove all objects from the cache - */ - public void removeAll() { - _hash = new Hashtable(); - _first = null; - _last = null; + /** + * Enumerate elements' values in the cache + */ + public Enumeration elements() { + return new MRUEnumeration(_hash.elements()); + } + + /** + * Add a listener to this cache policy + * + * @param listener + * Listener to add to this policy + */ + public void addListener(CachePolicyListener listener) { + if (listener == null) { + throw new IllegalArgumentException("Cannot add null listener."); } + if (!listeners.contains(listener)) { + listeners.addElement(listener); + } + } + /** + * Remove a listener from this cache policy + * + * @param listener + * Listener to remove from this policy + */ + public void removeListener(CachePolicyListener listener) { + listeners.removeElement(listener); + } - /** - * Enumerate elements' values in the cache - */ - public Enumeration elements() { - return new MRUEnumeration(_hash.elements()); + /** + * Add a CacheEntry. Entry goes at the end of the list. + */ + protected void addEntry(CacheEntry entry) { + if (_first == null) { + _first = entry; + _last = entry; + } else { + _last.setNext(entry); + entry.setPrevious(_last); + _last = entry; } + } - /** - * Add a listener to this cache policy - * - * @param listener Listener to add to this policy - */ - public void addListener(CachePolicyListener listener) { - if (listener == null) { - throw new IllegalArgumentException("Cannot add null listener."); - } - if ( ! 
listeners.contains(listener)) { - listeners.addElement(listener); - } + /** + * Remove a CacheEntry from linked list + */ + protected void removeEntry(CacheEntry entry) { + if (entry == _first) { + _first = entry.getNext(); } - - /** - * Remove a listener from this cache policy - * - * @param listener Listener to remove from this policy - */ - public void removeListener(CachePolicyListener listener) { - listeners.removeElement(listener); + if (_last == entry) { + _last = entry.getPrevious(); } - - /** - * Add a CacheEntry. Entry goes at the end of the list. - */ - protected void addEntry(CacheEntry entry) { - if (_first == null) { - _first = entry; - _last = entry; - } else { - _last.setNext(entry); - entry.setPrevious(_last); - _last = entry; - } + CacheEntry previous = entry.getPrevious(); + CacheEntry next = entry.getNext(); + if (previous != null) { + previous.setNext(next); } - - - /** - * Remove a CacheEntry from linked list - */ - protected void removeEntry(CacheEntry entry) { - if (entry == _first) { - _first = entry.getNext(); - } - if (_last == entry) { - _last = entry.getPrevious(); - } - CacheEntry previous = entry.getPrevious(); - CacheEntry next = entry.getNext(); - if (previous != null) { - previous.setNext(next); - } - if (next != null) { - next.setPrevious(previous); - } - entry.setPrevious(null); - entry.setNext(null); + if (next != null) { + next.setPrevious(previous); } + entry.setPrevious(null); + entry.setNext(null); + } - /** - * Place entry at the end of linked list -- Most Recently Used - */ - protected void touchEntry(CacheEntry entry) { - if (_last == entry) { - return; - } - removeEntry(entry); - addEntry(entry); + /** + * Place entry at the end of linked list -- Most Recently Used + */ + protected void touchEntry(CacheEntry entry) { + if (_last == entry) { + return; } + removeEntry(entry); + addEntry(entry); + } - /** - * Purge least recently used object from the cache - * - * @return recyclable CacheEntry - */ - protected CacheEntry purgeEntry() throws CacheEvictionException { - CacheEntry entry = _first; + /** + * Purge least recently used object from the cache + * + * @return recyclable CacheEntry + */ + protected CacheEntry purgeEntry() throws CacheEvictionException { + CacheEntry entry = _first; - // Notify policy listeners first. if any of them throw an - // eviction exception, then the internal data structure - // remains untouched. - CachePolicyListener listener; - for (int i=0; iAlex Boisvert */ -public abstract class FastIterator -{ +public abstract class FastIterator { - /** - * Returns the next element in the interation. - * - * @return the next element in the iteration, or null if no more element. - */ - public abstract Object next() - throws IterationException; + /** + * Returns the next element in the interation. + * + * @return the next element in the iteration, or null if no more element. + */ + public abstract Object next() throws IterationException; } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serializer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serializer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serializer.java (working copy) @@ -68,33 +68,30 @@ import java.io.Serializable; /** - * Interface used to provide a serialization mechanism other than a class' normal - * serialization. - * + * Interface used to provide a serialization mechanism other than a class' + * normal serialization. 
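As a sketch of what a non-default implementation of this interface might look like (the StringSerializer class is illustrative and not part of the patch; it reuses the UTF-8 helpers from Conversion, which also appears in this diff):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.helper.Conversion;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer;

    /**
     * Stores String values as plain UTF-8 bytes instead of full Java serialization.
     */
    public class StringSerializer implements Serializer {

      static final long serialVersionUID = 1L;

      public byte[] serialize(Object obj) throws IOException {
        return Conversion.convertToByteArray((String) obj);
      }

      public Object deserialize(byte[] serialized) throws IOException {
        return Conversion.convertToString(serialized);
      }
    }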
+ * * @author Alex Boisvert * @version $Id: Serializer.java,v 1.1 2003/03/21 02:48:42 boisvert Exp $ */ -public interface Serializer - extends Serializable -{ +public interface Serializer extends Serializable { - /** - * Serialize the content of an object into a byte array. - * - * @param obj Object to serialize - * @return a byte array representing the object's state - */ - public byte[] serialize( Object obj ) - throws IOException; - - - /** - * Deserialize the content of an object from a byte array. - * - * @param serialized Byte array representation of the object - * @return deserialized object - */ - public Object deserialize( byte[] serialized ) - throws IOException; + /** + * Serialize the content of an object into a byte array. + * + * @param obj + * Object to serialize + * @return a byte array representing the object's state + */ + public byte[] serialize(Object obj) throws IOException; + /** + * Deserialize the content of an object from a byte array. + * + * @param serialized + * Byte array representation of the object + * @return deserialized object + */ + public Object deserialize(byte[] serialized) throws IOException; + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IterationException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IterationException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IterationException.java (working copy) @@ -65,51 +65,45 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; - /** * Iteration exception. - * + * * @author Alex Boisvert * @version $Revision: 1.2 $ */ -public class IterationException - extends WrappedRuntimeException -{ +public class IterationException extends WrappedRuntimeException { - /** - * Construct a new iteration exception wrapping an underlying exception - * and providing a message. - * - * @param message The exception message - * @param except The underlying exception - */ - public IterationException( String message, Exception except ) - { - super( message, except ); - } + /** + * Construct a new iteration exception wrapping an underlying exception and + * providing a message. + * + * @param message + * The exception message + * @param except + * The underlying exception + */ + public IterationException(String message, Exception except) { + super(message, except); + } + /** + * Construct a new iteration exception with a message. + * + * @param message + * The exception message + */ + public IterationException(String message) { + super(message, null); + } - /** - * Construct a new iteration exception with a message. - * - * @param message The exception message - */ - public IterationException( String message ) - { - super( message, null ); - } + /** + * Construct a new iteration exception wrapping an underlying exception. + * + * @param except + * The underlying exception + */ + public IterationException(Exception except) { + super(except); + } - - /** - * Construct a new iteration exception wrapping an underlying exception. 
- * - * @param except The underlying exception - */ - public IterationException( Exception except ) - { - super( except ); - } - } - - Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serialization.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serialization.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Serialization.java (working copy) @@ -72,43 +72,38 @@ /** * Serialization-related utility methods. - * + * * @author Alex Boisvert * @version $Id: Serialization.java,v 1.1 2002/05/31 06:33:20 boisvert Exp $ */ -public final class Serialization -{ +public final class Serialization { - /** - * Serialize the object into a byte array. - */ - public static byte[] serialize( Object obj ) - throws IOException - { - ByteArrayOutputStream baos; - ObjectOutputStream oos; + /** + * Serialize the object into a byte array. + */ + public static byte[] serialize(Object obj) throws IOException { + ByteArrayOutputStream baos; + ObjectOutputStream oos; - baos = new ByteArrayOutputStream(); - oos = new ObjectOutputStream( baos ); - oos.writeObject( obj ); - oos.close(); + baos = new ByteArrayOutputStream(); + oos = new ObjectOutputStream(baos); + oos.writeObject(obj); + oos.close(); - return baos.toByteArray(); - } + return baos.toByteArray(); + } + /** + * Deserialize an object from a byte array + */ + public static Object deserialize(byte[] buf) throws ClassNotFoundException, + IOException { + ByteArrayInputStream bais; + ObjectInputStream ois; - /** - * Deserialize an object from a byte array - */ - public static Object deserialize( byte[] buf ) - throws ClassNotFoundException, IOException - { - ByteArrayInputStream bais; - ObjectInputStream ois; + bais = new ByteArrayInputStream(buf); + ois = new ObjectInputStream(bais); + return ois.readObject(); + } - bais = new ByteArrayInputStream( buf ); - ois = new ObjectInputStream( bais ); - return ois.readObject(); - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/DefaultSerializer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/DefaultSerializer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/DefaultSerializer.java (working copy) @@ -68,54 +68,45 @@ /** * Default java serializer. - * + * * @author Alex Boisvert * @version $Id: DefaultSerializer.java,v 1.2 2003/09/21 15:47:00 boisvert Exp $ */ -public class DefaultSerializer - implements Serializer -{ +public class DefaultSerializer implements Serializer { - - public static final DefaultSerializer INSTANCE = new DefaultSerializer(); - - - /** - * Construct a DefaultSerializer. - */ - public DefaultSerializer() - { - // no op - } + public static final DefaultSerializer INSTANCE = new DefaultSerializer(); - - /** - * Serialize the content of an object into a byte array. - * - * @param obj Object to serialize - * @return a byte array representing the object's state - */ - public byte[] serialize( Object obj ) - throws IOException - { - return Serialization.serialize( obj ); - } - - - /** - * Deserialize the content of an object from a byte array. 
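A quick round trip through the serialization helpers above; both calls are shown because DefaultSerializer is simply the Serializer-interface face of the static Serialization methods (the driver class is hypothetical):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.helper.DefaultSerializer;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.Serialization;

    public class SerializationRoundTrip {
      public static void main(String[] args) throws IOException, ClassNotFoundException {
        // Static helpers: plain Java serialization to and from a byte array.
        byte[] bytes = Serialization.serialize("hello");
        System.out.println(Serialization.deserialize(bytes)); // hello

        // Same mechanism behind the Serializer interface; deserialize() wraps
        // any ClassNotFoundException in a WrappedRuntimeException.
        byte[] viaInstance = DefaultSerializer.INSTANCE.serialize(new Integer(42));
        System.out.println(DefaultSerializer.INSTANCE.deserialize(viaInstance)); // 42
      }
    }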
- * - * @param serialized Byte array representation of the object - * @return deserialized object - */ - public Object deserialize( byte[] serialized ) - throws IOException - { - try { - return Serialization.deserialize( serialized ); - } catch ( ClassNotFoundException except ) { - throw new WrappedRuntimeException( except ); - } - } + /** + * Construct a DefaultSerializer. + */ + public DefaultSerializer() { + // no op + } + /** + * Serialize the content of an object into a byte array. + * + * @param obj + * Object to serialize + * @return a byte array representing the object's state + */ + public byte[] serialize(Object obj) throws IOException { + return Serialization.serialize(obj); + } + + /** + * Deserialize the content of an object from a byte array. + * + * @param serialized + * Byte array representation of the object + * @return deserialized object + */ + public Object deserialize(byte[] serialized) throws IOException { + try { + return Serialization.deserialize(serialized); + } catch (ClassNotFoundException except) { + throw new WrappedRuntimeException(except); + } + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CacheEvictionException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CacheEvictionException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CacheEvictionException.java (working copy) @@ -66,28 +66,24 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; /** - * Exception that occurs during eviction of an object in the cache. - * - * @author Alex Boisvert - * @version $Id: CacheEvictionException.java,v 1.4 2003/10/21 15:43:20 boisvert Exp $ + * Exception that occurs during eviction of an object in the cache. + * + * @author Alex Boisvert + * @version $Id: CacheEvictionException.java,v 1.4 2003/10/21 15:43:20 boisvert + * Exp $ */ -public class CacheEvictionException - extends Exception -{ +public class CacheEvictionException extends Exception { - /** - * Nested exception -- the original exception that occured, if any. - */ - protected Exception _nested; + /** + * Nested exception -- the original exception that occured, if any. + */ + protected Exception _nested; + public CacheEvictionException(Exception nested) { + _nested = nested; + } - public CacheEvictionException( Exception nested ) - { - _nested = nested; - } - - public Exception getNestedException() - { - return _nested; - } + public Exception getNestedException() { + return _nested; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicy.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicy.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicy.java (working copy) @@ -68,94 +68,92 @@ import java.util.Enumeration; /** - * CachePolicity is an abstraction for different cache policies. - * (ie. MRU, time-based, soft-refs, ...) - * + * CachePolicity is an abstraction for different cache policies. (ie. MRU, + * time-based, soft-refs, ...) + * * @author Alex Boisvert * @author Dilum Ranatunga * @version $Id: CachePolicy.java,v 1.5 2003/11/01 13:25:02 dranatunga Exp $ */ -public interface CachePolicy -{ +public interface CachePolicy { - /** - * Place an object in the cache. If the cache does not currently contain - * an object for the key specified, this mapping is added. 
If an object - * currently exists under the specified key, the current object is - * replaced with the new object. - *

- * If the changes to the cache cause the eviction of any objects - * stored under other key(s), events corresponding to - * the evictions are fired for each object. If an event listener is - * unable to handle the eviction, and throws a cache eviction exception, - * that exception is propagated to the caller. If such an exception is - * thrown, the cache itself should be left as it was before the - * put() operation was invoked: the the object whose - * eviction failed is still in the cache, and the new insertion or - * modification is reverted. - * - * @param key key for the cached object - * @param value the cached object - * @throws CacheEvictionException propagated if, while evicting objects - * to make room for new object, an eviction listener encountered - * this problem. - */ - public void put( Object key, Object value ) - throws CacheEvictionException; + /** + * Place an object in the cache. If the cache does not currently contain an + * object for the key specified, this mapping is added. If an object currently + * exists under the specified key, the current object is replaced with the new + * object. + *

+ * If the changes to the cache cause the eviction of any objects + * stored under other key(s), events corresponding to the + * evictions are fired for each object. If an event listener is unable to + * handle the eviction, and throws a cache eviction exception, that exception + * is propagated to the caller. If such an exception is thrown, the cache + * itself should be left as it was before the put() operation was + * invoked: the the object whose eviction failed is still in the cache, and + * the new insertion or modification is reverted. + * + * @param key + * key for the cached object + * @param value + * the cached object + * @throws CacheEvictionException + * propagated if, while evicting objects to make room for new + * object, an eviction listener encountered this problem. + */ + public void put(Object key, Object value) throws CacheEvictionException; + /** + * Obtain the object stored under the key specified. + * + * @param key + * key the object was cached under + * @return the object if it is still in the cache, null otherwise. + */ + public Object get(Object key); - /** - * Obtain the object stored under the key specified. - * - * @param key key the object was cached under - * @return the object if it is still in the cache, null otherwise. - */ - public Object get( Object key ); + /** + * Remove the object stored under the key specified. Note that since eviction + * notices are only fired when objects under different keys + * are evicted, no event is fired for any object stored under this key (see + * {@link #put(Object, Object) put( )}). + * + * @param key + * key the object was stored in the cache under. + */ + public void remove(Object key); + /** + * Remove all objects from the cache. Consistent with {@link #remove(Object) + * remove( )}, no eviction notices are fired. + */ + public void removeAll(); - /** - * Remove the object stored under the key specified. Note that since - * eviction notices are only fired when objects under different - * keys are evicted, no event is fired for any object stored - * under this key (see {@link #put(Object, Object) put( )}). - * - * @param key key the object was stored in the cache under. - */ - public void remove( Object key ); + /** + * Enumerate through the objects currently in the cache. + */ + public Enumeration elements(); + /** + * Add a listener to this cache policy. + *

+ * If this cache policy already contains a listener that is equal to the one + * being added, this call has no effect. + * + * @param listener + * the (non-null) listener to add to this policy + * @throws IllegalArgumentException + * if listener is null. + */ + public void addListener(CachePolicyListener listener) + throws IllegalArgumentException; - /** - * Remove all objects from the cache. Consistent with - * {@link #remove(Object) remove( )}, no eviction notices are fired. - */ - public void removeAll(); + /** + * Remove a listener from this cache policy. The listener is found using + * object equality, not identity. + * + * @param listener + * the listener to remove from this policy + */ + public void removeListener(CachePolicyListener listener); - - /** - * Enumerate through the objects currently in the cache. - */ - public Enumeration elements(); - - - /** - * Add a listener to this cache policy. - *

- * If this cache policy already contains a listener that is equal to - * the one being added, this call has no effect. - * - * @param listener the (non-null) listener to add to this policy - * @throws IllegalArgumentException if listener is null. - */ - public void addListener( CachePolicyListener listener ) - throws IllegalArgumentException; - - - /** - * Remove a listener from this cache policy. The listener is found - * using object equality, not identity. - * - * @param listener the listener to remove from this policy - */ - public void removeListener( CachePolicyListener listener ); - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/WrappedRuntimeException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/WrappedRuntimeException.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/WrappedRuntimeException.java (working copy) @@ -68,102 +68,92 @@ import java.io.PrintWriter; /** - * A run-time exception that wraps another exception. The printed stack - * trace will be that of the wrapped exception. - * + * A run-time exception that wraps another exception. The printed stack trace + * will be that of the wrapped exception. + * * @author Alex Boisvert - * @version $Id: WrappedRuntimeException.java,v 1.1 2002/05/31 06:33:20 boisvert Exp $ + * @version $Id: WrappedRuntimeException.java,v 1.1 2002/05/31 06:33:20 boisvert + * Exp $ */ -public class WrappedRuntimeException - extends RuntimeException -{ +public class WrappedRuntimeException extends RuntimeException { + /** + * The underlying exception. + */ + private final Exception _except; - /** - * The underlying exception. - */ - private final Exception _except; + /** + * Constructs a new runtime exception based on a checked exception. + * + * @param message + * The error message + * @param except + * The checked exception + */ + public WrappedRuntimeException(String message, Exception except) { + super(message == null ? "No message available" : message); - - /** - * Constructs a new runtime exception based on a checked exception. - * - * @param message The error message - * @param except The checked exception - */ - public WrappedRuntimeException( String message, Exception except ) - { - super( message == null ? "No message available" : message ); - - if ( except instanceof WrappedRuntimeException && - ( (WrappedRuntimeException) except )._except != null ) - { - _except = ( (WrappedRuntimeException) except )._except; - } else { - _except = except; - } + if (except instanceof WrappedRuntimeException + && ((WrappedRuntimeException) except)._except != null) { + _except = ((WrappedRuntimeException) except)._except; + } else { + _except = except; } + } + /** + * Constructs a new runtime exception based on a checked exception. + * + * @param except + * The checked exception + */ + public WrappedRuntimeException(Exception except) { + super( + except == null || except.getMessage() == null ? "No message available" + : except.getMessage()); - /** - * Constructs a new runtime exception based on a checked exception. - * - * @param except The checked exception - */ - public WrappedRuntimeException( Exception except ) - { - super( except == null || except.getMessage() == null ? 
"No message available" : except.getMessage() ); - - if ( except instanceof WrappedRuntimeException && - ( (WrappedRuntimeException) except )._except != null ) - { - _except = ( (WrappedRuntimeException) except )._except; - } else { - _except = except; - } + if (except instanceof WrappedRuntimeException + && ((WrappedRuntimeException) except)._except != null) { + _except = ((WrappedRuntimeException) except)._except; + } else { + _except = except; } + } + /** + * Returns the exception wrapped by this runtime exception. + * + * @return The exception wrapped by this runtime exception + */ + public Exception getException() { + return _except; + } - /** - * Returns the exception wrapped by this runtime exception. - * - * @return The exception wrapped by this runtime exception - */ - public Exception getException() - { - return _except; + @Override + public void printStackTrace() { + if (_except == null) { + super.printStackTrace(); + } else { + _except.printStackTrace(); } + } - - public void printStackTrace() - { - if ( _except == null ) { - super.printStackTrace(); - } else { - _except.printStackTrace(); - } + @Override + public void printStackTrace(PrintStream stream) { + if (_except == null) { + super.printStackTrace(stream); + } else { + _except.printStackTrace(stream); } + } - - public void printStackTrace( PrintStream stream ) - { - if ( _except == null ) { - super.printStackTrace( stream ); - } else { - _except.printStackTrace( stream ); - } + @Override + public void printStackTrace(PrintWriter writer) { + if (_except == null) { + super.printStackTrace(writer); + } else { + _except.printStackTrace(writer); } + } - - public void printStackTrace( PrintWriter writer ) - { - if ( _except == null ) { - super.printStackTrace( writer ); - } else { - _except.printStackTrace( writer ); - } - } - } - - Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Conversion.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Conversion.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Conversion.java (working copy) @@ -64,178 +64,148 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; - /** * Miscelaneous conversion utility methods. - * + * * @author Alex Boisvert * @version $Id: Conversion.java,v 1.3 2002/05/31 06:33:20 boisvert Exp $ */ -public class Conversion -{ +public class Conversion { - /** - * Convert a string into a byte array. - */ - public static byte[] convertToByteArray( String s ) - { - try { - // see the following page for character encoding - // http://java.sun.com/products/jdk/1.1/docs/guide/intl/encoding.doc.html - return s.getBytes( "UTF8" ); - } catch ( java.io.UnsupportedEncodingException uee ) { - uee.printStackTrace(); - throw new Error( "Platform doesn't support UTF8 encoding" ); - } + /** + * Convert a string into a byte array. + */ + public static byte[] convertToByteArray(String s) { + try { + // see the following page for character encoding + // http://java.sun.com/products/jdk/1.1/docs/guide/intl/encoding.doc.html + return s.getBytes("UTF8"); + } catch (java.io.UnsupportedEncodingException uee) { + uee.printStackTrace(); + throw new Error("Platform doesn't support UTF8 encoding"); } + } + /** + * Convert a byte into a byte array. + */ + public static byte[] convertToByteArray(byte n) { + n = (byte) (n ^ ((byte) 0x80)); // flip MSB because "byte" is signed + return new byte[] { n }; + } - /** - * Convert a byte into a byte array. 
- */ - public static byte[] convertToByteArray( byte n ) - { - n = (byte)( n ^ ( (byte) 0x80 ) ); // flip MSB because "byte" is signed - return new byte[] { n }; - } + /** + * Convert a short into a byte array. + */ + public static byte[] convertToByteArray(short n) { + n = (short) (n ^ ((short) 0x8000)); // flip MSB because "short" is signed + byte[] key = new byte[2]; + pack2(key, 0, n); + return key; + } + /** + * Convert an int into a byte array. + */ + public static byte[] convertToByteArray(int n) { + n = (n ^ 0x80000000); // flip MSB because "int" is signed + byte[] key = new byte[4]; + pack4(key, 0, n); + return key; + } - /** - * Convert a short into a byte array. - */ - public static byte[] convertToByteArray( short n ) - { - n = (short) ( n ^ ( (short) 0x8000 ) ); // flip MSB because "short" is signed - byte[] key = new byte[ 2 ]; - pack2( key, 0, n ); - return key; - } + /** + * Convert a long into a byte array. + */ + public static byte[] convertToByteArray(long n) { + n = (n ^ 0x8000000000000000L); // flip MSB because "long" is signed + byte[] key = new byte[8]; + pack8(key, 0, n); + return key; + } - - /** - * Convert an int into a byte array. - */ - public static byte[] convertToByteArray( int n ) - { - n = (n ^ 0x80000000); // flip MSB because "int" is signed - byte[] key = new byte[4]; - pack4(key, 0, n); - return key; + /** + * Convert a byte array (encoded as UTF-8) into a String + */ + public static String convertToString(byte[] buf) { + try { + // see the following page for character encoding + // http://java.sun.com/products/jdk/1.1/docs/guide/intl/encoding.doc.html + return new String(buf, "UTF8"); + } catch (java.io.UnsupportedEncodingException uee) { + uee.printStackTrace(); + throw new Error("Platform doesn't support UTF8 encoding"); } + } + /** + * Convert a byte array into an integer (signed 32-bit) value. + */ + public static int convertToInt(byte[] buf) { + int value = unpack4(buf, 0); + value = (value ^ 0x80000000); // flip MSB because "int" is signed + return value; + } - /** - * Convert a long into a byte array. - */ - public static byte[] convertToByteArray( long n ) - { - n = (n ^ 0x8000000000000000L); // flip MSB because "long" is signed - byte[] key = new byte[8]; - pack8( key, 0, n ); - return key; - } + /** + * Convert a byte array into a long (signed 64-bit) value. + */ + public static long convertToLong(byte[] buf) { + long value = ((long) unpack4(buf, 0) << 32) + + (unpack4(buf, 4) & 0xFFFFFFFFL); + value = (value ^ 0x8000000000000000L); // flip MSB because "long" is signed + return value; + } + static int unpack4(byte[] buf, int offset) { + int value = (buf[offset] << 24) | ((buf[offset + 1] << 16) & 0x00FF0000) + | ((buf[offset + 2] << 8) & 0x0000FF00) + | ((buf[offset + 3] << 0) & 0x000000FF); - /** - * Convert a byte array (encoded as UTF-8) into a String - */ - public static String convertToString( byte[] buf ) - { - try { - // see the following page for character encoding - // http://java.sun.com/products/jdk/1.1/docs/guide/intl/encoding.doc.html - return new String( buf, "UTF8" ); - } catch ( java.io.UnsupportedEncodingException uee ) { - uee.printStackTrace(); - throw new Error( "Platform doesn't support UTF8 encoding" ); - } - } + return value; + } + static final void pack2(byte[] data, int offs, int val) { + data[offs++] = (byte) (val >> 8); + data[offs++] = (byte) val; + } - /** - * Convert a byte array into an integer (signed 32-bit) value. 
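The MSB flips in the conversions above are what keep numeric order intact when the resulting byte arrays are compared as unsigned bytes, for example by ObjectBAComparator.compareByteArray earlier in this patch. A small sketch (the driver class is hypothetical):

    import org.apache.hadoop.hive.ql.util.jdbm.helper.Conversion;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.ObjectBAComparator;

    public class ConversionOrderExample {
      public static void main(String[] args) {
        byte[] minusOne = Conversion.convertToByteArray(-1);
        byte[] five = Conversion.convertToByteArray(5);

        // With the sign bit flipped, byte-wise comparison agrees with numeric order.
        System.out.println(ObjectBAComparator.compareByteArray(minusOne, five)); // -1

        // The round trip flips the bit back and restores the signed value.
        System.out.println(Conversion.convertToInt(minusOne)); // -1
      }
    }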
- */ - public static int convertToInt( byte[] buf ) - { - int value = unpack4( buf, 0 ); - value = ( value ^ 0x80000000 ); // flip MSB because "int" is signed - return value; - } + static final void pack4(byte[] data, int offs, int val) { + data[offs++] = (byte) (val >> 24); + data[offs++] = (byte) (val >> 16); + data[offs++] = (byte) (val >> 8); + data[offs++] = (byte) val; + } + static final void pack8(byte[] data, int offs, long val) { + pack4(data, 0, (int) (val >> 32)); + pack4(data, 4, (int) val); + } - /** - * Convert a byte array into a long (signed 64-bit) value. - */ - public static long convertToLong( byte[] buf ) - { - long value = ( (long) unpack4( buf, 0 ) << 32 ) - + ( unpack4( buf, 4 ) & 0xFFFFFFFFL ); - value = ( value ^ 0x8000000000000000L ); // flip MSB because "long" is signed - return value; - } + /** + * Test static methods + */ + public static void main(String[] args) { + byte[] buf; + buf = convertToByteArray(5); + System.out.println("int value of 5 is: " + convertToInt(buf)); + buf = convertToByteArray(-1); + System.out.println("int value of -1 is: " + convertToInt(buf)); + buf = convertToByteArray(22111000); + System.out.println("int value of 22111000 is: " + convertToInt(buf)); - static int unpack4( byte[] buf, int offset ) - { - int value = ( buf[ offset ] << 24 ) - | ( ( buf[ offset+1 ] << 16 ) & 0x00FF0000 ) - | ( ( buf[ offset+2 ] << 8 ) & 0x0000FF00 ) - | ( ( buf[ offset+3 ] << 0 ) & 0x000000FF ); + buf = convertToByteArray(5L); + System.out.println("long value of 5 is: " + convertToLong(buf)); - return value; - } + buf = convertToByteArray(-1L); + System.out.println("long value of -1 is: " + convertToLong(buf)); + buf = convertToByteArray(1112223334445556667L); + System.out.println("long value of 1112223334445556667 is: " + + convertToLong(buf)); + } - static final void pack2( byte[] data, int offs, int val ) - { - data[offs++] = (byte) ( val >> 8 ); - data[offs++] = (byte) val; - } - - - static final void pack4( byte[] data, int offs, int val ) - { - data[offs++] = (byte) ( val >> 24 ); - data[offs++] = (byte) ( val >> 16 ); - data[offs++] = (byte) ( val >> 8 ); - data[offs++] = (byte) val; - } - - - static final void pack8( byte[] data, int offs, long val ) - { - pack4( data, 0, (int) ( val >> 32 ) ); - pack4( data, 4, (int) val ); - } - - - /** - * Test static methods - */ - public static void main( String[] args ) - { - byte[] buf; - - buf = convertToByteArray( (int) 5 ); - System.out.println( "int value of 5 is: " + convertToInt( buf ) ); - - buf = convertToByteArray( (int) -1 ); - System.out.println( "int value of -1 is: " + convertToInt( buf ) ); - - buf = convertToByteArray( (int) 22111000 ); - System.out.println( "int value of 22111000 is: " + convertToInt( buf ) ); - - - buf = convertToByteArray( (long) 5L ); - System.out.println( "long value of 5 is: " + convertToLong( buf ) ); - - buf = convertToByteArray( (long) -1L ); - System.out.println( "long value of -1 is: " + convertToLong( buf ) ); - - buf = convertToByteArray( (long) 1112223334445556667L ); - System.out.println( "long value of 1112223334445556667 is: " + convertToLong( buf ) ); - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerComparator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerComparator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerComparator.java (working copy) @@ -69,55 +69,53 @@ /** * Comparator for Integer objects. 
- * + * * @author Christof Dallermassl * @version $Id: IntegerComparator.java,v 1.2 2002/05/31 06:33:20 boisvert Exp $ */ -public final class IntegerComparator - implements Comparator, Serializable -{ +public final class IntegerComparator implements Comparator, Serializable { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Compare two objects. + * + * @param obj1 + * First object + * @param obj2 + * Second object + * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, and a + * negative integer if obj1 < obj2 + */ + public int compare(Object obj1, Object obj2) { + if (obj1 == obj2) { + return 0; + } - /** - * Compare two objects. - * - * @param obj1 First object - * @param obj2 Second object - * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, - * and a negative integer if obj1 < obj2 - */ - public int compare( Object obj1, Object obj2 ) - { - if ( obj1 == obj2 ) { - return 0; - } + if (obj1 == null) { + throw new IllegalArgumentException("Argument 'obj1' is null"); + } - if ( obj1 == null ) { - throw new IllegalArgumentException( "Argument 'obj1' is null" ); - } + if (obj2 == null) { + throw new IllegalArgumentException("Argument 'obj2' is null"); + } - if ( obj2 == null ) { - throw new IllegalArgumentException( "Argument 'obj2' is null" ); - } + // complicated to avoid usage of Integer.compareTo, as this + // method is Java 1.2 only! + int int1 = ((Integer) obj1).intValue(); + int int2 = ((Integer) obj2).intValue(); + if (int1 == int2) { + return 0; + } - // complicated to avoid usage of Integer.compareTo, as this - // method is Java 1.2 only! - int int1 = ( (Integer) obj1 ).intValue(); - int int2 = ( (Integer) obj2 ).intValue(); - if ( int1 == int2 ) { - return 0; - } - - if ( int1 < int2 ) { - return -1; - } else { - return 1; - } + if (int1 < int2) { + return -1; + } else { + return 1; } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerSerializer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerSerializer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/IntegerSerializer.java (working copy) @@ -68,52 +68,43 @@ /** * Optimized serializer for integers. - * + * * @author Alex Boisvert * @version $Id: IntegerSerializer.java,v 1.2 2003/09/21 15:47:00 boisvert Exp $ */ -public class IntegerSerializer - implements Serializer -{ +public class IntegerSerializer implements Serializer { - - public static final IntegerSerializer INSTANCE = new IntegerSerializer(); - - - /** - * Construct an IntegerSerializer. - */ - public IntegerSerializer() - { - // no op - } + public static final IntegerSerializer INSTANCE = new IntegerSerializer(); - - /** - * Serialize the content of an object into a byte array. - * - * @param obj Object to serialize - * @return a byte array representing the object's state - */ - public byte[] serialize( Object obj ) - throws IOException - { - Integer number = (Integer) obj; - return Conversion.convertToByteArray( number.intValue() ); - } - - - /** - * Deserialize the content of an object from a byte array. 
- * - * @param serialized Byte array representation of the object - * @return deserialized object - */ - public Object deserialize( byte[] serialized ) - throws IOException - { - int number = Conversion.convertToInt( serialized ); - return new Integer( number ); - } + /** + * Construct an IntegerSerializer. + */ + public IntegerSerializer() { + // no op + } + /** + * Serialize the content of an object into a byte array. + * + * @param obj + * Object to serialize + * @return a byte array representing the object's state + */ + public byte[] serialize(Object obj) throws IOException { + Integer number = (Integer) obj; + return Conversion.convertToByteArray(number.intValue()); + } + + /** + * Deserialize the content of an object from a byte array. + * + * @param serialized + * Byte array representation of the object + * @return deserialized object + */ + public Object deserialize(byte[] serialized) throws IOException { + int number = Conversion.convertToInt(serialized); + return new Integer(number); + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongComparator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongComparator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongComparator.java (working copy) @@ -69,48 +69,46 @@ /** * Comparator for java.lang.Long objects. - * + * * @author Alex Boisvert * @version $Id: LongComparator.java,v 1.4 2002/05/31 06:33:20 boisvert Exp $ */ -public final class LongComparator - implements Comparator, Serializable -{ +public final class LongComparator implements Comparator, Serializable { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Compare two objects. + * + * @param obj1 + * First object + * @param obj2 + * Second object + * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, and a + * negative integer if obj1 < obj2 + */ + public int compare(Object obj1, Object obj2) { + if (obj1 == null) { + throw new IllegalArgumentException("Argument 'obj1' is null"); + } - /** - * Compare two objects. - * - * @param obj1 First object - * @param obj2 Second object - * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, - * and a negative integer if obj1 < obj2 - */ - public int compare( Object obj1, Object obj2 ) - { - if ( obj1 == null ) { - throw new IllegalArgumentException( "Argument 'obj1' is null" ); - } + if (obj2 == null) { + throw new IllegalArgumentException("Argument 'obj2' is null"); + } - if ( obj2 == null ) { - throw new IllegalArgumentException( "Argument 'obj2' is null" ); - } + long l1 = ((Long) obj1).longValue(); + long l2 = ((Long) obj2).longValue(); - long l1 = ( (Long) obj1 ).longValue(); - long l2 = ( (Long) obj2 ).longValue(); + if (l1 > l2) { + return 1; + } else if (l1 == l2) { + return 0; + } else { + return -1; + } + } - if ( l1 > l2 ) { - return 1; - } else if ( l1 == l2 ) { - return 0; - } else { - return -1; - } - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongSerializer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongSerializer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/LongSerializer.java (working copy) @@ -68,52 +68,43 @@ /** * Optimized serializer for long integers. 
- * + * * @author Alex Boisvert * @version $Id: LongSerializer.java,v 1.2 2003/09/21 15:47:00 boisvert Exp $ */ -public class LongSerializer - implements Serializer -{ +public class LongSerializer implements Serializer { - - public static final LongSerializer INSTANCE = new LongSerializer(); - - - /** - * Construct a LongSerializer. - */ - public LongSerializer() - { - // no op - } + public static final LongSerializer INSTANCE = new LongSerializer(); - - /** - * Serialize the content of an object into a byte array. - * - * @param obj Object to serialize - * @return a byte array representing the object's state - */ - public byte[] serialize( Object obj ) - throws IOException - { - Long number = (Long) obj; - return Conversion.convertToByteArray( number.longValue() ); - } - - - /** - * Deserialize the content of an object from a byte array. - * - * @param serialized Byte array representation of the object - * @return deserialized object - */ - public Object deserialize( byte[] serialized ) - throws IOException - { - long number = Conversion.convertToLong( serialized ); - return new Long( number ); - } + /** + * Construct a LongSerializer. + */ + public LongSerializer() { + // no op + } + /** + * Serialize the content of an object into a byte array. + * + * @param obj + * Object to serialize + * @return a byte array representing the object's state + */ + public byte[] serialize(Object obj) throws IOException { + Long number = (Long) obj; + return Conversion.convertToByteArray(number.longValue()); + } + + /** + * Deserialize the content of an object from a byte array. + * + * @param serialized + * Byte array representation of the object + * @return deserialized object + */ + public Object deserialize(byte[] serialized) throws IOException { + long number = Conversion.convertToLong(serialized); + return new Long(number); + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicyListener.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicyListener.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/CachePolicyListener.java (working copy) @@ -66,30 +66,32 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; /** - * Callback interface between {@link CachePolicy} and a Cache implementation - * to notify about cached object eviction. + * Callback interface between {@link CachePolicy} and a Cache implementation to + * notify about cached object eviction. *
* Note that CachePolicy implementations typically use - * object equality when removing listeners, so concrete - * implementations of this interface should also pay attention to - * their {@link Object#equals(Object)} and {@link Object#hashCode()} - * methods. - * + * object equality when removing listeners, so concrete implementations + * of this interface should also pay attention to their + * {@link Object#equals(Object)} and {@link Object#hashCode()} methods. + * * @author Alex Boisvert - * @version $Id: CachePolicyListener.java,v 1.3 2003/11/01 13:25:41 dranatunga Exp $ + * @version $Id: CachePolicyListener.java,v 1.3 2003/11/01 13:25:41 dranatunga + * Exp $ */ public interface CachePolicyListener { - /** - * Notification that the cache this listener is attached to is evicting - * the object indicated. - * - * @param obj object being evited from cache - * @throws CacheEvictionException if this listener encountered problems - * while preparing for the specified object's eviction. For example, - * a listener may try to persist the object to disk, and encounter - * an IOException. - */ - public void cacheObjectEvicted(Object obj) throws CacheEvictionException; + /** + * Notification that the cache this listener is attached to is evicting the + * object indicated. + * + * @param obj + * object being evited from cache + * @throws CacheEvictionException + * if this listener encountered problems while preparing for the + * specified object's eviction. For example, a listener may try to + * persist the object to disk, and encounter an + * IOException. + */ + public void cacheObjectEvicted(Object obj) throws CacheEvictionException; } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Tuple.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Tuple.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/Tuple.java (working copy) @@ -64,76 +64,70 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; - /** * Tuple consisting of a key-value pair. - * + * * @author Alex Boisvert * @version $Id: Tuple.java,v 1.2 2001/05/19 14:02:00 boisvert Exp $ */ public final class Tuple { - /** - * Key - */ - private Object _key; + /** + * Key + */ + private Object _key; + /** + * Value + */ + private Object _value; - /** - * Value - */ - private Object _value; + /** + * Construct an empty Tuple. + */ + public Tuple() { + // empty + } + /** + * Construct a Tuple. + * + * @param key + * The key. + * @param value + * The value. + */ + public Tuple(Object key, Object value) { + _key = key; + _value = value; + } - /** - * Construct an empty Tuple. - */ - public Tuple() { - // empty - } + /** + * Get the key. + */ + public Object getKey() { + return _key; + } + /** + * Set the key. + */ + public void setKey(Object key) { + _key = key; + } - /** - * Construct a Tuple. - * - * @param key The key. - * @param value The value. - */ - public Tuple( Object key, Object value ) { - _key = key; - _value = value; - } + /** + * Get the value. + */ + public Object getValue() { + return _value; + } + /** + * Set the value. + */ + public void setValue(Object value) { + _value = value; + } - /** - * Get the key. - */ - public Object getKey() { - return _key; - } - - - /** - * Set the key. - */ - public void setKey( Object key ) { - _key = key; - } - - - /** - * Get the value. - */ - public Object getValue() { - return _value; - } - - - /** - * Set the value. 
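A hypothetical listener implementation for the callback interface above (not from this patch; it assumes CacheEvictionException lives in the same helper package, as the throws clause suggests). It keeps the default identity-based equals()/hashCode(), in line with the "removed by object equality" note in the javadoc:

    import org.apache.hadoop.hive.ql.util.jdbm.helper.CacheEvictionException;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.CachePolicyListener;

    public class LoggingEvictionListener implements CachePolicyListener {
      // Called when the cache is about to evict obj; a CacheEvictionException
      // signals that preparation for the eviction (e.g. persisting to disk) failed.
      public void cacheObjectEvicted(Object obj) throws CacheEvictionException {
        System.out.println("evicted from cache: " + obj);
      }
    }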
- */ - public void setValue( Object value ) { - _value = value; - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArrayComparator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArrayComparator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArrayComparator.java (working copy) @@ -64,89 +64,86 @@ package org.apache.hadoop.hive.ql.util.jdbm.helper; -import java.util.Comparator; import java.io.Serializable; +import java.util.Comparator; /** * Comparator for byte arrays. - * + * * @author Alex Boisvert - * @version $Id: ByteArrayComparator.java,v 1.4 2002/05/31 06:33:20 boisvert Exp $ + * @version $Id: ByteArrayComparator.java,v 1.4 2002/05/31 06:33:20 boisvert Exp + * $ */ -public final class ByteArrayComparator - implements Comparator, Serializable -{ +public final class ByteArrayComparator implements Comparator, Serializable { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Compare two objects. + * + * @param obj1 + * First object + * @param obj2 + * Second object + * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, and a + * negative integer if obj1 < obj2 + */ + public int compare(Object obj1, Object obj2) { + if (obj1 == null) { + throw new IllegalArgumentException("Argument 'obj1' is null"); + } - /** - * Compare two objects. - * - * @param obj1 First object - * @param obj2 Second object - * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, - * and a negative integer if obj1 < obj2 - */ - public int compare( Object obj1, Object obj2 ) - { - if ( obj1 == null ) { - throw new IllegalArgumentException( "Argument 'obj1' is null" ); - } + if (obj2 == null) { + throw new IllegalArgumentException("Argument 'obj2' is null"); + } - if ( obj2 == null ) { - throw new IllegalArgumentException( "Argument 'obj2' is null" ); - } + return compareByteArray((byte[]) obj1, (byte[]) obj2); + } - return compareByteArray( (byte[]) obj1, (byte[]) obj2 ); - } + /** + * Compare two byte arrays. + */ + public static int compareByteArray(byte[] thisKey, byte[] otherKey) { + int len = Math.min(thisKey.length, otherKey.length); - - /** - * Compare two byte arrays. 
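For the Tuple key/value holder in the hunk above, a tiny hypothetical usage sketch (class names from the diff, everything else illustrative):

    import org.apache.hadoop.hive.ql.util.jdbm.helper.Tuple;

    public class TupleSketch {
      public static void main(String[] args) {
        // Tuple is an untyped, mutable key/value pair; browsers copy into it.
        Tuple t = new Tuple("answer", new Integer(42));
        System.out.println(t.getKey() + " -> " + t.getValue()); // answer -> 42
        t.setValue(new Integer(43));                            // reuse the same holder
      }
    }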
- */ - public static int compareByteArray( byte[] thisKey, byte[] otherKey ) - { - int len = Math.min( thisKey.length, otherKey.length ); - - // compare the byte arrays - for ( int i=0; i= 0 ) { - if ( otherKey[i] >= 0 ) { - // both positive - if ( thisKey[i] < otherKey[i] ) { - return -1; - } else if ( thisKey[i] > otherKey[i] ) { - return 1; - } - } else { - // otherKey is negative => greater (because MSB is 1) - return -1; - } - } else { - if ( otherKey[i] >= 0 ) { - // thisKey is negative => greater (because MSB is 1) - return 1; - } else { - // both negative - if ( thisKey[i] < otherKey[i] ) { - return -1; - } else if ( thisKey[i] > otherKey[i] ) { - return 1; - } - } - } + // compare the byte arrays + for (int i = 0; i < len; i++) { + if (thisKey[i] >= 0) { + if (otherKey[i] >= 0) { + // both positive + if (thisKey[i] < otherKey[i]) { + return -1; + } else if (thisKey[i] > otherKey[i]) { + return 1; + } + } else { + // otherKey is negative => greater (because MSB is 1) + return -1; } - if ( thisKey.length == otherKey.length) { - return 0; - } - if ( thisKey.length < otherKey.length ) { + } else { + if (otherKey[i] >= 0) { + // thisKey is negative => greater (because MSB is 1) + return 1; + } else { + // both negative + if (thisKey[i] < otherKey[i]) { return -1; + } else if (thisKey[i] > otherKey[i]) { + return 1; + } } - return 1; + } } + if (thisKey.length == otherKey.length) { + return 0; + } + if (thisKey.length < otherKey.length) { + return -1; + } + return 1; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArraySerializer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArraySerializer.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/ByteArraySerializer.java (working copy) @@ -66,55 +66,48 @@ import java.io.IOException; - /** - * Serializer for byte arrays -- simple returns the byte array itself. No actual + * Serializer for byte arrays -- simple returns the byte array itself. No actual * serialization is performed. - * + * * @author Alex Boisvert - * @version $Id: ByteArraySerializer.java,v 1.1 2003/03/21 02:48:42 boisvert Exp $ + * @version $Id: ByteArraySerializer.java,v 1.1 2003/03/21 02:48:42 boisvert Exp + * $ */ -public final class ByteArraySerializer - implements Serializer -{ +public final class ByteArraySerializer implements Serializer { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Static instance. + */ + public static final ByteArraySerializer INSTANCE = new ByteArraySerializer(); - /** - * Static instance. - */ - public static final ByteArraySerializer INSTANCE = new ByteArraySerializer(); - - - /** - * Serialize the content of an object into a byte array. - * - * @param obj Object to serialize - * @return a byte array representing the object's state - * - */ - public byte[] serialize( Object obj ) - throws IOException - { - return (byte[]) obj; - } + /** + * Serialize the content of an object into a byte array. + * + * @param obj + * Object to serialize + * @return a byte array representing the object's state + * + */ + public byte[] serialize(Object obj) throws IOException { + return (byte[]) obj; + } - - /** - * Deserialize the content of an object from a byte array. 
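The sign handling in compareByteArray() above effectively orders bytes as unsigned values: a byte with its MSB set sorts after any non-negative byte. A small sketch of the resulting ordering, calling the public static method shown in the diff:

    import org.apache.hadoop.hive.ql.util.jdbm.helper.ByteArrayComparator;

    public class ByteArrayComparatorSketch {
      public static void main(String[] args) {
        byte[] small = { 0x01 };
        byte[] big = { (byte) 0x80 }; // -128 as a signed byte, but "greater" per the MSB rule
        System.out.println(ByteArrayComparator.compareByteArray(small, big));   // -1
        System.out.println(ByteArrayComparator.compareByteArray(big, small));   // 1
        System.out.println(ByteArrayComparator.compareByteArray(small, small)); // 0
      }
    }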
- * - * @param serialized Byte array representation of the object - * @return deserialized object - * - */ - public Object deserialize( byte[] serialized ) - throws IOException - { - return serialized; - } + /** + * Deserialize the content of an object from a byte array. + * + * @param serialized + * Byte array representation of the object + * @return deserialized object + * + */ + public Object deserialize(byte[] serialized) throws IOException { + return serialized; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/TupleBrowser.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/TupleBrowser.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/TupleBrowser.java (working copy) @@ -67,33 +67,32 @@ import java.io.IOException; /** - * Browser to traverse a collection of tuples. The browser allows for - * forward and reverse order traversal. - * + * Browser to traverse a collection of tuples. The browser allows for forward + * and reverse order traversal. + * * @author Alex Boisvert * @version $Id: TupleBrowser.java,v 1.2 2001/05/19 14:02:00 boisvert Exp $ */ public abstract class TupleBrowser { - /** - * Get the next tuple. - * - * @param tuple Tuple into which values are copied. - * @return True if values have been copied in tuple, or false if there is - * no next tuple. - */ - public abstract boolean getNext( Tuple tuple ) - throws IOException; + /** + * Get the next tuple. + * + * @param tuple + * Tuple into which values are copied. + * @return True if values have been copied in tuple, or false if there is no + * next tuple. + */ + public abstract boolean getNext(Tuple tuple) throws IOException; + /** + * Get the previous tuple. + * + * @param tuple + * Tuple into which values are copied. + * @return True if values have been copied in tuple, or false if there is no + * previous tuple. + */ + public abstract boolean getPrevious(Tuple tuple) throws IOException; - /** - * Get the previous tuple. - * - * @param tuple Tuple into which values are copied. - * @return True if values have been copied in tuple, or false if there is - * no previous tuple. - */ - public abstract boolean getPrevious( Tuple tuple ) - throws IOException; - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/StringComparator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/StringComparator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/helper/StringComparator.java (working copy) @@ -68,40 +68,38 @@ import java.util.Comparator; /** - * Comparator for String objects. Delegates to String.compareTo(). - * + * Comparator for String objects. Delegates to String.compareTo(). + * * @author Alex Boisvert * @version $Id: StringComparator.java,v 1.5 2005/06/25 23:12:31 doomdark Exp $ */ -public final class StringComparator - implements Comparator, Serializable -{ +public final class StringComparator implements Comparator, Serializable { - /** - * Version id for serialization. - */ - final static long serialVersionUID = 1L; + /** + * Version id for serialization. + */ + final static long serialVersionUID = 1L; + /** + * Compare two objects. 
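A hypothetical forward-scan helper over the abstract TupleBrowser above; how the browser is obtained (typically from a jdbm BTree, which is outside this hunk) is deliberately left open:

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.helper.Tuple;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.TupleBrowser;

    public class BrowserSketch {
      // Prints every remaining tuple; getNext() copies key/value into the
      // reused Tuple and returns false once the end is reached.
      static void dump(TupleBrowser browser) throws IOException {
        Tuple tuple = new Tuple();
        while (browser.getNext(tuple)) {
          System.out.println(tuple.getKey() + " -> " + tuple.getValue());
        }
      }
    }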
+ * + * @param obj1 + * First object + * @param obj2 + * Second object + * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, and a + * negative integer if obj1 < obj2 + */ + public int compare(Object obj1, Object obj2) { + if (obj1 == null) { + throw new IllegalArgumentException("Argument 'obj1' is null"); + } - /** - * Compare two objects. - * - * @param obj1 First object - * @param obj2 Second object - * @return a positive integer if obj1 > obj2, 0 if obj1 == obj2, - * and a negative integer if obj1 < obj2 - */ - public int compare( Object obj1, Object obj2 ) - { - if ( obj1 == null ) { - throw new IllegalArgumentException( "Argument 'obj1' is null" ); - } + if (obj2 == null) { + throw new IllegalArgumentException("Argument 'obj2' is null"); + } - if ( obj2 == null ) { - throw new IllegalArgumentException( "Argument 'obj2' is null" ); - } + return ((String) obj1).compareTo((String) obj2); + } - return ( (String) obj1 ).compareTo((String) obj2 ); - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerFactory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerFactory.java (working copy) @@ -66,82 +66,82 @@ package org.apache.hadoop.hive.ql.util.jdbm; +import java.io.File; import java.io.IOException; -import java.io.File; import java.util.Properties; /** * This is the factory class to use for instantiating {@link RecordManager} * instances. - * + * * @author Alex Boisvert * @author Cees de Groot - * @version $Id: RecordManagerFactory.java,v 1.2 2005/06/25 23:12:31 doomdark Exp $ + * @version $Id: RecordManagerFactory.java,v 1.2 2005/06/25 23:12:31 doomdark + * Exp $ */ -public final class RecordManagerFactory -{ +public final class RecordManagerFactory { - /** - * Create a record manager. - * - * @param name Name of the record file. - * @throws IOException if an I/O related exception occurs while creating - * or opening the record manager. - * @throws UnsupportedOperationException if some options are not supported by the - * implementation. - * @throws IllegalArgumentException if some options are invalid. - */ - public static RecordManager createRecordManager( String name ) - throws IOException - { - return createRecordManager( name, new Properties() ); - } + /** + * Create a record manager. + * + * @param name + * Name of the record file. + * @throws IOException + * if an I/O related exception occurs while creating or opening the + * record manager. + * @throws UnsupportedOperationException + * if some options are not supported by the implementation. + * @throws IllegalArgumentException + * if some options are invalid. + */ + public static RecordManager createRecordManager(String name) + throws IOException { + return createRecordManager(name, new Properties()); + } + /** + * Create a record manager. + * + * @param name + * Name of the record file. + * @param options + * Record manager options. + * @throws IOException + * if an I/O related exception occurs while creating or opening the + * record manager. + * @throws UnsupportedOperationException + * if some options are not supported by the implementation. + * @throws IllegalArgumentException + * if some options are invalid. 
+ */ + public static RecordManager createRecordManager(String name, + Properties options) throws IOException { + RecordManagerProvider factory = getFactory(options); + return factory.createRecordManager(name, options); + } - /** - * Create a record manager. - * - * @param name Name of the record file. - * @param options Record manager options. - * @throws IOException if an I/O related exception occurs while creating - * or opening the record manager. - * @throws UnsupportedOperationException if some options are not supported by the - * implementation. - * @throws IllegalArgumentException if some options are invalid. - */ - public static RecordManager createRecordManager( String name, - Properties options ) - throws IOException - { - RecordManagerProvider factory = getFactory(options); - return factory.createRecordManager( name, options ); - } - - public static RecordManager createRecordManager( File file, Properties options) - throws IOException - { - RecordManagerProvider factory = getFactory(options); - return factory.createRecordManager( file, options ); - } - - private static RecordManagerProvider getFactory(Properties options) { - String provider; - Class clazz; - RecordManagerProvider factory; + public static RecordManager createRecordManager(File file, Properties options) + throws IOException { + RecordManagerProvider factory = getFactory(options); + return factory.createRecordManager(file, options); + } - provider = options.getProperty( RecordManagerOptions.PROVIDER_FACTORY, - "org.apache.hadoop.hive.ql.util.jdbm.recman.Provider" ); + private static RecordManagerProvider getFactory(Properties options) { + String provider; + Class clazz; + RecordManagerProvider factory; - try { - clazz = Class.forName( provider ); - factory = (RecordManagerProvider) clazz.newInstance(); - } catch ( Exception except ) { - throw new IllegalArgumentException( "Invalid record manager provider: " - + provider - + "\n[" + except.getClass().getName() - + ": " + except.getMessage() - + "]" ); - } - return factory; + provider = options.getProperty(RecordManagerOptions.PROVIDER_FACTORY, + "org.apache.hadoop.hive.ql.util.jdbm.recman.Provider"); + + try { + clazz = Class.forName(provider); + factory = (RecordManagerProvider) clazz.newInstance(); + } catch (Exception except) { + throw new IllegalArgumentException("Invalid record manager provider: " + + provider + "\n[" + except.getClass().getName() + ": " + + except.getMessage() + "]"); } + return factory; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerProvider.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerProvider.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerProvider.java (working copy) @@ -66,36 +66,38 @@ package org.apache.hadoop.hive.ql.util.jdbm; +import java.io.File; import java.io.IOException; -import java.io.File; import java.util.Properties; /** - * Provider of RecordManager implementation. Classes implementing this - * interface act as a factory to provide implementations of RecordManager. - * + * Provider of RecordManager implementation. Classes implementing this interface + * act as a factory to provide implementations of RecordManager. 
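Putting the factory above to work, as a hedged sketch: the file base name "testdb" and the try/finally shape are illustrative, while the createRecordManager overloads are the ones in this hunk.

    import java.io.IOException;
    import java.util.Properties;

    import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
    import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerFactory;

    public class FactorySketch {
      public static void main(String[] args) throws IOException {
        // The default provider is resolved from RecordManagerOptions.PROVIDER_FACTORY.
        RecordManager recman =
            RecordManagerFactory.createRecordManager("testdb", new Properties());
        try {
          // ... insert / fetch / update / delete against recman ...
        } finally {
          recman.close();
        }
      }
    }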
+ * * @author Alex Boisvert - * @version $Id: RecordManagerProvider.java,v 1.2 2005/06/25 23:12:31 doomdark Exp $ + * @version $Id: RecordManagerProvider.java,v 1.2 2005/06/25 23:12:31 doomdark + * Exp $ */ -public interface RecordManagerProvider -{ +public interface RecordManagerProvider { - /** - * Create a record manager. - * - * @param filename Base filename of the record file. - * @param options Record manager options. - * @throws IOException if an I/O related exception occurs while creating - * or opening the record manager. - * @throws UnsupportedOperationException if some options are not supported by the - * implementation. - * @throws IllegalArgumentException if some options are invalid. - */ - public RecordManager createRecordManager( String filename, - Properties options ) - throws IOException; - - public RecordManager createRecordManager( File file, - Properties options ) - throws IOException; + /** + * Create a record manager. + * + * @param filename + * Base filename of the record file. + * @param options + * Record manager options. + * @throws IOException + * if an I/O related exception occurs while creating or opening the + * record manager. + * @throws UnsupportedOperationException + * if some options are not supported by the implementation. + * @throws IllegalArgumentException + * if some options are invalid. + */ + public RecordManager createRecordManager(String filename, Properties options) + throws IOException; + + public RecordManager createRecordManager(File file, Properties options) + throws IOException; } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordCache.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordCache.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordCache.java (working copy) @@ -68,31 +68,31 @@ import java.io.IOException; /** - * This interface is used for synchronization. - *
- * RecordManager ensures that the cache has the up-to-date information - * by way of an invalidation protocol. + * This interface is used for synchronization. + *
+ * RecordManager ensures that the cache has the up-to-date information by way of + * an invalidation protocol. */ public interface RecordCache { - /** - * Notification to flush content related to a given record. - */ - public void flush(long recid) throws IOException; + /** + * Notification to flush content related to a given record. + */ + public void flush(long recid) throws IOException; - /** - * Notification to flush data all of records. - */ - public void flushAll() throws IOException; + /** + * Notification to flush data all of records. + */ + public void flushAll() throws IOException; - /** - * Notification to invalidate content related to given record. - */ - public void invalidate(long recid) throws IOException; + /** + * Notification to invalidate content related to given record. + */ + public void invalidate(long recid) throws IOException; - /** - * Notification to invalidate content of all records. - */ - public void invalidateAll() throws IOException; + /** + * Notification to invalidate content of all records. + */ + public void invalidateAll() throws IOException; } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java (working copy) @@ -66,456 +66,411 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.IOException; import java.io.File; - +import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; -import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer; import org.apache.hadoop.hive.ql.util.jdbm.helper.DefaultSerializer; +import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer; /** - * This class manages records, which are uninterpreted blobs of data. The - * set of operations is simple and straightforward: you communicate with - * the class using long "rowids" and byte[] data blocks. Rowids are returned - * on inserts and you can stash them away someplace safe to be able to get - * back to them. Data blocks can be as long as you wish, and may have - * lengths different from the original when updating. - *
- * Operations are synchronized, so that only one of them will happen - * concurrently even if you hammer away from multiple threads. Operations - * are made atomic by keeping a transaction log which is recovered after - * a crash, so the operations specified by this interface all have ACID - * properties. - *
- * You identify a file by just the name. The package attaches .db - * for the database file, and .lg for the transaction log. The - * transaction log is synchronized regularly and then restarted, so don't - * worry if you see the size going up and down. - * + * This class manages records, which are uninterpreted blobs of data. The set of + * operations is simple and straightforward: you communicate with the class + * using long "rowids" and byte[] data blocks. Rowids are returned on inserts + * and you can stash them away someplace safe to be able to get back to them. + * Data blocks can be as long as you wish, and may have lengths different from + * the original when updating. + *
+ * Operations are synchronized, so that only one of them will happen + * concurrently even if you hammer away from multiple threads. Operations are + * made atomic by keeping a transaction log which is recovered after a crash, so + * the operations specified by this interface all have ACID properties. + *
+ * You identify a file by just the name. The package attaches .db for + * the database file, and .lg for the transaction log. The transaction + * log is synchronized regularly and then restarted, so don't worry if you see + * the size going up and down. + * * @author Alex Boisvert * @author Cees de Groot * @version $Id: BaseRecordManager.java,v 1.8 2005/06/25 23:12:32 doomdark Exp $ */ -public final class BaseRecordManager - implements RecordManager -{ +public final class BaseRecordManager implements RecordManager { - /** - * Underlying record file. - */ - private RecordFile _file; + /** + * Underlying record file. + */ + private RecordFile _file; + /** + * Physical row identifier manager. + */ + private final PhysicalRowIdManager _physMgr; - /** - * Physical row identifier manager. - */ - private PhysicalRowIdManager _physMgr; + /** + * Logigal to Physical row identifier manager. + */ + private final LogicalRowIdManager _logMgr; + /** + * Page manager. + */ + private PageManager _pageman; - /** - * Logigal to Physical row identifier manager. - */ - private LogicalRowIdManager _logMgr; + /** + * Reserved slot for name directory. + */ + public static final int NAME_DIRECTORY_ROOT = 0; + /** + * Static debugging flag + */ + public static final boolean DEBUG = false; - /** - * Page manager. - */ - private PageManager _pageman; + /** + * Directory of named JDBMHashtables. This directory is a persistent + * directory, stored as a Hashtable. It can be retrived by using the + * NAME_DIRECTORY_ROOT. + */ + private Map _nameDirectory; + /** + * Creates a record manager for the indicated file + * + * @throws IOException + * when the file cannot be opened or is not a valid file + * content-wise. + */ + public BaseRecordManager(String filename) throws IOException { + _file = new RecordFile(filename); + _pageman = new PageManager(_file); + _physMgr = new PhysicalRowIdManager(_file, _pageman); + _logMgr = new LogicalRowIdManager(_file, _pageman); + } - /** - * Reserved slot for name directory. - */ - public static final int NAME_DIRECTORY_ROOT = 0; + /** + * Creates a record manager for the indicated file + * + * @throws IOException + * when the file cannot be opened or is not a valid file + * content-wise. + */ + public BaseRecordManager(File file) throws IOException { + _file = new RecordFile(file); + _pageman = new PageManager(_file); + _physMgr = new PhysicalRowIdManager(_file, _pageman); + _logMgr = new LogicalRowIdManager(_file, _pageman); + } + /** + * Get the underlying Transaction Manager + */ + public synchronized TransactionManager getTransactionManager() { + checkIfClosed(); - /** - * Static debugging flag - */ - public static final boolean DEBUG = false; + return _file.txnMgr; + } - - /** - * Directory of named JDBMHashtables. This directory is a persistent - * directory, stored as a Hashtable. It can be retrived by using - * the NAME_DIRECTORY_ROOT. - */ - private Map _nameDirectory; + /** + * Switches off transactioning for the record manager. This means that a) a + * transaction log is not kept, and b) writes aren't synch'ed after every + * update. This is useful when batch inserting into a new database. + *
+ * Only call this method directly after opening the file, otherwise the + * results will be undefined. + */ + public synchronized void disableTransactions() { + checkIfClosed(); + _file.disableTransactions(); + } - /** - * Creates a record manager for the indicated file - * - * @throws IOException when the file cannot be opened or is not - * a valid file content-wise. - */ - public BaseRecordManager( String filename ) - throws IOException - { - _file = new RecordFile( filename ); - _pageman = new PageManager( _file ); - _physMgr = new PhysicalRowIdManager( _file, _pageman ); - _logMgr = new LogicalRowIdManager( _file, _pageman ); - } - - /** - * Creates a record manager for the indicated file - * - * @throws IOException when the file cannot be opened or is not - * a valid file content-wise. - */ - public BaseRecordManager( File file ) - throws IOException - { - _file = new RecordFile( file ); - _pageman = new PageManager( _file ); - _physMgr = new PhysicalRowIdManager( _file, _pageman ); - _logMgr = new LogicalRowIdManager( _file, _pageman ); - } - - + /** + * Closes the record manager. + * + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void close() throws IOException { + checkIfClosed(); + _pageman.close(); + _pageman = null; - /** - * Get the underlying Transaction Manager - */ - public synchronized TransactionManager getTransactionManager() - { - checkIfClosed(); + _file.close(); + _file = null; + } - return _file.txnMgr; - } + /** + * Inserts a new record using standard java object serialization. + * + * @param obj + * the object for the new record. + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public long insert(Object obj) throws IOException { + return insert(obj, DefaultSerializer.INSTANCE); + } + /** + * Inserts a new record using a custom serializer. + * + * @param obj + * the object for the new record. + * @param serializer + * a custom serializer + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized long insert(Object obj, Serializer serializer) + throws IOException { + byte[] data; + long recid; + Location physRowId; - /** - * Switches off transactioning for the record manager. This means - * that a) a transaction log is not kept, and b) writes aren't - * synch'ed after every update. This is useful when batch inserting - * into a new database. - *
- * Only call this method directly after opening the file, otherwise - * the results will be undefined. - */ - public synchronized void disableTransactions() - { - checkIfClosed(); + checkIfClosed(); - _file.disableTransactions(); + data = serializer.serialize(obj); + physRowId = _physMgr.insert(data, 0, data.length); + recid = _logMgr.insert(physRowId).toLong(); + if (DEBUG) { + System.out.println("BaseRecordManager.insert() recid " + recid + + " length " + data.length); } + return recid; + } - - /** - * Closes the record manager. - * - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized void close() - throws IOException - { - checkIfClosed(); - - _pageman.close(); - _pageman = null; - - _file.close(); - _file = null; + /** + * Deletes a record. + * + * @param recid + * the rowid for the record that should be deleted. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void delete(long recid) throws IOException { + checkIfClosed(); + if (recid <= 0) { + throw new IllegalArgumentException("Argument 'recid' is invalid: " + + recid); } - - /** - * Inserts a new record using standard java object serialization. - * - * @param obj the object for the new record. - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public long insert( Object obj ) - throws IOException - { - return insert( obj, DefaultSerializer.INSTANCE ); + if (DEBUG) { + System.out.println("BaseRecordManager.delete() recid " + recid); } - - /** - * Inserts a new record using a custom serializer. - * - * @param obj the object for the new record. - * @param serializer a custom serializer - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized long insert( Object obj, Serializer serializer ) - throws IOException - { - byte[] data; - long recid; - Location physRowId; - - checkIfClosed(); + Location logRowId = new Location(recid); + Location physRowId = _logMgr.fetch(logRowId); + _physMgr.delete(physRowId); + _logMgr.delete(logRowId); + } - data = serializer.serialize( obj ); - physRowId = _physMgr.insert( data, 0, data.length ); - recid = _logMgr.insert( physRowId ).toLong(); - if ( DEBUG ) { - System.out.println( "BaseRecordManager.insert() recid " + recid + " length " + data.length ) ; - } - return recid; - } + /** + * Updates a record using standard java object serialization. + * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public void update(long recid, Object obj) throws IOException { + update(recid, obj, DefaultSerializer.INSTANCE); + } - /** - * Deletes a record. - * - * @param recid the rowid for the record that should be deleted. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized void delete( long recid ) - throws IOException - { - checkIfClosed(); - if ( recid <= 0 ) { - throw new IllegalArgumentException( "Argument 'recid' is invalid: " - + recid ); - } - - if ( DEBUG ) { - System.out.println( "BaseRecordManager.delete() recid " + recid ) ; - } - - Location logRowId = new Location( recid ); - Location physRowId = _logMgr.fetch( logRowId ); - _physMgr.delete( physRowId ); - _logMgr.delete( logRowId ); + /** + * Updates a record using a custom serializer. 
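As the disableTransactions() javadoc above suggests, the switch is intended for bulk-loading a fresh database right after opening it. A hedged sketch of that pattern (the file name and loop are illustrative; the constructor and method names are the ones in this hunk):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.recman.BaseRecordManager;

    public class BulkLoadSketch {
      public static void main(String[] args) throws IOException {
        BaseRecordManager recman = new BaseRecordManager("bulkload");
        recman.disableTransactions(); // call immediately after opening, per the javadoc
        for (int i = 0; i < 100000; i++) {
          recman.insert(new Long(i)); // default java serialization
        }
        recman.commit();
        recman.close();
      }
    }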
+ * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. + * @param serializer + * a custom serializer + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void update(long recid, Object obj, Serializer serializer) + throws IOException { + checkIfClosed(); + if (recid <= 0) { + throw new IllegalArgumentException("Argument 'recid' is invalid: " + + recid); } + Location logRecid = new Location(recid); + Location physRecid = _logMgr.fetch(logRecid); - /** - * Updates a record using standard java object serialization. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public void update( long recid, Object obj ) - throws IOException - { - update( recid, obj, DefaultSerializer.INSTANCE ); + byte[] data = serializer.serialize(obj); + if (DEBUG) { + System.out.println("BaseRecordManager.update() recid " + recid + + " length " + data.length); } - - /** - * Updates a record using a custom serializer. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @param serializer a custom serializer - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized void update( long recid, Object obj, Serializer serializer ) - throws IOException - { - checkIfClosed(); - if ( recid <= 0 ) { - throw new IllegalArgumentException( "Argument 'recid' is invalid: " - + recid ); - } - - Location logRecid = new Location( recid ); - Location physRecid = _logMgr.fetch( logRecid ); - - byte[] data = serializer.serialize( obj ); - if ( DEBUG ) { - System.out.println( "BaseRecordManager.update() recid " + recid + " length " + data.length ) ; - } - - Location newRecid = _physMgr.update( physRecid, data, 0, data.length ); - if ( ! newRecid.equals( physRecid ) ) { - _logMgr.update( logRecid, newRecid ); - } + Location newRecid = _physMgr.update(physRecid, data, 0, data.length); + if (!newRecid.equals(physRecid)) { + _logMgr.update(logRecid, newRecid); } + } + /** + * Fetches a record using standard java object serialization. + * + * @param recid + * the recid for the record that must be fetched. + * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public Object fetch(long recid) throws IOException { + return fetch(recid, DefaultSerializer.INSTANCE); + } - /** - * Fetches a record using standard java object serialization. - * - * @param recid the recid for the record that must be fetched. - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public Object fetch( long recid ) - throws IOException - { - return fetch( recid, DefaultSerializer.INSTANCE ); - } + /** + * Fetches a record using a custom serializer. + * + * @param recid + * the recid for the record that must be fetched. + * @param serializer + * a custom serializer + * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized Object fetch(long recid, Serializer serializer) + throws IOException { + byte[] data; - - /** - * Fetches a record using a custom serializer. - * - * @param recid the recid for the record that must be fetched. 
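The serializer-taking overloads above pair naturally with the helper serializers earlier in this patch. A hypothetical sketch, assuming these overloads are also exposed on the RecordManager interface that BaseRecordManager implements:

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
    import org.apache.hadoop.hive.ql.util.jdbm.helper.LongSerializer;

    public class CustomSerializerSketch {
      // Store a counter with the optimized Long serializer instead of
      // default java object serialization.
      static long writeCounter(RecordManager recman, long value) throws IOException {
        long recid = recman.insert(new Long(value), LongSerializer.INSTANCE);
        recman.commit();
        return recid;
      }

      static long readCounter(RecordManager recman, long recid) throws IOException {
        Long stored = (Long) recman.fetch(recid, LongSerializer.INSTANCE);
        return stored.longValue();
      }
    }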
- * @param serializer a custom serializer - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized Object fetch( long recid, Serializer serializer ) - throws IOException - { - byte[] data; - - checkIfClosed(); - if ( recid <= 0 ) { - throw new IllegalArgumentException( "Argument 'recid' is invalid: " - + recid ); - } - data = _physMgr.fetch( _logMgr.fetch( new Location( recid ) ) ); - if ( DEBUG ) { - System.out.println( "BaseRecordManager.fetch() recid " + recid + " length " + data.length ) ; - } - return serializer.deserialize( data ); + checkIfClosed(); + if (recid <= 0) { + throw new IllegalArgumentException("Argument 'recid' is invalid: " + + recid); } - - - /** - * Returns the number of slots available for "root" rowids. These slots - * can be used to store special rowids, like rowids that point to - * other rowids. Root rowids are useful for bootstrapping access to - * a set of data. - */ - public int getRootCount() - { - return FileHeader.NROOTS; + data = _physMgr.fetch(_logMgr.fetch(new Location(recid))); + if (DEBUG) { + System.out.println("BaseRecordManager.fetch() recid " + recid + + " length " + data.length); } + return serializer.deserialize(data); + } - /** - * Returns the indicated root rowid. - * - * @see #getRootCount - */ - public synchronized long getRoot( int id ) - throws IOException - { - checkIfClosed(); + /** + * Returns the number of slots available for "root" rowids. These slots can be + * used to store special rowids, like rowids that point to other rowids. Root + * rowids are useful for bootstrapping access to a set of data. + */ + public int getRootCount() { + return FileHeader.NROOTS; + } - return _pageman.getFileHeader().getRoot( id ); - } + /** + * Returns the indicated root rowid. + * + * @see #getRootCount + */ + public synchronized long getRoot(int id) throws IOException { + checkIfClosed(); + return _pageman.getFileHeader().getRoot(id); + } - /** - * Sets the indicated root rowid. - * - * @see #getRootCount - */ - public synchronized void setRoot( int id, long rowid ) - throws IOException - { - checkIfClosed(); + /** + * Sets the indicated root rowid. + * + * @see #getRootCount + */ + public synchronized void setRoot(int id, long rowid) throws IOException { + checkIfClosed(); - _pageman.getFileHeader().setRoot( id, rowid ); - } + _pageman.getFileHeader().setRoot(id, rowid); + } + /** + * Obtain the record id of a named object. Returns 0 if named object doesn't + * exist. + */ + public long getNamedObject(String name) throws IOException { + checkIfClosed(); - /** - * Obtain the record id of a named object. Returns 0 if named object - * doesn't exist. - */ - public long getNamedObject( String name ) - throws IOException - { - checkIfClosed(); - - Map nameDirectory = getNameDirectory(); - Long recid = (Long) nameDirectory.get( name ); - if ( recid == null ) { - return 0; - } - return recid.longValue(); + Map nameDirectory = getNameDirectory(); + Long recid = (Long) nameDirectory.get(name); + if (recid == null) { + return 0; } + return recid.longValue(); + } - /** - * Set the record id of a named object. - */ - public void setNamedObject( String name, long recid ) - throws IOException - { - checkIfClosed(); + /** + * Set the record id of a named object. 
+ */ + public void setNamedObject(String name, long recid) throws IOException { + checkIfClosed(); - Map nameDirectory = getNameDirectory(); - if ( recid == 0 ) { - // remove from hashtable - nameDirectory.remove( name ); - } else { - nameDirectory.put( name, new Long( recid ) ); - } - saveNameDirectory( nameDirectory ); + Map nameDirectory = getNameDirectory(); + if (recid == 0) { + // remove from hashtable + nameDirectory.remove(name); + } else { + nameDirectory.put(name, new Long(recid)); } + saveNameDirectory(nameDirectory); + } + /** + * Commit (make persistent) all changes since beginning of transaction. + */ + public synchronized void commit() throws IOException { + checkIfClosed(); - /** - * Commit (make persistent) all changes since beginning of transaction. - */ - public synchronized void commit() - throws IOException - { - checkIfClosed(); + _pageman.commit(); + } - _pageman.commit(); - } + /** + * Rollback (cancel) all changes since beginning of transaction. + */ + public synchronized void rollback() throws IOException { + checkIfClosed(); + _pageman.rollback(); + } - /** - * Rollback (cancel) all changes since beginning of transaction. - */ - public synchronized void rollback() - throws IOException - { - checkIfClosed(); - - _pageman.rollback(); + /** + * Load name directory + */ + private Map getNameDirectory() throws IOException { + // retrieve directory of named hashtable + long nameDirectory_recid = getRoot(NAME_DIRECTORY_ROOT); + if (nameDirectory_recid == 0) { + _nameDirectory = new HashMap(); + nameDirectory_recid = insert(_nameDirectory); + setRoot(NAME_DIRECTORY_ROOT, nameDirectory_recid); + } else { + _nameDirectory = (Map) fetch(nameDirectory_recid); } + return _nameDirectory; + } - - /** - * Load name directory - */ - private Map getNameDirectory() - throws IOException - { - // retrieve directory of named hashtable - long nameDirectory_recid = getRoot( NAME_DIRECTORY_ROOT ); - if ( nameDirectory_recid == 0 ) { - _nameDirectory = new HashMap(); - nameDirectory_recid = insert( _nameDirectory ); - setRoot( NAME_DIRECTORY_ROOT, nameDirectory_recid ); - } else { - _nameDirectory = (Map) fetch( nameDirectory_recid ); - } - return _nameDirectory; + private void saveNameDirectory(Map directory) throws IOException { + long recid = getRoot(NAME_DIRECTORY_ROOT); + if (recid == 0) { + throw new IOException("Name directory must exist"); } + update(recid, _nameDirectory); + } - - private void saveNameDirectory( Map directory ) - throws IOException - { - long recid = getRoot( NAME_DIRECTORY_ROOT ); - if ( recid == 0 ) { - throw new IOException( "Name directory must exist" ); - } - update( recid, _nameDirectory ); + /** + * Check if RecordManager has been closed. If so, throw an + * IllegalStateException. + */ + private void checkIfClosed() throws IllegalStateException { + if (_file == null) { + throw new IllegalStateException("RecordManager has been closed"); } - - - /** - * Check if RecordManager has been closed. If so, throw an - * IllegalStateException. 
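Tying the pieces together, a hedged end-to-end sketch using the name directory and commit shown above (assuming, as the implements clause suggests, that these methods are declared on the RecordManager interface; "greeting" and "testdb" are made-up names):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
    import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerFactory;

    public class NamedObjectSketch {
      public static void main(String[] args) throws IOException {
        RecordManager recman = RecordManagerFactory.createRecordManager("testdb");
        long recid = recman.insert("hello");      // default java serialization
        recman.setNamedObject("greeting", recid); // remember the rowid under a name
        recman.commit();

        long again = recman.getNamedObject("greeting"); // 0 would mean "not found"
        System.out.println((String) recman.fetch(again));
        recman.close();
      }
    }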
- */ - private void checkIfClosed() - throws IllegalStateException - { - if ( _file == null ) { - throw new IllegalStateException( "RecordManager has been closed" ); - } - } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/DataPage.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/DataPage.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/DataPage.java (working copy) @@ -66,44 +66,44 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * Class describing a page that holds data. + * Class describing a page that holds data. */ final class DataPage extends PageHeader { - // offsets - private static final short O_FIRST = PageHeader.SIZE; // short firstrowid - static final short O_DATA = (short)(O_FIRST + Magic.SZ_SHORT); - static final short DATA_PER_PAGE = (short)(RecordFile.BLOCK_SIZE - O_DATA); + // offsets + private static final short O_FIRST = PageHeader.SIZE; // short firstrowid + static final short O_DATA = (short) (O_FIRST + Magic.SZ_SHORT); + static final short DATA_PER_PAGE = (short) (RecordFile.BLOCK_SIZE - O_DATA); - /** - * Constructs a data page view from the indicated block. - */ - DataPage(BlockIo block) { - super(block); - } + /** + * Constructs a data page view from the indicated block. + */ + DataPage(BlockIo block) { + super(block); + } - /** - * Factory method to create or return a data page for the - * indicated block. - */ - static DataPage getDataPageView(BlockIo block) { - BlockView view = block.getView(); - if (view != null && view instanceof DataPage) + /** + * Factory method to create or return a data page for the indicated block. + */ + static DataPage getDataPageView(BlockIo block) { + BlockView view = block.getView(); + if (view != null && view instanceof DataPage) { return (DataPage) view; - else + } else { return new DataPage(block); } + } - /** Returns the first rowid's offset */ - short getFirst() { - return block.readShort(O_FIRST); + /** Returns the first rowid's offset */ + short getFirst() { + return block.readShort(O_FIRST); + } + + /** Sets the first rowid's offset */ + void setFirst(short value) { + paranoiaMagicOk(); + if (value > 0 && value < O_DATA) { + throw new Error("DataPage.setFirst: offset " + value + " too small"); } - - /** Sets the first rowid's offset */ - void setFirst(short value) { - paranoiaMagicOk(); - if (value > 0 && value < O_DATA) - throw new Error("DataPage.setFirst: offset " + value - + " too small"); - block.writeShort(O_FIRST, value); - } + block.writeShort(O_FIRST, value); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageManager.java (working copy) @@ -65,226 +65,225 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.*; +import java.io.IOException; /** - * This class manages the linked lists of pages that make up a file. + * This class manages the linked lists of pages that make up a file. */ final class PageManager { - // our record file - private RecordFile file; - // header data - private FileHeader header; - private BlockIo headerBuf; - - /** - * Creates a new page manager using the indicated record file. 
- */ - PageManager(RecordFile file) throws IOException { - this.file = file; - - // check the file header. If the magic is 0, we assume a new - // file. Note that we hold on to the file header node. - headerBuf = file.get(0); - if (headerBuf.readShort(0) == 0) - header = new FileHeader(headerBuf, true); - else - header = new FileHeader(headerBuf, false); + // our record file + private RecordFile file; + // header data + private FileHeader header; + private BlockIo headerBuf; + + /** + * Creates a new page manager using the indicated record file. + */ + PageManager(RecordFile file) throws IOException { + this.file = file; + + // check the file header. If the magic is 0, we assume a new + // file. Note that we hold on to the file header node. + headerBuf = file.get(0); + if (headerBuf.readShort(0) == 0) { + header = new FileHeader(headerBuf, true); + } else { + header = new FileHeader(headerBuf, false); } - - /** - * Allocates a page of the indicated type. Returns recid of the - * page. - */ - long allocate(short type) throws IOException { - - if (type == Magic.FREE_PAGE) - throw new Error("allocate of free page?"); - - // do we have something on the free list? - long retval = header.getFirstOf(Magic.FREE_PAGE); - boolean isNew = false; - if (retval != 0) { - // yes. Point to it and make the next of that page the - // new first free page. - header.setFirstOf(Magic.FREE_PAGE, getNext(retval)); - } - else { - // nope. make a new record - retval = header.getLastOf(Magic.FREE_PAGE); - if (retval == 0) - // very new file - allocate record #1 - retval = 1; - header.setLastOf(Magic.FREE_PAGE, retval + 1); - isNew = true; - } - - // Cool. We have a record, add it to the correct list - BlockIo buf = file.get(retval); - PageHeader pageHdr = isNew ? new PageHeader(buf, type) - : PageHeader.getView(buf); - long oldLast = header.getLastOf(type); - - // Clean data. - System.arraycopy(RecordFile.cleanData, 0, - buf.getData(), 0, - RecordFile.BLOCK_SIZE); - pageHdr.setType(type); - pageHdr.setPrev(oldLast); - pageHdr.setNext(0); - - - if (oldLast == 0) - // This was the first one of this type - header.setFirstOf(type, retval); - header.setLastOf(type, retval); - file.release(retval, true); - - // If there's a previous, fix up its pointer - if (oldLast != 0) { - buf = file.get(oldLast); - pageHdr = PageHeader.getView(buf); - pageHdr.setNext(retval); - file.release(oldLast, true); - } - - // remove the view, we have modified the type. - buf.setView(null); - - return retval; + } + + /** + * Allocates a page of the indicated type. Returns recid of the page. + */ + long allocate(short type) throws IOException { + + if (type == Magic.FREE_PAGE) { + throw new Error("allocate of free page?"); } - - /** - * Frees a page of the indicated type. - */ - void free(short type, long recid) throws IOException { - if (type == Magic.FREE_PAGE) - throw new Error("free free page?"); - if (recid == 0) - throw new Error("free header page?"); - - // get the page and read next and previous pointers - BlockIo buf = file.get(recid); - PageHeader pageHdr = PageHeader.getView(buf); - long prev = pageHdr.getPrev(); - long next = pageHdr.getNext(); - - // put the page at the front of the free list. 
- pageHdr.setType(Magic.FREE_PAGE); - pageHdr.setNext(header.getFirstOf(Magic.FREE_PAGE)); - pageHdr.setPrev(0); - - header.setFirstOf(Magic.FREE_PAGE, recid); - file.release(recid, true); - - // remove the page from its old list - if (prev != 0) { - buf = file.get(prev); - pageHdr = PageHeader.getView(buf); - pageHdr.setNext(next); - file.release(prev, true); - } - else { - header.setFirstOf(type, next); - } - if (next != 0) { - buf = file.get(next); - pageHdr = PageHeader.getView(buf); - pageHdr.setPrev(prev); - file.release(next, true); - } - else { - header.setLastOf(type, prev); - } - + + // do we have something on the free list? + long retval = header.getFirstOf(Magic.FREE_PAGE); + boolean isNew = false; + if (retval != 0) { + // yes. Point to it and make the next of that page the + // new first free page. + header.setFirstOf(Magic.FREE_PAGE, getNext(retval)); + } else { + // nope. make a new record + retval = header.getLastOf(Magic.FREE_PAGE); + if (retval == 0) { + // very new file - allocate record #1 + retval = 1; + } + header.setLastOf(Magic.FREE_PAGE, retval + 1); + isNew = true; } - - - /** - * Returns the page following the indicated block - */ - long getNext(long block) throws IOException { - try { - return PageHeader.getView(file.get(block)).getNext(); - } finally { - file.release(block, false); - } + + // Cool. We have a record, add it to the correct list + BlockIo buf = file.get(retval); + PageHeader pageHdr = isNew ? new PageHeader(buf, type) : PageHeader + .getView(buf); + long oldLast = header.getLastOf(type); + + // Clean data. + System.arraycopy(RecordFile.cleanData, 0, buf.getData(), 0, + RecordFile.BLOCK_SIZE); + pageHdr.setType(type); + pageHdr.setPrev(oldLast); + pageHdr.setNext(0); + + if (oldLast == 0) { + // This was the first one of this type + header.setFirstOf(type, retval); } - - /** - * Returns the page before the indicated block - */ - long getPrev(long block) throws IOException { - try { - return PageHeader.getView(file.get(block)).getPrev(); - } finally { - file.release(block, false); - } + header.setLastOf(type, retval); + file.release(retval, true); + + // If there's a previous, fix up its pointer + if (oldLast != 0) { + buf = file.get(oldLast); + pageHdr = PageHeader.getView(buf); + pageHdr.setNext(retval); + file.release(oldLast, true); } - - /** - * Returns the first page on the indicated list. - */ - long getFirst(short type) throws IOException { - return header.getFirstOf(type); + + // remove the view, we have modified the type. + buf.setView(null); + + return retval; + } + + /** + * Frees a page of the indicated type. + */ + void free(short type, long recid) throws IOException { + if (type == Magic.FREE_PAGE) { + throw new Error("free free page?"); } + if (recid == 0) { + throw new Error("free header page?"); + } - /** - * Returns the last page on the indicated list. - */ - long getLast(short type) throws IOException { - return header.getLastOf(type); + // get the page and read next and previous pointers + BlockIo buf = file.get(recid); + PageHeader pageHdr = PageHeader.getView(buf); + long prev = pageHdr.getPrev(); + long next = pageHdr.getNext(); + + // put the page at the front of the free list. 
+ pageHdr.setType(Magic.FREE_PAGE); + pageHdr.setNext(header.getFirstOf(Magic.FREE_PAGE)); + pageHdr.setPrev(0); + + header.setFirstOf(Magic.FREE_PAGE, recid); + file.release(recid, true); + + // remove the page from its old list + if (prev != 0) { + buf = file.get(prev); + pageHdr = PageHeader.getView(buf); + pageHdr.setNext(next); + file.release(prev, true); + } else { + header.setFirstOf(type, next); } - - - /** - * Commit all pending (in-memory) data by flushing the page manager. - * This forces a flush of all outstanding blocks (this it's an implicit - * {@link RecordFile#commit} as well). - */ - void commit() throws IOException { - // write the header out - file.release(headerBuf); - file.commit(); + if (next != 0) { + buf = file.get(next); + pageHdr = PageHeader.getView(buf); + pageHdr.setPrev(prev); + file.release(next, true); + } else { + header.setLastOf(type, prev); + } - // and obtain it again - headerBuf = file.get(0); - header = new FileHeader(headerBuf, false); + } + + /** + * Returns the page following the indicated block + */ + long getNext(long block) throws IOException { + try { + return PageHeader.getView(file.get(block)).getNext(); + } finally { + file.release(block, false); } + } - /** - * Flushes the page manager. This forces a flush of all outstanding - * blocks (this it's an implicit {@link RecordFile#commit} as well). - */ - void rollback() throws IOException { - // release header - file.discard(headerBuf); - file.rollback(); - // and obtain it again - headerBuf = file.get(0); - if (headerBuf.readShort(0) == 0) - header = new FileHeader(headerBuf, true); - else - header = new FileHeader(headerBuf, false); + /** + * Returns the page before the indicated block + */ + long getPrev(long block) throws IOException { + try { + return PageHeader.getView(file.get(block)).getPrev(); + } finally { + file.release(block, false); } - - /** - * Closes the page manager. This flushes the page manager and releases - * the lock on the header. - */ - void close() throws IOException { - file.release(headerBuf); - file.commit(); - headerBuf = null; - header = null; - file = null; + } + + /** + * Returns the first page on the indicated list. + */ + long getFirst(short type) throws IOException { + return header.getFirstOf(type); + } + + /** + * Returns the last page on the indicated list. + */ + long getLast(short type) throws IOException { + return header.getLastOf(type); + } + + /** + * Commit all pending (in-memory) data by flushing the page manager. This + * forces a flush of all outstanding blocks (this it's an implicit + * {@link RecordFile#commit} as well). + */ + void commit() throws IOException { + // write the header out + file.release(headerBuf); + file.commit(); + + // and obtain it again + headerBuf = file.get(0); + header = new FileHeader(headerBuf, false); + } + + /** + * Flushes the page manager. This forces a flush of all outstanding blocks + * (this it's an implicit {@link RecordFile#commit} as well). + */ + void rollback() throws IOException { + // release header + file.discard(headerBuf); + file.rollback(); + // and obtain it again + headerBuf = file.get(0); + if (headerBuf.readShort(0) == 0) { + header = new FileHeader(headerBuf, true); + } else { + header = new FileHeader(headerBuf, false); } - - /** - * Returns the file header. - */ - FileHeader getFileHeader() { - return header; - } - + } + + /** + * Closes the page manager. This flushes the page manager and releases the + * lock on the header. 
+ */ + void close() throws IOException { + file.release(headerBuf); + file.commit(); + headerBuf = null; + header = null; + file = null; + } + + /** + * Returns the file header. + */ + FileHeader getFileHeader() { + return header; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Location.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Location.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Location.java (working copy) @@ -66,74 +66,76 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * This class represents a location within a file. Both physical and - * logical rowids are based on locations internally - this version is - * used when there is no file block to back the location's data. + * This class represents a location within a file. Both physical and logical + * rowids are based on locations internally - this version is used when there is + * no file block to back the location's data. */ final class Location { - private long block; - private short offset; + private final long block; + private final short offset; - /** - * Creates a location from a (block, offset) tuple. - */ - Location(long block, short offset) { - this.block = block; - this.offset = offset; - } + /** + * Creates a location from a (block, offset) tuple. + */ + Location(long block, short offset) { + this.block = block; + this.offset = offset; + } - /** - * Creates a location from a combined block/offset long, as - * used in the external representation of logical rowids. - * - * @see #toLong() - */ - Location(long blockOffset) { - this.offset = (short) (blockOffset & 0xffff); - this.block = blockOffset >> 16; - } + /** + * Creates a location from a combined block/offset long, as used in the + * external representation of logical rowids. + * + * @see #toLong() + */ + Location(long blockOffset) { + offset = (short) (blockOffset & 0xffff); + block = blockOffset >> 16; + } - /** - * Creates a location based on the data of the physical rowid. - */ - Location(PhysicalRowId src) { - block = src.getBlock(); - offset = src.getOffset(); - } + /** + * Creates a location based on the data of the physical rowid. + */ + Location(PhysicalRowId src) { + block = src.getBlock(); + offset = src.getOffset(); + } - /** - * Returns the file block of the location - */ - long getBlock() { - return block; - } + /** + * Returns the file block of the location + */ + long getBlock() { + return block; + } - /** - * Returns the offset within the block of the location - */ - short getOffset() { - return offset; - } + /** + * Returns the offset within the block of the location + */ + short getOffset() { + return offset; + } - /** - * Returns the external representation of a location when used - * as a logical rowid, which combines the block and the offset - * in a single long. - */ - long toLong() { - return (block << 16) + (long) offset; - } + /** + * Returns the external representation of a location when used as a logical + * rowid, which combines the block and the offset in a single long. 
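Location packs a (block, offset) pair into a single long exactly as the toLong() method and the Location(long) constructor above show: the offset occupies the low 16 bits and the block number the bits above them. A standalone sketch of the round trip, with values chosen arbitrarily for illustration:

// Packing/unpacking used by Location.toLong() and the Location(long) constructor.
public class LocationPacking {
    static long toLong(long block, short offset) {
        return (block << 16) + offset;
    }

    static long blockOf(long packed) {
        return packed >> 16;
    }

    static short offsetOf(long packed) {
        return (short) (packed & 0xffff);
    }

    public static void main(String[] args) {
        long packed = toLong(3L, (short) 72);   // block 3, offset 72
        System.out.println(packed);             // 196680  (3 * 65536 + 72)
        System.out.println(blockOf(packed));    // 3
        System.out.println(offsetOf(packed));   // 72
    }
}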
+ */ + long toLong() { + return (block << 16) + offset; + } - // overrides of java.lang.Object + // overrides of java.lang.Object - public boolean equals(Object o) { - if (o == null || !(o instanceof Location)) - return false; - Location ol = (Location) o; - return ol.block == block && ol.offset == offset; + @Override + public boolean equals(Object o) { + if (o == null || !(o instanceof Location)) { + return false; } + Location ol = (Location) o; + return ol.block == block && ol.offset == offset; + } - public String toString() { - return "PL(" + block + ":" + offset + ")"; - } + @Override + public String toString() { + return "PL(" + block + ":" + offset + ")"; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPageManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPageManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPageManager.java (working copy) @@ -68,94 +68,89 @@ import java.io.IOException; /** - * This class manages free Logical rowid pages and provides methods - * to free and allocate Logical rowids on a high level. + * This class manages free Logical rowid pages and provides methods to free and + * allocate Logical rowids on a high level. */ final class FreeLogicalRowIdPageManager { - // our record file - private RecordFile file; - // our page manager - private PageManager pageman; + // our record file + private final RecordFile file; + // our page manager + private final PageManager pageman; - /** - * Creates a new instance using the indicated record file and - * page manager. - */ - FreeLogicalRowIdPageManager(RecordFile file, - PageManager pageman) throws IOException { - this.file = file; - this.pageman = pageman; - } + /** + * Creates a new instance using the indicated record file and page manager. + */ + FreeLogicalRowIdPageManager(RecordFile file, PageManager pageman) + throws IOException { + this.file = file; + this.pageman = pageman; + } - /** - * Returns a free Logical rowid, or - * null if nothing was found. - */ - Location get() throws IOException { - - // Loop through the free Logical rowid list until we find - // the first rowid. - Location retval = null; - PageCursor curs = new PageCursor(pageman, Magic.FREELOGIDS_PAGE); - while (curs.next() != 0) { - FreeLogicalRowIdPage fp = FreeLogicalRowIdPage - .getFreeLogicalRowIdPageView(file.get(curs.getCurrent())); - int slot = fp.getFirstAllocated(); - if (slot != -1) { - // got one! - retval = - new Location(fp.get(slot)); - fp.free(slot); - if (fp.getCount() == 0) { - // page became empty - free it - file.release(curs.getCurrent(), false); - pageman.free(Magic.FREELOGIDS_PAGE, curs.getCurrent()); - } - else - file.release(curs.getCurrent(), true); - - return retval; - } - else { - // no luck, go to next page - file.release(curs.getCurrent(), false); - } + /** + * Returns a free Logical rowid, or null if nothing was found. + */ + Location get() throws IOException { + + // Loop through the free Logical rowid list until we find + // the first rowid. + Location retval = null; + PageCursor curs = new PageCursor(pageman, Magic.FREELOGIDS_PAGE); + while (curs.next() != 0) { + FreeLogicalRowIdPage fp = FreeLogicalRowIdPage + .getFreeLogicalRowIdPageView(file.get(curs.getCurrent())); + int slot = fp.getFirstAllocated(); + if (slot != -1) { + // got one! 
+ retval = new Location(fp.get(slot)); + fp.free(slot); + if (fp.getCount() == 0) { + // page became empty - free it + file.release(curs.getCurrent(), false); + pageman.free(Magic.FREELOGIDS_PAGE, curs.getCurrent()); + } else { + file.release(curs.getCurrent(), true); } - return null; + + return retval; + } else { + // no luck, go to next page + file.release(curs.getCurrent(), false); + } } + return null; + } - /** - * Puts the indicated rowid on the free list - */ - void put(Location rowid) - throws IOException { - - PhysicalRowId free = null; - PageCursor curs = new PageCursor(pageman, Magic.FREELOGIDS_PAGE); - long freePage = 0; - while (curs.next() != 0) { - freePage = curs.getCurrent(); - BlockIo curBlock = file.get(freePage); - FreeLogicalRowIdPage fp = FreeLogicalRowIdPage - .getFreeLogicalRowIdPageView(curBlock); - int slot = fp.getFirstFree(); - if (slot != -1) { - free = fp.alloc(slot); - break; - } - - file.release(curBlock); - } - if (free == null) { - // No more space on the free list, add a page. - freePage = pageman.allocate(Magic.FREELOGIDS_PAGE); - BlockIo curBlock = file.get(freePage); - FreeLogicalRowIdPage fp = - FreeLogicalRowIdPage.getFreeLogicalRowIdPageView(curBlock); - free = fp.alloc(0); - } - free.setBlock(rowid.getBlock()); - free.setOffset(rowid.getOffset()); - file.release(freePage, true); + /** + * Puts the indicated rowid on the free list + */ + void put(Location rowid) throws IOException { + + PhysicalRowId free = null; + PageCursor curs = new PageCursor(pageman, Magic.FREELOGIDS_PAGE); + long freePage = 0; + while (curs.next() != 0) { + freePage = curs.getCurrent(); + BlockIo curBlock = file.get(freePage); + FreeLogicalRowIdPage fp = FreeLogicalRowIdPage + .getFreeLogicalRowIdPageView(curBlock); + int slot = fp.getFirstFree(); + if (slot != -1) { + free = fp.alloc(slot); + break; + } + + file.release(curBlock); } + if (free == null) { + // No more space on the free list, add a page. + freePage = pageman.allocate(Magic.FREELOGIDS_PAGE); + BlockIo curBlock = file.get(freePage); + FreeLogicalRowIdPage fp = FreeLogicalRowIdPage + .getFreeLogicalRowIdPageView(curBlock); + free = fp.alloc(0); + } + free.setBlock(rowid.getBlock()); + free.setOffset(rowid.getOffset()); + file.release(freePage, true); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/LogicalRowIdManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/LogicalRowIdManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/LogicalRowIdManager.java (working copy) @@ -68,96 +68,93 @@ import java.io.IOException; /** - * This class manages the linked lists of logical rowid pages. + * This class manages the linked lists of logical rowid pages. 
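FreeLogicalRowIdPageManager.get() and put() above implement a recycling pool: put() parks a freed logical rowid in a slot page, get() hands one back out (or null when nothing is parked), and the caller only mints a new rowid on the null path. A heavily simplified in-memory analogue, with a List replacing the on-disk FREELOGIDS pages:

import java.util.ArrayList;
import java.util.List;

// Sketch of the get()/put() recycling contract; a List<Long> stands in
// for the on-disk slot pages walked by the real manager.
public class FreeRowIdPool {
    private final List<Long> freeSlots = new ArrayList<Long>();

    // like get(): return a previously freed rowid, or null if none is parked
    Long get() {
        if (freeSlots.isEmpty()) {
            return null;
        }
        return freeSlots.remove(freeSlots.size() - 1);
    }

    // like put(): park a rowid so it can be recycled later
    void put(long rowid) {
        freeSlots.add(rowid);
    }

    public static void main(String[] args) {
        FreeRowIdPool pool = new FreeRowIdPool();
        System.out.println(pool.get());   // null - nothing freed yet
        pool.put(196680L);
        System.out.println(pool.get());   // 196680 - recycled
    }
}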
*/ final class LogicalRowIdManager { - // our record file and associated page manager - private RecordFile file; - private PageManager pageman; - private FreeLogicalRowIdPageManager freeman; + // our record file and associated page manager + private final RecordFile file; + private final PageManager pageman; + private final FreeLogicalRowIdPageManager freeman; - /** - * Creates a log rowid manager using the indicated record file and - * page manager - */ - LogicalRowIdManager(RecordFile file, PageManager pageman) - throws IOException { - this.file = file; - this.pageman = pageman; - this.freeman = new FreeLogicalRowIdPageManager(file, pageman); + /** + * Creates a log rowid manager using the indicated record file and page + * manager + */ + LogicalRowIdManager(RecordFile file, PageManager pageman) throws IOException { + this.file = file; + this.pageman = pageman; + freeman = new FreeLogicalRowIdPageManager(file, pageman); - } + } - /** - * Creates a new logical rowid pointing to the indicated physical - * id - */ - Location insert(Location loc) - throws IOException { - // check whether there's a free rowid to reuse - Location retval = freeman.get(); - if (retval == null) { + /** + * Creates a new logical rowid pointing to the indicated physical id + */ + Location insert(Location loc) throws IOException { + // check whether there's a free rowid to reuse + Location retval = freeman.get(); + if (retval == null) { // no. This means that we bootstrap things by allocating // a new translation page and freeing all the rowids on it. long firstPage = pageman.allocate(Magic.TRANSLATION_PAGE); short curOffset = TranslationPage.O_TRANS; for (int i = 0; i < TranslationPage.ELEMS_PER_PAGE; i++) { - freeman.put(new Location(firstPage, curOffset)); - curOffset += PhysicalRowId.SIZE; + freeman.put(new Location(firstPage, curOffset)); + curOffset += PhysicalRowId.SIZE; } retval = freeman.get(); if (retval == null) { - throw new Error("couldn't obtain free translation"); + throw new Error("couldn't obtain free translation"); } - } - // write the translation. - update(retval, loc); - return retval; } + // write the translation. + update(retval, loc); + return retval; + } - /** - * Releases the indicated logical rowid. - */ - void delete(Location rowid) - throws IOException { + /** + * Releases the indicated logical rowid. 
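When insert() above finds no free logical rowid, it bootstraps by allocating one translation page and registering every slot on that page as free, stepping the offset by the physical rowid size. The sketch below reproduces only that offset arithmetic; O_TRANS and SLOT_SIZE are assumed placeholder values rather than the real constants from TranslationPage and PhysicalRowId, while BLOCK_SIZE matches the 8K block size used by RecordFile in this patch.

// Bootstrap loop from LogicalRowIdManager.insert(), reduced to its arithmetic.
public class TranslationPageBootstrap {
    static final short O_TRANS = 18;        // assumed header size before the slots
    static final short SLOT_SIZE = 10;      // assumed PhysicalRowId.SIZE
    static final int BLOCK_SIZE = 8192;     // RecordFile.BLOCK_SIZE
    static final int ELEMS_PER_PAGE = (BLOCK_SIZE - O_TRANS) / SLOT_SIZE;

    public static void main(String[] args) {
        long translationPage = 42;          // pretend pageman.allocate(...) returned this
        short curOffset = O_TRANS;
        for (int i = 0; i < ELEMS_PER_PAGE; i++) {
            // each (page, offset) pair becomes one reusable logical rowid
            if (i < 3) {
                System.out.println("free slot at (" + translationPage + ", " + curOffset + ")");
            }
            curOffset += SLOT_SIZE;
        }
        System.out.println(ELEMS_PER_PAGE + " logical rowids freed in one bootstrap");
    }
}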
+ */ + void delete(Location rowid) throws IOException { - freeman.put(rowid); - } + freeman.put(rowid); + } - /** - * Updates the mapping - * - * @param rowid The logical rowid - * @param loc The physical rowid - */ - void update(Location rowid, Location loc) - throws IOException { + /** + * Updates the mapping + * + * @param rowid + * The logical rowid + * @param loc + * The physical rowid + */ + void update(Location rowid, Location loc) throws IOException { - TranslationPage xlatPage = TranslationPage.getTranslationPageView( - file.get(rowid.getBlock())); - PhysicalRowId physid = xlatPage.get(rowid.getOffset()); - physid.setBlock(loc.getBlock()); - physid.setOffset(loc.getOffset()); - file.release(rowid.getBlock(), true); - } + TranslationPage xlatPage = TranslationPage.getTranslationPageView(file + .get(rowid.getBlock())); + PhysicalRowId physid = xlatPage.get(rowid.getOffset()); + physid.setBlock(loc.getBlock()); + physid.setOffset(loc.getOffset()); + file.release(rowid.getBlock(), true); + } - /** - * Returns a mapping - * - * @param rowid The logical rowid - * @return The physical rowid - */ - Location fetch(Location rowid) - throws IOException { + /** + * Returns a mapping + * + * @param rowid + * The logical rowid + * @return The physical rowid + */ + Location fetch(Location rowid) throws IOException { - TranslationPage xlatPage = TranslationPage.getTranslationPageView( - file.get(rowid.getBlock())); - try { - Location retval = new Location(xlatPage.get(rowid.getOffset())); - return retval; - } finally { - file.release(rowid.getBlock(), false); - } + TranslationPage xlatPage = TranslationPage.getTranslationPageView(file + .get(rowid.getBlock())); + try { + Location retval = new Location(xlatPage.get(rowid.getOffset())); + return retval; + } finally { + file.release(rowid.getBlock(), false); } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageCursor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageCursor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageCursor.java (working copy) @@ -68,54 +68,54 @@ import java.io.IOException; /** - * This class provides a cursor that can follow lists of pages - * bi-directionally. + * This class provides a cursor that can follow lists of pages bi-directionally. */ final class PageCursor { - PageManager pageman; - long current; - short type; - - /** - * Constructs a page cursor that starts at the indicated block. - */ - PageCursor(PageManager pageman, long current) { - this.pageman = pageman; - this.current = current; + PageManager pageman; + long current; + short type; + + /** + * Constructs a page cursor that starts at the indicated block. + */ + PageCursor(PageManager pageman, long current) { + this.pageman = pageman; + this.current = current; + } + + /** + * Constructs a page cursor that starts at the first block of the indicated + * list. + */ + PageCursor(PageManager pageman, short type) throws IOException { + this.pageman = pageman; + this.type = type; + } + + /** + * Returns the current value of the cursor. + */ + long getCurrent() throws IOException { + return current; + } + + /** + * Returns the next value of the cursor + */ + long next() throws IOException { + if (current == 0) { + current = pageman.getFirst(type); + } else { + current = pageman.getNext(current); } - - /** - * Constructs a page cursor that starts at the first block - * of the indicated list. 
- */ - PageCursor(PageManager pageman, short type) throws IOException { - this.pageman = pageman; - this.type = type; - } - - /** - * Returns the current value of the cursor. - */ - long getCurrent() throws IOException { - return current; - } - - /** - * Returns the next value of the cursor - */ - long next() throws IOException { - if (current == 0) - current = pageman.getFirst(type); - else - current = pageman.getNext(current); - return current; - } - - /** - * Returns the previous value of the cursor - */ - long prev() throws IOException { - current = pageman.getPrev(current); - return current; - } + return current; + } + + /** + * Returns the previous value of the cursor + */ + long prev() throws IOException { + current = pageman.getPrev(current); + return current; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPageManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPageManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPageManager.java (working copy) @@ -68,103 +68,96 @@ import java.io.IOException; /** - * This class manages free physical rowid pages and provides methods - * to free and allocate physical rowids on a high level. + * This class manages free physical rowid pages and provides methods to free and + * allocate physical rowids on a high level. */ -final class FreePhysicalRowIdPageManager -{ - // our record file - protected RecordFile _file; +final class FreePhysicalRowIdPageManager { + // our record file + protected RecordFile _file; - // our page manager - protected PageManager _pageman; + // our page manager + protected PageManager _pageman; - /** - * Creates a new instance using the indicated record file and - * page manager. - */ - FreePhysicalRowIdPageManager( RecordFile file, PageManager pageman ) - throws IOException - { - _file = file; - _pageman = pageman; - } + /** + * Creates a new instance using the indicated record file and page manager. + */ + FreePhysicalRowIdPageManager(RecordFile file, PageManager pageman) + throws IOException { + _file = file; + _pageman = pageman; + } + /** + * Returns a free physical rowid of the indicated size, or null if nothing was + * found. + */ + Location get(int size) throws IOException { + // Loop through the free physical rowid list until we find + // a rowid that's large enough. + Location retval = null; + PageCursor curs = new PageCursor(_pageman, Magic.FREEPHYSIDS_PAGE); - /** - * Returns a free physical rowid of the indicated size, or - * null if nothing was found. - */ - Location get( int size ) - throws IOException - { - // Loop through the free physical rowid list until we find - // a rowid that's large enough. - Location retval = null; - PageCursor curs = new PageCursor( _pageman, Magic.FREEPHYSIDS_PAGE ); + while (curs.next() != 0) { + FreePhysicalRowIdPage fp = FreePhysicalRowIdPage + .getFreePhysicalRowIdPageView(_file.get(curs.getCurrent())); + int slot = fp.getFirstLargerThan(size); + if (slot != -1) { + // got one! + retval = new Location(fp.get(slot)); - while (curs.next() != 0) { - FreePhysicalRowIdPage fp = FreePhysicalRowIdPage - .getFreePhysicalRowIdPageView( _file.get( curs.getCurrent() ) ); - int slot = fp.getFirstLargerThan( size ); - if ( slot != -1 ) { - // got one! 
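A PageCursor built with a list type starts at 0; the first next() call jumps to the head of that list via getFirst() and each later call follows getNext() until 0 signals the end, which is how the free-rowid managers iterate their pages. A runnable sketch of that driving loop, using a tiny map-backed stand-in for PageManager (the Pages interface is an assumption made only to keep the example self-contained):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Drives the cursor exactly as PageCursor.next() does: getFirst() once, then
// getNext() until 0 terminates the list.
public class CursorWalkSketch {
    interface Pages {
        long getFirst(short type) throws IOException;
        long getNext(long block) throws IOException;
    }

    public static void main(String[] args) throws IOException {
        final short FREE_PAGE = 0;
        final Map<Long, Long> next = new HashMap<Long, Long>();
        next.put(4L, 9L);
        next.put(9L, 2L);
        next.put(2L, 0L);                     // 0 terminates the list
        Pages pageman = new Pages() {
            public long getFirst(short type) { return 4L; }
            public long getNext(long block) { return next.get(block); }
        };

        long current = 0;                     // mirrors PageCursor.current
        StringBuilder visited = new StringBuilder();
        while (true) {
            current = (current == 0) ? pageman.getFirst(FREE_PAGE)
                                     : pageman.getNext(current);
            if (current == 0) {
                break;
            }
            visited.append(current).append(' ');
        }
        System.out.println(visited.toString().trim());   // 4 9 2
    }
}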
- retval = new Location( fp.get( slot ) ); + fp.get(slot).getSize(); + fp.free(slot); + if (fp.getCount() == 0) { + // page became empty - free it + _file.release(curs.getCurrent(), false); + _pageman.free(Magic.FREEPHYSIDS_PAGE, curs.getCurrent()); + } else { + _file.release(curs.getCurrent(), true); + } - int slotsize = fp.get( slot ).getSize(); - fp.free( slot ); - if ( fp.getCount() == 0 ) { - // page became empty - free it - _file.release( curs.getCurrent(), false ); - _pageman.free( Magic.FREEPHYSIDS_PAGE, curs.getCurrent() ); - } else { - _file.release( curs.getCurrent(), true ); - } + return retval; + } else { + // no luck, go to next page + _file.release(curs.getCurrent(), false); + } - return retval; - } else { - // no luck, go to next page - _file.release( curs.getCurrent(), false ); - } - - } - return null; } + return null; + } - /** - * Puts the indicated rowid on the free list - */ - void put(Location rowid, int size) - throws IOException { + /** + * Puts the indicated rowid on the free list + */ + void put(Location rowid, int size) throws IOException { - FreePhysicalRowId free = null; - PageCursor curs = new PageCursor(_pageman, Magic.FREEPHYSIDS_PAGE); - long freePage = 0; - while (curs.next() != 0) { + FreePhysicalRowId free = null; + PageCursor curs = new PageCursor(_pageman, Magic.FREEPHYSIDS_PAGE); + long freePage = 0; + while (curs.next() != 0) { freePage = curs.getCurrent(); BlockIo curBlock = _file.get(freePage); FreePhysicalRowIdPage fp = FreePhysicalRowIdPage - .getFreePhysicalRowIdPageView(curBlock); + .getFreePhysicalRowIdPageView(curBlock); int slot = fp.getFirstFree(); if (slot != -1) { - free = fp.alloc(slot); - break; + free = fp.alloc(slot); + break; } _file.release(curBlock); - } - if (free == null) { + } + if (free == null) { // No more space on the free list, add a page. freePage = _pageman.allocate(Magic.FREEPHYSIDS_PAGE); BlockIo curBlock = _file.get(freePage); - FreePhysicalRowIdPage fp = - FreePhysicalRowIdPage.getFreePhysicalRowIdPageView(curBlock); + FreePhysicalRowIdPage fp = FreePhysicalRowIdPage + .getFreePhysicalRowIdPageView(curBlock); free = fp.alloc(0); - } - - free.setBlock(rowid.getBlock()); - free.setOffset(rowid.getOffset()); - free.setSize(size); - _file.release(freePage, true); } + + free.setBlock(rowid.getBlock()); + free.setOffset(rowid.getOffset()); + free.setSize(size); + _file.release(freePage, true); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowIdManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowIdManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowIdManager.java (working copy) @@ -68,289 +68,267 @@ import java.io.IOException; /** - * This class manages physical row ids, and their data. + * This class manages physical row ids, and their data. */ -final class PhysicalRowIdManager -{ +final class PhysicalRowIdManager { - // The file we're talking to and the associated page manager. - private RecordFile file; - private PageManager pageman; - private FreePhysicalRowIdPageManager freeman; + // The file we're talking to and the associated page manager. + private final RecordFile file; + private final PageManager pageman; + private final FreePhysicalRowIdPageManager freeman; - /** - * Creates a new rowid manager using the indicated record file. - * and page manager. 
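FreePhysicalRowIdPageManager.get(size) above performs a first-fit scan: it walks the free-rowid pages and returns the first freed slot big enough for the request, removing it from the list (and releasing the page once it empties). The sketch below keeps only that scan, with an in-memory list of (location, size) entries in place of the on-disk pages:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

// First-fit scan over freed physical records, reduced to an in-memory list.
public class FirstFitScan {
    static final class FreeEntry {
        final long location;
        final int size;
        FreeEntry(long location, int size) { this.location = location; this.size = size; }
    }

    private final List<FreeEntry> freeList = new ArrayList<FreeEntry>();

    void put(long location, int size) {
        freeList.add(new FreeEntry(location, size));
    }

    // returns the location of a freed record with capacity >= size, or -1
    long get(int size) {
        for (Iterator<FreeEntry> it = freeList.iterator(); it.hasNext();) {
            FreeEntry e = it.next();
            if (e.size >= size) {
                it.remove();          // "got one!"
                return e.location;
            }
        }
        return -1;                    // no luck, caller must allocate fresh space
    }

    public static void main(String[] args) {
        FirstFitScan pool = new FirstFitScan();
        pool.put(100L, 64);
        pool.put(200L, 512);
        System.out.println(pool.get(128));   // 200 - first entry large enough
        System.out.println(pool.get(1024));  // -1  - nothing big enough left
    }
}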
- */ - PhysicalRowIdManager( RecordFile file, PageManager pageManager ) - throws IOException - { - this.file = file; - this.pageman = pageManager; - this.freeman = new FreePhysicalRowIdPageManager(file, pageman); - } + /** + * Creates a new rowid manager using the indicated record file. and page + * manager. + */ + PhysicalRowIdManager(RecordFile file, PageManager pageManager) + throws IOException { + this.file = file; + pageman = pageManager; + freeman = new FreePhysicalRowIdPageManager(file, pageman); + } - /** - * Inserts a new record. Returns the new physical rowid. - */ - Location insert( byte[] data, int start, int length ) - throws IOException - { - Location retval = alloc( length ); - write( retval, data, start, length ); - return retval; + /** + * Inserts a new record. Returns the new physical rowid. + */ + Location insert(byte[] data, int start, int length) throws IOException { + Location retval = alloc(length); + write(retval, data, start, length); + return retval; + } + + /** + * Updates an existing record. Returns the possibly changed physical rowid. + */ + Location update(Location rowid, byte[] data, int start, int length) + throws IOException { + // fetch the record header + BlockIo block = file.get(rowid.getBlock()); + RecordHeader head = new RecordHeader(block, rowid.getOffset()); + if (length > head.getAvailableSize()) { + // not enough space - we need to copy to a new rowid. + file.release(block); + free(rowid); + rowid = alloc(length); + } else { + file.release(block); } - /** - * Updates an existing record. Returns the possibly changed - * physical rowid. - */ - Location update( Location rowid, byte[] data, int start, int length ) - throws IOException - { - // fetch the record header - BlockIo block = file.get( rowid.getBlock() ); - RecordHeader head = new RecordHeader( block, rowid.getOffset() ); - if ( length > head.getAvailableSize() ) { - // not enough space - we need to copy to a new rowid. - file.release( block ); - free( rowid ); - rowid = alloc( length ); - } else { - file.release( block ); - } + // 'nuff space, write it in and return the rowid. + write(rowid, data, start, length); + return rowid; + } - // 'nuff space, write it in and return the rowid. - write( rowid, data, start, length ); - return rowid; - } + /** + * Deletes a record. + */ + void delete(Location rowid) throws IOException { + free(rowid); + } - /** - * Deletes a record. - */ - void delete( Location rowid ) - throws IOException - { - free( rowid ); + /** + * Retrieves a record. + */ + byte[] fetch(Location rowid) throws IOException { + // fetch the record header + PageCursor curs = new PageCursor(pageman, rowid.getBlock()); + BlockIo block = file.get(curs.getCurrent()); + RecordHeader head = new RecordHeader(block, rowid.getOffset()); + + // allocate a return buffer + byte[] retval = new byte[head.getCurrentSize()]; + if (retval.length == 0) { + file.release(curs.getCurrent(), false); + return retval; } - /** - * Retrieves a record. 
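The update() method above makes a relocation decision: while the new payload still fits into the space reserved for the record it is rewritten in place, otherwise the old rowid is freed and a new one allocated, so callers must keep the returned (and possibly different) rowid. A small sketch of just that decision, with a plain object standing in for the on-disk record header:

// Relocation decision from PhysicalRowIdManager.update(), in-memory only.
public class UpdateRelocation {
    static final class Record {
        long rowid;
        int availableSize;   // capacity reserved on disk
        byte[] data;
        Record(long rowid, int availableSize) {
            this.rowid = rowid;
            this.availableSize = availableSize;
        }
    }

    private long nextRowid = 1;

    long update(Record rec, byte[] newData) {
        if (newData.length > rec.availableSize) {
            // not enough space - "free" the old location and allocate a new one
            rec.rowid = nextRowid++;
            rec.availableSize = newData.length;
        }
        rec.data = newData;          // 'nuff space (possibly after the move)
        return rec.rowid;            // caller must remember the possibly new rowid
    }

    public static void main(String[] args) {
        UpdateRelocation mgr = new UpdateRelocation();
        Record rec = new Record(7L, 16);
        System.out.println(mgr.update(rec, new byte[8]));    // 7 - fits, stays put
        System.out.println(mgr.update(rec, new byte[64]));   // 1 - relocated
    }
}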
- */ - byte[] fetch( Location rowid ) - throws IOException - { - // fetch the record header - PageCursor curs = new PageCursor( pageman, rowid.getBlock() ); - BlockIo block = file.get( curs.getCurrent() ); - RecordHeader head = new RecordHeader( block, rowid.getOffset() ); + // copy bytes in + int offsetInBuffer = 0; + int leftToRead = retval.length; + short dataOffset = (short) (rowid.getOffset() + RecordHeader.SIZE); + while (leftToRead > 0) { + // copy current page's data to return buffer + int toCopy = RecordFile.BLOCK_SIZE - dataOffset; + if (leftToRead < toCopy) { + toCopy = leftToRead; + } + System.arraycopy(block.getData(), dataOffset, retval, offsetInBuffer, + toCopy); - // allocate a return buffer - byte[] retval = new byte[ head.getCurrentSize() ]; - if ( retval.length == 0 ) { - file.release( curs.getCurrent(), false ); - return retval; - } + // Go to the next block + leftToRead -= toCopy; + offsetInBuffer += toCopy; - // copy bytes in - int offsetInBuffer = 0; - int leftToRead = retval.length; - short dataOffset = (short) (rowid.getOffset() + RecordHeader.SIZE); - while ( leftToRead > 0 ) { - // copy current page's data to return buffer - int toCopy = RecordFile.BLOCK_SIZE - dataOffset; - if ( leftToRead < toCopy ) { - toCopy = leftToRead; - } - System.arraycopy( block.getData(), dataOffset, - retval, offsetInBuffer, - toCopy ); + file.release(block); - // Go to the next block - leftToRead -= toCopy; - offsetInBuffer += toCopy; + if (leftToRead > 0) { + block = file.get(curs.next()); + dataOffset = DataPage.O_DATA; + } - file.release( block ); + } - if ( leftToRead > 0 ) { - block = file.get( curs.next() ); - dataOffset = DataPage.O_DATA; - } + return retval; + } - } + /** + * Allocate a new rowid with the indicated size. + */ + private Location alloc(int size) throws IOException { + Location retval = freeman.get(size); + if (retval == null) { + retval = allocNew(size, pageman.getLast(Magic.USED_PAGE)); + } + return retval; + } - return retval; + /** + * Allocates a new rowid. The second parameter is there to allow for a + * recursive call - it indicates where the search should start. + */ + private Location allocNew(int size, long start) throws IOException { + BlockIo curBlock; + DataPage curPage; + if (start == 0) { + // we need to create a new page. + start = pageman.allocate(Magic.USED_PAGE); + curBlock = file.get(start); + curPage = DataPage.getDataPageView(curBlock); + curPage.setFirst(DataPage.O_DATA); + RecordHeader hdr = new RecordHeader(curBlock, DataPage.O_DATA); + hdr.setAvailableSize(0); + hdr.setCurrentSize(0); + } else { + curBlock = file.get(start); + curPage = DataPage.getDataPageView(curBlock); } - /** - * Allocate a new rowid with the indicated size. - */ - private Location alloc( int size ) - throws IOException - { - Location retval = freeman.get( size ); - if ( retval == null ) { - retval = allocNew( size, pageman.getLast( Magic.USED_PAGE ) ); - } - return retval; + // follow the rowids on this page to get to the last one. We don't + // fall off, because this is the last page, remember? + short pos = curPage.getFirst(); + if (pos == 0) { + // page is exactly filled by the last block of a record + file.release(curBlock); + return allocNew(size, 0); } - /** - * Allocates a new rowid. The second parameter is there to - * allow for a recursive call - it indicates where the search - * should start. 
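fetch() above copies a record out in chunks: the first chunk starts right behind the record header inside the starting block, and every subsequent chunk spans the data area of the next page on the list. The helper below reproduces only that chunking arithmetic; O_DATA and HEADER_SIZE are assumed stand-ins for DataPage.O_DATA and RecordHeader.SIZE, while BLOCK_SIZE matches the 8K block size used by RecordFile in this patch.

// Chunk sizes produced by the copy loop in PhysicalRowIdManager.fetch().
public class ChunkedRead {
    static final int BLOCK_SIZE = 8192;
    static final int O_DATA = 8;        // assumed page header size on data pages
    static final int HEADER_SIZE = 8;   // assumed record header size

    // returns how many bytes are taken from each successive block
    static java.util.List<Integer> chunkSizes(int recordOffset, int recordLength) {
        java.util.List<Integer> chunks = new java.util.ArrayList<Integer>();
        int leftToRead = recordLength;
        int dataOffset = recordOffset + HEADER_SIZE;   // first chunk: after the header
        while (leftToRead > 0) {
            int toCopy = Math.min(BLOCK_SIZE - dataOffset, leftToRead);
            chunks.add(toCopy);
            leftToRead -= toCopy;
            dataOffset = O_DATA;                       // later chunks: whole data area
        }
        return chunks;
    }

    public static void main(String[] args) {
        // a 10000-byte record starting at offset 8000 of its first block
        System.out.println(chunkSizes(8000, 10000));   // [184, 8184, 1632]
    }
}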
- */ - private Location allocNew( int size, long start ) - throws IOException - { - BlockIo curBlock; - DataPage curPage; - if ( start == 0 ) { - // we need to create a new page. - start = pageman.allocate( Magic.USED_PAGE ); - curBlock = file.get( start ); - curPage = DataPage.getDataPageView( curBlock ); - curPage.setFirst( DataPage.O_DATA ); - RecordHeader hdr = new RecordHeader( curBlock, DataPage.O_DATA ); - hdr.setAvailableSize( 0 ); - hdr.setCurrentSize( 0 ); - } else { - curBlock = file.get( start ); - curPage = DataPage.getDataPageView( curBlock ); - } + RecordHeader hdr = new RecordHeader(curBlock, pos); + while (hdr.getAvailableSize() != 0 && pos < RecordFile.BLOCK_SIZE) { + pos += hdr.getAvailableSize() + RecordHeader.SIZE; + if (pos == RecordFile.BLOCK_SIZE) { + // Again, a filled page. + file.release(curBlock); + return allocNew(size, 0); + } - // follow the rowids on this page to get to the last one. We don't - // fall off, because this is the last page, remember? - short pos = curPage.getFirst(); - if ( pos == 0 ) { - // page is exactly filled by the last block of a record - file.release( curBlock ); - return allocNew( size, 0 ); - } + hdr = new RecordHeader(curBlock, pos); + } - RecordHeader hdr = new RecordHeader( curBlock, pos ); - while ( hdr.getAvailableSize() != 0 && pos < RecordFile.BLOCK_SIZE ) { - pos += hdr.getAvailableSize() + RecordHeader.SIZE; - if ( pos == RecordFile.BLOCK_SIZE ) { - // Again, a filled page. - file.release( curBlock ); - return allocNew( size, 0 ); - } + if (pos == RecordHeader.SIZE) { + // the last record exactly filled the page. Restart forcing + // a new page. + file.release(curBlock); + } - hdr = new RecordHeader( curBlock, pos ); - } + // we have the position, now tack on extra pages until we've got + // enough space. + Location retval = new Location(start, pos); + int freeHere = RecordFile.BLOCK_SIZE - pos - RecordHeader.SIZE; + if (freeHere < size) { + // check whether the last page would have only a small bit left. + // if yes, increase the allocation. A small bit is a record + // header plus 16 bytes. + int lastSize = (size - freeHere) % DataPage.DATA_PER_PAGE; + if ((DataPage.DATA_PER_PAGE - lastSize) < (RecordHeader.SIZE + 16)) { + size += (DataPage.DATA_PER_PAGE - lastSize); + } - if ( pos == RecordHeader.SIZE ) { - // the last record exactly filled the page. Restart forcing - // a new page. - file.release( curBlock ); - } + // write out the header now so we don't have to come back. + hdr.setAvailableSize(size); + file.release(start, true); - // we have the position, now tack on extra pages until we've got - // enough space. - Location retval = new Location( start, pos ); - int freeHere = RecordFile.BLOCK_SIZE - pos - RecordHeader.SIZE; - if ( freeHere < size ) { - // check whether the last page would have only a small bit left. - // if yes, increase the allocation. A small bit is a record - // header plus 16 bytes. - int lastSize = (size - freeHere) % DataPage.DATA_PER_PAGE; - if (( DataPage.DATA_PER_PAGE - lastSize ) < (RecordHeader.SIZE + 16) ) { - size += (DataPage.DATA_PER_PAGE - lastSize); - } - - // write out the header now so we don't have to come back. - hdr.setAvailableSize( size ); - file.release( start, true ); - - int neededLeft = size - freeHere; - // Refactor these two blocks! 
- while ( neededLeft >= DataPage.DATA_PER_PAGE ) { - start = pageman.allocate( Magic.USED_PAGE ); - curBlock = file.get( start ); - curPage = DataPage.getDataPageView( curBlock ); - curPage.setFirst( (short) 0 ); // no rowids, just data - file.release( start, true ); - neededLeft -= DataPage.DATA_PER_PAGE; - } - if ( neededLeft > 0 ) { - // done with whole chunks, allocate last fragment. - start = pageman.allocate( Magic.USED_PAGE ); - curBlock = file.get( start ); - curPage = DataPage.getDataPageView( curBlock ); - curPage.setFirst( (short) (DataPage.O_DATA + neededLeft) ); - file.release( start, true ); - } - } else { - // just update the current page. If there's less than 16 bytes - // left, we increase the allocation (16 bytes is an arbitrary - // number). - if ( freeHere - size <= (16 + RecordHeader.SIZE) ) { - size = freeHere; - } - hdr.setAvailableSize( size ); - file.release( start, true ); - } - return retval; - + int neededLeft = size - freeHere; + // Refactor these two blocks! + while (neededLeft >= DataPage.DATA_PER_PAGE) { + start = pageman.allocate(Magic.USED_PAGE); + curBlock = file.get(start); + curPage = DataPage.getDataPageView(curBlock); + curPage.setFirst((short) 0); // no rowids, just data + file.release(start, true); + neededLeft -= DataPage.DATA_PER_PAGE; + } + if (neededLeft > 0) { + // done with whole chunks, allocate last fragment. + start = pageman.allocate(Magic.USED_PAGE); + curBlock = file.get(start); + curPage = DataPage.getDataPageView(curBlock); + curPage.setFirst((short) (DataPage.O_DATA + neededLeft)); + file.release(start, true); + } + } else { + // just update the current page. If there's less than 16 bytes + // left, we increase the allocation (16 bytes is an arbitrary + // number). + if (freeHere - size <= (16 + RecordHeader.SIZE)) { + size = freeHere; + } + hdr.setAvailableSize(size); + file.release(start, true); } + return retval; + } - private void free( Location id ) - throws IOException - { - // get the rowid, and write a zero current size into it. - BlockIo curBlock = file.get( id.getBlock() ); - DataPage curPage = DataPage.getDataPageView( curBlock ); - RecordHeader hdr = new RecordHeader( curBlock, id.getOffset() ); - hdr.setCurrentSize( 0 ); - file.release( id.getBlock(), true ); + private void free(Location id) throws IOException { + // get the rowid, and write a zero current size into it. + BlockIo curBlock = file.get(id.getBlock()); + DataPage.getDataPageView(curBlock); + RecordHeader hdr = new RecordHeader(curBlock, id.getOffset()); + hdr.setCurrentSize(0); + file.release(id.getBlock(), true); - // write the rowid to the free list - freeman.put( id, hdr.getAvailableSize() ); + // write the rowid to the free list + freeman.put(id, hdr.getAvailableSize()); + } + + /** + * Writes out data to a rowid. Assumes that any resizing has been done. + */ + private void write(Location rowid, byte[] data, int start, int length) + throws IOException { + PageCursor curs = new PageCursor(pageman, rowid.getBlock()); + BlockIo block = file.get(curs.getCurrent()); + RecordHeader hdr = new RecordHeader(block, rowid.getOffset()); + hdr.setCurrentSize(length); + if (length == 0) { + file.release(curs.getCurrent(), true); + return; } - /** - * Writes out data to a rowid. Assumes that any resizing has been - * done. 
- */ - private void write(Location rowid, byte[] data, int start, int length ) - throws IOException - { - PageCursor curs = new PageCursor( pageman, rowid.getBlock() ); - BlockIo block = file.get( curs.getCurrent() ); - RecordHeader hdr = new RecordHeader( block, rowid.getOffset() ); - hdr.setCurrentSize( length ); - if ( length == 0 ) { - file.release( curs.getCurrent(), true ); - return; - } + // copy bytes in + int offsetInBuffer = start; + int leftToWrite = length; + short dataOffset = (short) (rowid.getOffset() + RecordHeader.SIZE); + while (leftToWrite > 0) { + // copy current page's data to return buffer + int toCopy = RecordFile.BLOCK_SIZE - dataOffset; - // copy bytes in - int offsetInBuffer = start; - int leftToWrite = length; - short dataOffset = (short) (rowid.getOffset() + RecordHeader.SIZE); - while ( leftToWrite > 0 ) { - // copy current page's data to return buffer - int toCopy = RecordFile.BLOCK_SIZE - dataOffset; + if (leftToWrite < toCopy) { + toCopy = leftToWrite; + } + System.arraycopy(data, offsetInBuffer, block.getData(), dataOffset, + toCopy); - if ( leftToWrite < toCopy ) { - toCopy = leftToWrite; - } - System.arraycopy( data, offsetInBuffer, block.getData(), - dataOffset, toCopy ); + // Go to the next block + leftToWrite -= toCopy; + offsetInBuffer += toCopy; - // Go to the next block - leftToWrite -= toCopy; - offsetInBuffer += toCopy; + file.release(curs.getCurrent(), true); - file.release( curs.getCurrent(), true ); - - if ( leftToWrite > 0 ) { - block = file.get( curs.next() ); - dataOffset = DataPage.O_DATA; - } - } + if (leftToWrite > 0) { + block = file.get(curs.next()); + dataOffset = DataPage.O_DATA; + } } + } } - Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordFile.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordFile.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordFile.java (working copy) @@ -65,384 +65,389 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.*; -import java.util.*; +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; /** - * This class represents a random access file as a set of fixed size - * records. Each record has a physical record number, and records are - * cached in order to improve access. + * This class represents a random access file as a set of fixed size records. + * Each record has a physical record number, and records are cached in order to + * improve access. *

- * The set of dirty records on the in-use list constitutes a transaction. - * Later on, we will send these records to some recovery thingy. + * The set of dirty records on the in-use list constitutes a transaction. Later + * on, we will send these records to some recovery thingy. */ public final class RecordFile { - final TransactionManager txnMgr; + final TransactionManager txnMgr; - // Todo: reorganize in hashes and fifos as necessary. - // free -> inUse -> dirty -> inTxn -> free - // free is a cache, thus a FIFO. The rest are hashes. - private final LinkedList free = new LinkedList(); - private final HashMap inUse = new HashMap(); - private final HashMap dirty = new HashMap(); - private final HashMap inTxn = new HashMap(); + // Todo: reorganize in hashes and fifos as necessary. + // free -> inUse -> dirty -> inTxn -> free + // free is a cache, thus a FIFO. The rest are hashes. + private final LinkedList free = new LinkedList(); + private final HashMap inUse = new HashMap(); + private final HashMap dirty = new HashMap(); + private final HashMap inTxn = new HashMap(); - // transactions disabled? - private boolean transactionsDisabled = false; + // transactions disabled? + private boolean transactionsDisabled = false; - /** The length of a single block. */ - public final static int BLOCK_SIZE = 8192;//4096; + /** The length of a single block. */ + public final static int BLOCK_SIZE = 8192;// 4096; - /** The extension of a record file */ - final static String extension = ".db"; + /** The extension of a record file */ + final static String extension = ".db"; - /** A block of clean data to wipe clean pages. */ - final static byte[] cleanData = new byte[BLOCK_SIZE]; + /** A block of clean data to wipe clean pages. */ + final static byte[] cleanData = new byte[BLOCK_SIZE]; - private RandomAccessFile file; - private final String fileName; + private RandomAccessFile file; + private final String fileName; - /** - * Creates a new object on the indicated filename. The file is - * opened in read/write mode. - * - * @param fileName the name of the file to open or create, without - * an extension. - * @throws IOException whenever the creation of the underlying - * RandomAccessFile throws it. - */ - RecordFile(String fileName) throws IOException { - this.fileName = fileName; - file = new RandomAccessFile(fileName + extension, "rw"); - txnMgr = new TransactionManager(this); - } - - /** - * Creates a new object on the indicated filename. The file is - * opened in read/write mode. - * - * @param fileName the name of the file to open or create, without - * an extension. - * @throws IOException whenever the creation of the underlying - * RandomAccessFile throws it. - */ - RecordFile(File file) throws IOException { - this.fileName = file.getName(); - this.file = new RandomAccessFile(file, "rw"); - txnMgr = new TransactionManager(this); - } + /** + * Creates a new object on the indicated filename. The file is opened in + * read/write mode. + * + * @param fileName + * the name of the file to open or create, without an extension. + * @throws IOException + * whenever the creation of the underlying RandomAccessFile throws + * it. + */ + RecordFile(String fileName) throws IOException { + this.fileName = fileName; + file = new RandomAccessFile(fileName + extension, "rw"); + txnMgr = new TransactionManager(this); + } - /** - * Returns the file name. - */ - String getFileName() { - return fileName; - } + /** + * Creates a new object on the indicated filename. The file is opened in + * read/write mode. 
+ * + * @param fileName + * the name of the file to open or create, without an extension. + * @throws IOException + * whenever the creation of the underlying RandomAccessFile throws + * it. + */ + RecordFile(File file) throws IOException { + fileName = file.getName(); + this.file = new RandomAccessFile(file, "rw"); + txnMgr = new TransactionManager(this); + } - /** - * Disables transactions: doesn't sync and doesn't use the - * transaction manager. - */ - void disableTransactions() { - transactionsDisabled = true; - } + /** + * Returns the file name. + */ + String getFileName() { + return fileName; + } - /** - * Gets a block from the file. The returned byte array is - * the in-memory copy of the record, and thus can be written - * (and subsequently released with a dirty flag in order to - * write the block back). - * - * @param blockid The record number to retrieve. - */ - BlockIo get(long blockid) throws IOException { - Long key = new Long(blockid); + /** + * Disables transactions: doesn't sync and doesn't use the transaction + * manager. + */ + void disableTransactions() { + transactionsDisabled = true; + } - // try in transaction list, dirty list, free list - BlockIo node = (BlockIo) inTxn.get(key); - if (node != null) { - inTxn.remove(key); - inUse.put(key, node); - return node; - } - node = (BlockIo) dirty.get(key); - if (node != null) { - dirty.remove(key); - inUse.put(key, node); - return node; - } - for (Iterator i = free.iterator(); i.hasNext(); ) { - BlockIo cur = (BlockIo) i.next(); - if (cur.getBlockId() == blockid) { - node = cur; - i.remove(); - inUse.put(key, node); - return node; - } - } + /** + * Gets a block from the file. The returned byte array is the in-memory copy + * of the record, and thus can be written (and subsequently released with a + * dirty flag in order to write the block back). + * + * @param blockid + * The record number to retrieve. + */ + BlockIo get(long blockid) throws IOException { + Long key = new Long(blockid); - // sanity check: can't be on in use list - if (inUse.get(key) != null) { - throw new Error("double get for block " + blockid); - } + // try in transaction list, dirty list, free list + BlockIo node = (BlockIo) inTxn.get(key); + if (node != null) { + inTxn.remove(key); + inUse.put(key, node); + return node; + } + node = (BlockIo) dirty.get(key); + if (node != null) { + dirty.remove(key); + inUse.put(key, node); + return node; + } + for (Iterator i = free.iterator(); i.hasNext();) { + BlockIo cur = (BlockIo) i.next(); + if (cur.getBlockId() == blockid) { + node = cur; + i.remove(); + inUse.put(key, node); + return node; + } + } - // get a new node and read it from the file - node = getNewNode(blockid); - long offset = blockid * BLOCK_SIZE; - if (file.length() > 0 && offset <= file.length()) { - read(file, offset, node.getData(), BLOCK_SIZE); - } else { - System.arraycopy(cleanData, 0, node.getData(), 0, BLOCK_SIZE); - } - inUse.put(key, node); - node.setClean(); - return node; - } + // sanity check: can't be on in use list + if (inUse.get(key) != null) { + throw new Error("double get for block " + blockid); + } - - /** - * Releases a block. - * - * @param blockid The record number to release. - * @param isDirty If true, the block was modified since the get(). 
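get(blockid) above resolves a block by probing the caches in a fixed order - in-transaction list, dirty list, free list - and only falls back to reading the file (or wiping the buffer with cleanData past end-of-file) on a complete miss; whatever it returns is moved onto the in-use list. A simplified sketch of that lookup order, with maps and byte arrays replacing BlockIo and the real free LinkedList:

import java.util.HashMap;
import java.util.Map;

// Lookup order of RecordFile.get(): inTxn -> dirty -> free -> read from file.
public class BlockLookupSketch {
    static final int BLOCK_SIZE = 8192;

    final Map<Long, byte[]> inTxn = new HashMap<Long, byte[]>();
    final Map<Long, byte[]> dirty = new HashMap<Long, byte[]>();
    final Map<Long, byte[]> free = new HashMap<Long, byte[]>();
    final Map<Long, byte[]> inUse = new HashMap<Long, byte[]>();

    byte[] get(long blockid) {
        byte[] node = inTxn.remove(blockid);
        if (node == null) {
            node = dirty.remove(blockid);
        }
        if (node == null) {
            node = free.remove(blockid);
        }
        if (node == null) {
            // cache miss: the real code reads BLOCK_SIZE bytes at
            // blockid * BLOCK_SIZE, or wipes the buffer with cleanData
            // when the offset lies beyond the end of the file
            node = new byte[BLOCK_SIZE];
        }
        inUse.put(blockid, node);   // every returned block is tracked as in use
        return node;
    }

    public static void main(String[] args) {
        BlockLookupSketch rf = new BlockLookupSketch();
        byte[] b = rf.get(5L);
        System.out.println(b.length);                  // 8192
        System.out.println(rf.inUse.containsKey(5L));  // true
    }
}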
- */ - void release(long blockid, boolean isDirty) - throws IOException { - BlockIo node = (BlockIo) inUse.get(new Long(blockid)); - if (node == null) - throw new IOException("bad blockid " + blockid + " on release"); - if (!node.isDirty() && isDirty) - node.setDirty(); - release(node); + // get a new node and read it from the file + node = getNewNode(blockid); + long offset = blockid * BLOCK_SIZE; + if (file.length() > 0 && offset <= file.length()) { + read(file, offset, node.getData(), BLOCK_SIZE); + } else { + System.arraycopy(cleanData, 0, node.getData(), 0, BLOCK_SIZE); } + inUse.put(key, node); + node.setClean(); + return node; + } - /** - * Releases a block. - * - * @param block The block to release. - */ - void release(BlockIo block) { - Long key = new Long(block.getBlockId()); - inUse.remove(key); - if (block.isDirty()) { - // System.out.println( "Dirty: " + key + block ); - dirty.put(key, block); - } else { - if (!transactionsDisabled && block.isInTransaction()) { - inTxn.put(key, block); - } else { - free.add(block); - } - } + /** + * Releases a block. + * + * @param blockid + * The record number to release. + * @param isDirty + * If true, the block was modified since the get(). + */ + void release(long blockid, boolean isDirty) throws IOException { + BlockIo node = (BlockIo) inUse.get(new Long(blockid)); + if (node == null) { + throw new IOException("bad blockid " + blockid + " on release"); } + if (!node.isDirty() && isDirty) { + node.setDirty(); + } + release(node); + } - /** - * Discards a block (will not write the block even if it's dirty) - * - * @param block The block to discard. - */ - void discard(BlockIo block) { - Long key = new Long(block.getBlockId()); - inUse.remove(key); - - // note: block not added to free list on purpose, because - // it's considered invalid + /** + * Releases a block. + * + * @param block + * The block to release. + */ + void release(BlockIo block) { + Long key = new Long(block.getBlockId()); + inUse.remove(key); + if (block.isDirty()) { + // System.out.println( "Dirty: " + key + block ); + dirty.put(key, block); + } else { + if (!transactionsDisabled && block.isInTransaction()) { + inTxn.put(key, block); + } else { + free.add(block); + } } + } - /** - * Commits the current transaction by flushing all dirty buffers - * to disk. - */ - void commit() throws IOException { - // debugging... - if (!inUse.isEmpty() && inUse.size() > 1) { - showList(inUse.values().iterator()); - throw new Error("in use list not empty at commit time (" - + inUse.size() + ")"); - } + /** + * Discards a block (will not write the block even if it's dirty) + * + * @param block + * The block to discard. + */ + void discard(BlockIo block) { + Long key = new Long(block.getBlockId()); + inUse.remove(key); - // System.out.println("committing..."); + // note: block not added to free list on purpose, because + // it's considered invalid + } - if ( dirty.size() == 0 ) { - // if no dirty blocks, skip commit process - return; - } + /** + * Commits the current transaction by flushing all dirty buffers to disk. + */ + void commit() throws IOException { + // debugging... 
+ if (!inUse.isEmpty() && inUse.size() > 1) { + showList(inUse.values().iterator()); + throw new Error("in use list not empty at commit time (" + inUse.size() + + ")"); + } - if (!transactionsDisabled) { - txnMgr.start(); - } + // System.out.println("committing..."); - for (Iterator i = dirty.values().iterator(); i.hasNext(); ) { - BlockIo node = (BlockIo) i.next(); - i.remove(); - // System.out.println("node " + node + " map size now " + dirty.size()); - if (transactionsDisabled) { - long offset = node.getBlockId() * BLOCK_SIZE; - file.seek(offset); - file.write(node.getData()); - node.setClean(); - free.add(node); - } - else { - txnMgr.add(node); - inTxn.put(new Long(node.getBlockId()), node); - } - } - if (!transactionsDisabled) { - txnMgr.commit(); - } + if (dirty.size() == 0) { + // if no dirty blocks, skip commit process + return; } - /** - * Rollback the current transaction by discarding all dirty buffers - */ - void rollback() throws IOException { - // debugging... - if (!inUse.isEmpty()) { - showList(inUse.values().iterator()); - throw new Error("in use list not empty at rollback time (" - + inUse.size() + ")"); - } - // System.out.println("rollback..."); - dirty.clear(); + if (!transactionsDisabled) { + txnMgr.start(); + } - txnMgr.synchronizeLogFromDisk(); + for (Iterator i = dirty.values().iterator(); i.hasNext();) { + BlockIo node = (BlockIo) i.next(); + i.remove(); + // System.out.println("node " + node + " map size now " + dirty.size()); + if (transactionsDisabled) { + long offset = node.getBlockId() * BLOCK_SIZE; + file.seek(offset); + file.write(node.getData()); + node.setClean(); + free.add(node); + } else { + txnMgr.add(node); + inTxn.put(new Long(node.getBlockId()), node); + } + } + if (!transactionsDisabled) { + txnMgr.commit(); + } + } - if (!inTxn.isEmpty()) { - showList(inTxn.values().iterator()); - throw new Error("in txn list not empty at rollback time (" - + inTxn.size() + ")"); - }; + /** + * Rollback the current transaction by discarding all dirty buffers + */ + void rollback() throws IOException { + // debugging... + if (!inUse.isEmpty()) { + showList(inUse.values().iterator()); + throw new Error("in use list not empty at rollback time (" + inUse.size() + + ")"); } + // System.out.println("rollback..."); + dirty.clear(); - /** - * Commits and closes file. - */ - void close() throws IOException { - if (!dirty.isEmpty()) { - commit(); - } - txnMgr.shutdown(); - if ( transactionsDisabled ) { - txnMgr.removeLogFile(); - } + txnMgr.synchronizeLogFromDisk(); - if (!inTxn.isEmpty()) { - showList(inTxn.values().iterator()); - throw new Error("In transaction not empty"); - } + if (!inTxn.isEmpty()) { + showList(inTxn.values().iterator()); + throw new Error("in txn list not empty at rollback time (" + inTxn.size() + + ")"); + } + ; + } - // these actually ain't that bad in a production release - if (!dirty.isEmpty()) { - System.out.println("ERROR: dirty blocks at close time"); - showList(dirty.values().iterator()); - throw new Error("Dirty blocks at close time"); - } - if (!inUse.isEmpty()) { - System.out.println("ERROR: inUse blocks at close time"); - showList(inUse.values().iterator()); - throw new Error("inUse blocks at close time"); - } - - // debugging stuff to keep an eye on the free list - // System.out.println("Free list size:" + free.size()); - file.close(); - file = null; + /** + * Commits and closes file. 
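commit() above does nothing when there are no dirty blocks; otherwise each dirty block is either written straight to the file and recycled (transactions disabled) or handed to the transaction manager and parked on the in-transaction list until the log flushes. A compact sketch of that dispatch, with a callback interface standing in for both the RandomAccessFile write and TransactionManager.add():

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Commit dispatch from RecordFile.commit(), with the I/O abstracted away.
public class CommitDispatchSketch {
    interface BlockWriter { void write(long blockid, byte[] data); }

    final Map<Long, byte[]> dirty = new HashMap<Long, byte[]>();
    final Map<Long, byte[]> inTxn = new HashMap<Long, byte[]>();
    final List<byte[]> free = new ArrayList<byte[]>();
    boolean transactionsDisabled = false;

    void commit(BlockWriter directWriter, BlockWriter txnLog) {
        if (dirty.isEmpty()) {
            return;                          // no dirty blocks - skip the commit
        }
        for (Map.Entry<Long, byte[]> e : dirty.entrySet()) {
            if (transactionsDisabled) {
                directWriter.write(e.getKey(), e.getValue());
                free.add(e.getValue());      // block is clean again, recycle it
            } else {
                txnLog.write(e.getKey(), e.getValue());
                inTxn.put(e.getKey(), e.getValue());
            }
        }
        dirty.clear();
    }

    public static void main(String[] args) {
        CommitDispatchSketch rf = new CommitDispatchSketch();
        rf.dirty.put(3L, new byte[8]);
        rf.commit((id, d) -> System.out.println("direct write " + id),
                  (id, d) -> System.out.println("logged block " + id));
        System.out.println(rf.inTxn.containsKey(3L));   // true - waiting on the log
    }
}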
+ */ + void close() throws IOException { + if (!dirty.isEmpty()) { + commit(); } + txnMgr.shutdown(); + if (transactionsDisabled) { + txnMgr.removeLogFile(); + } - - /** - * Force closing the file and underlying transaction manager. - * Used for testing purposed only. - */ - void forceClose() throws IOException { - txnMgr.forceClose(); - file.close(); + if (!inTxn.isEmpty()) { + showList(inTxn.values().iterator()); + throw new Error("In transaction not empty"); } - /** - * Prints contents of a list - */ - private void showList(Iterator i) { - int cnt = 0; - while (i.hasNext()) { - System.out.println("elem " + cnt + ": " + i.next()); - cnt++; - } + // these actually ain't that bad in a production release + if (!dirty.isEmpty()) { + System.out.println("ERROR: dirty blocks at close time"); + showList(dirty.values().iterator()); + throw new Error("Dirty blocks at close time"); } + if (!inUse.isEmpty()) { + System.out.println("ERROR: inUse blocks at close time"); + showList(inUse.values().iterator()); + throw new Error("inUse blocks at close time"); + } + // debugging stuff to keep an eye on the free list + // System.out.println("Free list size:" + free.size()); + file.close(); + file = null; + } - /** - * Returns a new node. The node is retrieved (and removed) - * from the released list or created new. - */ - private BlockIo getNewNode(long blockid) - throws IOException { + /** + * Force closing the file and underlying transaction manager. Used for testing + * purposed only. + */ + void forceClose() throws IOException { + txnMgr.forceClose(); + file.close(); + } - BlockIo retval = null; - if (!free.isEmpty()) { - retval = (BlockIo) free.removeFirst(); - } - if (retval == null) - retval = new BlockIo(0, new byte[BLOCK_SIZE]); - - retval.setBlockId(blockid); - retval.setView(null); - return retval; + /** + * Prints contents of a list + */ + private void showList(Iterator i) { + int cnt = 0; + while (i.hasNext()) { + System.out.println("elem " + cnt + ": " + i.next()); + cnt++; } + } - /** - * Synchs a node to disk. This is called by the transaction manager's - * synchronization code. - */ - void synch(BlockIo node) throws IOException { - byte[] data = node.getData(); - if (data != null) { - long offset = node.getBlockId() * BLOCK_SIZE; - file.seek(offset); - file.write(data); - } + /** + * Returns a new node. The node is retrieved (and removed) from the released + * list or created new. + */ + private BlockIo getNewNode(long blockid) throws IOException { + + BlockIo retval = null; + if (!free.isEmpty()) { + retval = (BlockIo) free.removeFirst(); } + if (retval == null) { + retval = new BlockIo(0, new byte[BLOCK_SIZE]); + } - /** - * Releases a node from the transaction list, if it was sitting - * there. - * - * @param recycle true if block data can be reused - */ - void releaseFromTransaction(BlockIo node, boolean recycle) - throws IOException { - Long key = new Long(node.getBlockId()); - if ((inTxn.remove(key) != null) && recycle) { - free.add(node); - } + retval.setBlockId(blockid); + retval.setView(null); + return retval; + } + + /** + * Synchs a node to disk. This is called by the transaction manager's + * synchronization code. + */ + void synch(BlockIo node) throws IOException { + byte[] data = node.getData(); + if (data != null) { + long offset = node.getBlockId() * BLOCK_SIZE; + file.seek(offset); + file.write(data); } + } - /** - * Synchronizes the file. 
- */ - void sync() throws IOException { - file.getFD().sync(); + /** + * Releases a node from the transaction list, if it was sitting there. + * + * @param recycle + * true if block data can be reused + */ + void releaseFromTransaction(BlockIo node, boolean recycle) throws IOException { + Long key = new Long(node.getBlockId()); + if ((inTxn.remove(key) != null) && recycle) { + free.add(node); } + } + /** + * Synchronizes the file. + */ + void sync() throws IOException { + file.getFD().sync(); + } - /** - * Utility method: Read a block from a RandomAccessFile - */ - private static void read(RandomAccessFile file, long offset, - byte[] buffer, int nBytes) throws IOException { - file.seek(offset); - int remaining = nBytes; - int pos = 0; - while (remaining > 0) { - int read = file.read(buffer, pos, remaining); - if (read == -1) { - System.arraycopy(cleanData, 0, buffer, pos, remaining); - break; - } - remaining -= read; - pos += read; - } + /** + * Utility method: Read a block from a RandomAccessFile + */ + private static void read(RandomAccessFile file, long offset, byte[] buffer, + int nBytes) throws IOException { + file.seek(offset); + int remaining = nBytes; + int pos = 0; + while (remaining > 0) { + int read = file.read(buffer, pos, remaining); + if (read == -1) { + System.arraycopy(cleanData, 0, buffer, pos, remaining); + break; + } + remaining -= read; + pos += read; } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPage.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPage.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreeLogicalRowIdPage.java (working copy) @@ -66,107 +66,108 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * Class describing a page that holds logical rowids that were freed. Note - * that the methods have *physical* rowids in their signatures - this is - * because logical and physical rowids are internally the same, only their - * external representation (i.e. in the client API) differs. + * Class describing a page that holds logical rowids that were freed. Note that + * the methods have *physical* rowids in their signatures - this is because + * logical and physical rowids are internally the same, only their external + * representation (i.e. in the client API) differs. */ class FreeLogicalRowIdPage extends PageHeader { - // offsets - private static final short O_COUNT = PageHeader.SIZE; // short count - static final short O_FREE = (short)(O_COUNT + Magic.SZ_SHORT); - static final short ELEMS_PER_PAGE = (short) - ((RecordFile.BLOCK_SIZE - O_FREE) / PhysicalRowId.SIZE); + // offsets + private static final short O_COUNT = PageHeader.SIZE; // short count + static final short O_FREE = (short) (O_COUNT + Magic.SZ_SHORT); + static final short ELEMS_PER_PAGE = (short) ((RecordFile.BLOCK_SIZE - O_FREE) / PhysicalRowId.SIZE); - // slots we returned. - final PhysicalRowId[] slots = new PhysicalRowId[ELEMS_PER_PAGE]; + // slots we returned. + final PhysicalRowId[] slots = new PhysicalRowId[ELEMS_PER_PAGE]; - /** - * Constructs a data page view from the indicated block. - */ - FreeLogicalRowIdPage(BlockIo block) { - super(block); - } + /** + * Constructs a data page view from the indicated block. + */ + FreeLogicalRowIdPage(BlockIo block) { + super(block); + } - /** - * Factory method to create or return a data page for the - * indicated block. 
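The read() utility above guards against short reads: RandomAccessFile.read() may deliver fewer bytes than requested, so the loop keeps reading until the block buffer is full and zero-fills the remainder once end-of-file is reached (the original copies from the shared cleanData array rather than filling zeros). A self-contained sketch of the same pattern against a temporary file:

import java.io.IOException;
import java.io.RandomAccessFile;

// Short-read loop from RecordFile.read(), with Arrays.fill in place of cleanData.
public class FullBlockRead {
    static void readFully(RandomAccessFile file, long offset, byte[] buffer)
            throws IOException {
        file.seek(offset);
        int pos = 0;
        int remaining = buffer.length;
        while (remaining > 0) {
            int read = file.read(buffer, pos, remaining);
            if (read == -1) {
                // past end of file: treat the missing bytes as a clean page
                java.util.Arrays.fill(buffer, pos, pos + remaining, (byte) 0);
                break;
            }
            pos += read;
            remaining -= read;
        }
    }

    public static void main(String[] args) throws IOException {
        java.io.File tmp = java.io.File.createTempFile("block", ".db");
        tmp.deleteOnExit();
        RandomAccessFile raf = new RandomAccessFile(tmp, "rw");
        raf.write(new byte[] { 1, 2, 3 });        // file shorter than one "block"
        byte[] block = new byte[8];
        readFully(raf, 0, block);
        System.out.println(java.util.Arrays.toString(block)); // [1, 2, 3, 0, 0, 0, 0, 0]
        raf.close();
    }
}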
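The slot arithmetic in FreeLogicalRowIdPage is easier to follow with concrete numbers. A minimal standalone sketch, assuming RecordFile.BLOCK_SIZE is 8192 (that constant lives in RecordFile and is not part of this hunk); the other sizes follow from PageHeader and PhysicalRowId as declared in this patch:

public class FreeLogicalSlotMathSketch {
  static final int BLOCK_SIZE = 8192;                               // assumed RecordFile.BLOCK_SIZE
  static final int SZ_SHORT = 2, SZ_LONG = 8;                       // Magic.SZ_SHORT / Magic.SZ_LONG
  static final int PAGE_HEADER_SIZE = SZ_SHORT + SZ_LONG + SZ_LONG; // magic + next + prev = 18
  static final int PHYSICAL_ROWID_SIZE = SZ_LONG + SZ_SHORT;        // block + offset = 10
  static final int O_COUNT = PAGE_HEADER_SIZE;                      // 18
  static final int O_FREE = O_COUNT + SZ_SHORT;                     // 20
  static final int ELEMS_PER_PAGE = (BLOCK_SIZE - O_FREE) / PHYSICAL_ROWID_SIZE;

  static int slotToOffset(int slot) {
    return O_FREE + slot * PHYSICAL_ROWID_SIZE;                     // same formula as FreeLogicalRowIdPage
  }

  public static void main(String[] args) {
    System.out.println(ELEMS_PER_PAGE);                             // 817 free rowids fit on one page
    System.out.println(slotToOffset(3));                            // slot 3 starts at byte offset 50
  }
}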
+ */ + int getFirstAllocated() { + for (int i = 0; i < ELEMS_PER_PAGE; i++) { + if (isAllocated(i)) { + return i; + } } + return -1; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TransactionManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TransactionManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TransactionManager.java (working copy) @@ -65,379 +65,371 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.*; -import java.util.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.TreeSet; /** - * This class manages the transaction log that belongs to every - * {@link RecordFile}. The transaction log is either clean, or - * in progress. In the latter case, the transaction manager - * takes care of a roll forward. + * This class manages the transaction log that belongs to every + * {@link RecordFile}. The transaction log is either clean, or in progress. In + * the latter case, the transaction manager takes care of a roll forward. *
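To make the class comment concrete, this is the crash-recovery timeline implied by the code in this file, written out as comments; the RecordFile side of step 1 is an assumption, since its commit path is not part of this hunk:

// 1. During normal operation, dirty BlockIo pages are appended to the log
//    file "<name>.lg" by commit(), and only copied into the main data file
//    when synchronizeLogFromMemory() runs.
// 2. If the process dies between those two steps, the data file is stale,
//    but the log still holds the committed pages.
// 3. On the next open, the TransactionManager constructor calls recover(),
//    which replays every complete transaction found in the log into the
//    data file, syncs it, and deletes the log: that replay is the roll forward.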
- * Implementation note: this is a proof-of-concept implementation - * which hasn't been optimized for speed. For instance, all sorts - * of streams are created for every transaction. + * Implementation note: this is a proof-of-concept implementation which hasn't + * been optimized for speed. For instance, all sorts of streams are created for + * every transaction. */ // TODO: Handle the case where we are recovering lg9 and lg0, were we // should start with lg9 instead of lg0! public final class TransactionManager { - private RecordFile owner; + private final RecordFile owner; - // streams for transaction log. - private FileOutputStream fos; - private ObjectOutputStream oos; + // streams for transaction log. + private FileOutputStream fos; + private ObjectOutputStream oos; - /** - * By default, we keep 10 transactions in the log file before - * synchronizing it with the main database file. - */ - static final int DEFAULT_TXNS_IN_LOG = 10; + /** + * By default, we keep 10 transactions in the log file before synchronizing it + * with the main database file. + */ + static final int DEFAULT_TXNS_IN_LOG = 10; - /** - * Maximum number of transactions before the log file is - * synchronized with the main database file. - */ - private int _maxTxns = DEFAULT_TXNS_IN_LOG; + /** + * Maximum number of transactions before the log file is synchronized with the + * main database file. + */ + private int _maxTxns = DEFAULT_TXNS_IN_LOG; - /** - * In-core copy of transactions. We could read everything back from - * the log file, but the RecordFile needs to keep the dirty blocks in - * core anyway, so we might as well point to them and spare us a lot - * of hassle. - */ - private ArrayList[] txns = new ArrayList[DEFAULT_TXNS_IN_LOG]; - private int curTxn = -1; + /** + * In-core copy of transactions. We could read everything back from the log + * file, but the RecordFile needs to keep the dirty blocks in core anyway, so + * we might as well point to them and spare us a lot of hassle. + */ + private ArrayList[] txns = new ArrayList[DEFAULT_TXNS_IN_LOG]; + private int curTxn = -1; - /** Extension of a log file. */ - static final String extension = ".lg"; - - /** log file name */ - private String logFileName; + /** Extension of a log file. */ + static final String extension = ".lg"; - /** - * Instantiates a transaction manager instance. If recovery - * needs to be performed, it is done. - * - * @param owner the RecordFile instance that owns this transaction mgr. - */ - TransactionManager(RecordFile owner) throws IOException { - this.owner = owner; - logFileName = null; - recover(); - open(); - } + /** log file name */ + private String logFileName; - - /** - * Synchronize log file data with the main database file. - *
- * After this call, the main database file is guaranteed to be - * consistent and guaranteed to be the only file needed for - * backup purposes. - */ - public void synchronizeLog() - throws IOException - { - synchronizeLogFromMemory(); - } + /** + * Instantiates a transaction manager instance. If recovery needs to be + * performed, it is done. + * + * @param owner + * the RecordFile instance that owns this transaction mgr. + */ + TransactionManager(RecordFile owner) throws IOException { + this.owner = owner; + logFileName = null; + recover(); + open(); + } - - /** - * Set the maximum number of transactions to record in - * the log (and keep in memory) before the log is - * synchronized with the main database file. - *
- * This method must be called while there are no - * pending transactions in the log. - */ - public void setMaximumTransactionsInLog( int maxTxns ) - throws IOException - { - if ( maxTxns <= 0 ) { - throw new IllegalArgumentException( - "Argument 'maxTxns' must be greater than 0." ); - } - if ( curTxn != -1 ) { - throw new IllegalStateException( - "Cannot change setting while transactions are pending in the log" ); - } - _maxTxns = maxTxns; - txns = new ArrayList[ maxTxns ]; - } + /** + * Synchronize log file data with the main database file. + *
+ * After this call, the main database file is guaranteed to be consistent and + * guaranteed to be the only file needed for backup purposes. + */ + public void synchronizeLog() throws IOException { + synchronizeLogFromMemory(); + } - - /** Builds logfile name */ - private String makeLogName() { - return owner.getFileName() + extension; + /** + * Set the maximum number of transactions to record in the log (and keep in + * memory) before the log is synchronized with the main database file. + *
+ * This method must be called while there are no pending transactions in the + * log. + */ + public void setMaximumTransactionsInLog(int maxTxns) throws IOException { + if (maxTxns <= 0) { + throw new IllegalArgumentException( + "Argument 'maxTxns' must be greater than 0."); } + if (curTxn != -1) { + throw new IllegalStateException( + "Cannot change setting while transactions are pending in the log"); + } + _maxTxns = maxTxns; + txns = new ArrayList[maxTxns]; + } + /** Builds logfile name */ + private String makeLogName() { + return owner.getFileName() + extension; + } - /** Synchs in-core transactions to data file and opens a fresh log */ - private void synchronizeLogFromMemory() throws IOException { - close(); + /** Synchs in-core transactions to data file and opens a fresh log */ + private void synchronizeLogFromMemory() throws IOException { + close(); - TreeSet blockList = new TreeSet( new BlockIoComparator() ); + TreeSet blockList = new TreeSet(new BlockIoComparator()); - int numBlocks = 0; - int writtenBlocks = 0; - for (int i = 0; i < _maxTxns; i++) { - if (txns[i] == null) - continue; - // Add each block to the blockList, replacing the old copy of this - // block if necessary, thus avoiding writing the same block twice - for (Iterator k = txns[i].iterator(); k.hasNext(); ) { - BlockIo block = (BlockIo)k.next(); - if ( blockList.contains( block ) ) { - block.decrementTransactionCount(); - } - else { - writtenBlocks++; - boolean result = blockList.add( block ); - } - numBlocks++; - } - - txns[i] = null; + int numBlocks = 0; + int writtenBlocks = 0; + for (int i = 0; i < _maxTxns; i++) { + if (txns[i] == null) { + continue; + } + // Add each block to the blockList, replacing the old copy of this + // block if necessary, thus avoiding writing the same block twice + for (Iterator k = txns[i].iterator(); k.hasNext();) { + BlockIo block = (BlockIo) k.next(); + if (blockList.contains(block)) { + block.decrementTransactionCount(); + } else { + writtenBlocks++; + blockList.add(block); } - // Write the blocks from the blockList to disk - synchronizeBlocks(blockList.iterator(), true); + numBlocks++; + } - owner.sync(); - open(); + txns[i] = null; } + // Write the blocks from the blockList to disk + synchronizeBlocks(blockList.iterator(), true); + owner.sync(); + open(); + } - /** Opens the log file */ - private void open() throws IOException { - logFileName = makeLogName(); - fos = new FileOutputStream(logFileName); - oos = new ObjectOutputStream(fos); - oos.writeShort(Magic.LOGFILE_HEADER); - oos.flush(); - curTxn = -1; + /** Opens the log file */ + private void open() throws IOException { + logFileName = makeLogName(); + fos = new FileOutputStream(logFileName); + oos = new ObjectOutputStream(fos); + oos.writeShort(Magic.LOGFILE_HEADER); + oos.flush(); + curTxn = -1; + } + + /** Startup recovery on all files */ + private void recover() throws IOException { + String logName = makeLogName(); + File logFile = new File(logName); + if (!logFile.exists()) { + return; } + if (logFile.length() == 0) { + logFile.delete(); + return; + } - /** Startup recovery on all files */ - private void recover() throws IOException { - String logName = makeLogName(); - File logFile = new File(logName); - if (!logFile.exists()) - return; - if (logFile.length() == 0) { - logFile.delete(); - return; - } + FileInputStream fis = new FileInputStream(logFile); + ObjectInputStream ois = new ObjectInputStream(fis); - FileInputStream fis = new FileInputStream(logFile); - ObjectInputStream ois = new ObjectInputStream(fis); + 
try { + if (ois.readShort() != Magic.LOGFILE_HEADER) { + throw new Error("Bad magic on log file"); + } + } catch (IOException e) { + // corrupted/empty logfile + logFile.delete(); + return; + } - try { - if (ois.readShort() != Magic.LOGFILE_HEADER) - throw new Error("Bad magic on log file"); - } catch (IOException e) { - // corrupted/empty logfile - logFile.delete(); - return; - } + while (true) { + ArrayList blocks = null; + try { + blocks = (ArrayList) ois.readObject(); + } catch (ClassNotFoundException e) { + throw new Error("Unexcepted exception: " + e); + } catch (IOException e) { + // corrupted logfile, ignore rest of transactions + break; + } + synchronizeBlocks(blocks.iterator(), false); - while (true) { - ArrayList blocks = null; - try { - blocks = (ArrayList) ois.readObject(); - } catch (ClassNotFoundException e) { - throw new Error("Unexcepted exception: " + e); - } catch (IOException e) { - // corrupted logfile, ignore rest of transactions - break; - } - synchronizeBlocks(blocks.iterator(), false); - - // ObjectInputStream must match exactly each - // ObjectOutputStream created during writes - try { - ois = new ObjectInputStream(fis); - } catch (IOException e) { - // corrupted logfile, ignore rest of transactions - break; - } - } - owner.sync(); - logFile.delete(); + // ObjectInputStream must match exactly each + // ObjectOutputStream created during writes + try { + ois = new ObjectInputStream(fis); + } catch (IOException e) { + // corrupted logfile, ignore rest of transactions + break; + } } + owner.sync(); + logFile.delete(); + } - /** Synchronizes the indicated blocks with the owner. */ - private void synchronizeBlocks(Iterator blockIterator, boolean fromCore) - throws IOException { - // write block vector elements to the data file. - while ( blockIterator.hasNext() ) { - BlockIo cur = (BlockIo)blockIterator.next(); - owner.synch(cur); - if (fromCore) { - cur.decrementTransactionCount(); - if (!cur.isInTransaction()) { - owner.releaseFromTransaction(cur, true); - } - } + /** Synchronizes the indicated blocks with the owner. */ + private void synchronizeBlocks(Iterator blockIterator, boolean fromCore) + throws IOException { + // write block vector elements to the data file. + while (blockIterator.hasNext()) { + BlockIo cur = (BlockIo) blockIterator.next(); + owner.synch(cur); + if (fromCore) { + cur.decrementTransactionCount(); + if (!cur.isInTransaction()) { + owner.releaseFromTransaction(cur, true); } + } } + } - - /** Set clean flag on the blocks. */ - private void setClean(ArrayList blocks) - throws IOException { - for (Iterator k = blocks.iterator(); k.hasNext(); ) { - BlockIo cur = (BlockIo) k.next(); - cur.setClean(); - } + /** Set clean flag on the blocks. */ + private void setClean(ArrayList blocks) throws IOException { + for (Iterator k = blocks.iterator(); k.hasNext();) { + BlockIo cur = (BlockIo) k.next(); + cur.setClean(); } + } - /** Discards the indicated blocks and notify the owner. */ - private void discardBlocks(ArrayList blocks) - throws IOException { - for (Iterator k = blocks.iterator(); k.hasNext(); ) { - BlockIo cur = (BlockIo) k.next(); - cur.decrementTransactionCount(); - if (!cur.isInTransaction()) { - owner.releaseFromTransaction(cur, false); - } - } + /** Discards the indicated blocks and notify the owner. 
*/ + private void discardBlocks(ArrayList blocks) throws IOException { + for (Iterator k = blocks.iterator(); k.hasNext();) { + BlockIo cur = (BlockIo) k.next(); + cur.decrementTransactionCount(); + if (!cur.isInTransaction()) { + owner.releaseFromTransaction(cur, false); + } } + } - /** - * Starts a transaction. This can block if all slots have been filled - * with full transactions, waiting for the synchronization thread to - * clean out slots. - */ - void start() throws IOException { - curTxn++; - if (curTxn == _maxTxns) { - synchronizeLogFromMemory(); - curTxn = 0; - } - txns[curTxn] = new ArrayList(); + /** + * Starts a transaction. This can block if all slots have been filled with + * full transactions, waiting for the synchronization thread to clean out + * slots. + */ + void start() throws IOException { + curTxn++; + if (curTxn == _maxTxns) { + synchronizeLogFromMemory(); + curTxn = 0; } + txns[curTxn] = new ArrayList(); + } - /** - * Indicates the block is part of the transaction. - */ - void add(BlockIo block) throws IOException { - block.incrementTransactionCount(); - txns[curTxn].add(block); - } + /** + * Indicates the block is part of the transaction. + */ + void add(BlockIo block) throws IOException { + block.incrementTransactionCount(); + txns[curTxn].add(block); + } - /** - * Commits the transaction to the log file. - */ - void commit() throws IOException { - oos.writeObject(txns[curTxn]); - sync(); + /** + * Commits the transaction to the log file. + */ + void commit() throws IOException { + oos.writeObject(txns[curTxn]); + sync(); - // set clean flag to indicate blocks have been written to log - setClean(txns[curTxn]); + // set clean flag to indicate blocks have been written to log + setClean(txns[curTxn]); - // open a new ObjectOutputStream in order to store - // newer states of BlockIo - oos = new ObjectOutputStream(fos); - } + // open a new ObjectOutputStream in order to store + // newer states of BlockIo + oos = new ObjectOutputStream(fos); + } - /** Flushes and syncs */ - private void sync() throws IOException { - oos.flush(); - fos.flush(); - fos.getFD().sync(); - } + /** Flushes and syncs */ + private void sync() throws IOException { + oos.flush(); + fos.flush(); + fos.getFD().sync(); + } - /** - * Shutdowns the transaction manager. Resynchronizes outstanding - * logs. - */ - void shutdown() throws IOException { - synchronizeLogFromMemory(); - close(); - } + /** + * Shutdowns the transaction manager. Resynchronizes outstanding logs. + */ + void shutdown() throws IOException { + synchronizeLogFromMemory(); + close(); + } - /** - * Closes open files. - */ - private void close() throws IOException { - sync(); - oos.close(); - fos.close(); - oos = null; - fos = null; + /** + * Closes open files. + */ + private void close() throws IOException { + sync(); + oos.close(); + fos.close(); + oos = null; + fos = null; + } + + public void removeLogFile() { + // if file is not closed yet, just return + if (oos != null) { + return; } - - public void removeLogFile() { - // if file is not closed yet, just return - if ( oos != null ) - return; - if ( logFileName != null ) { - File file = new File(logFileName) ; - file.delete(); - logFileName = null; - } + if (logFileName != null) { + File file = new File(logFileName); + file.delete(); + logFileName = null; } + } - /** - * Force closing the file without synchronizing pending transaction data. - * Used for testing purposes only. 
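For orientation, the start/add/commit methods above are meant to be driven in a fixed order by RecordFile. A minimal conceptual sketch, assuming package-private access from inside the recman package; updateBlock is a made-up helper, not code from this patch:

void updateBlock(TransactionManager txnMgr, BlockIo block) throws IOException {
  txnMgr.start();    // claim a slot in the in-memory transaction table
  txnMgr.add(block); // pin the dirty page into the current transaction
  txnMgr.commit();   // serialize the block list to the log file and fsync it
}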
- */ - void forceClose() throws IOException { - oos.close(); - fos.close(); - oos = null; - fos = null; - } + /** + * Force closing the file without synchronizing pending transaction data. Used + * for testing purposes only. + */ + void forceClose() throws IOException { + oos.close(); + fos.close(); + oos = null; + fos = null; + } - /** - * Use the disk-based transaction log to synchronize the data file. - * Outstanding memory logs are discarded because they are believed - * to be inconsistent. - */ - void synchronizeLogFromDisk() throws IOException { - close(); + /** + * Use the disk-based transaction log to synchronize the data file. + * Outstanding memory logs are discarded because they are believed to be + * inconsistent. + */ + void synchronizeLogFromDisk() throws IOException { + close(); - for ( int i=0; i < _maxTxns; i++ ) { - if (txns[i] == null) - continue; - discardBlocks(txns[i]); - txns[i] = null; - } - - recover(); - open(); + for (int i = 0; i < _maxTxns; i++) { + if (txns[i] == null) { + continue; + } + discardBlocks(txns[i]); + txns[i] = null; } + recover(); + open(); + } - /** INNER CLASS. - * Comparator class for use by the tree set used to store the blocks - * to write for this transaction. The BlockIo objects are ordered by - * their blockIds. - */ - public static class BlockIoComparator - implements Comparator - { + /** + * INNER CLASS. Comparator class for use by the tree set used to store the + * blocks to write for this transaction. The BlockIo objects are ordered by + * their blockIds. + */ + public static class BlockIoComparator implements Comparator { - public int compare( Object o1, Object o2 ) { - BlockIo block1 = (BlockIo)o1; - BlockIo block2 = (BlockIo)o2; - int result = 0; - if ( block1.getBlockId() == block2.getBlockId() ) { - result = 0; - } - else if ( block1.getBlockId() < block2.getBlockId() ) { - result = -1; - } - else { - result = 1; - } - return result; - } + public int compare(Object o1, Object o2) { + BlockIo block1 = (BlockIo) o1; + BlockIo block2 = (BlockIo) o2; + int result = 0; + if (block1.getBlockId() == block2.getBlockId()) { + result = 0; + } else if (block1.getBlockId() < block2.getBlockId()) { + result = -1; + } else { + result = 1; + } + return result; + } - public boolean equals(Object obj) { - return super.equals(obj); - } - } // class BlockIOComparator + @Override + public boolean equals(Object obj) { + return super.equals(obj); + } + } // class BlockIOComparator } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPage.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPage.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowIdPage.java (working copy) @@ -66,103 +66,105 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * Class describing a page that holds physical rowids that were freed. + * Class describing a page that holds physical rowids that were freed. */ final class FreePhysicalRowIdPage extends PageHeader { - // offsets - private static final short O_COUNT = PageHeader.SIZE; // short count - static final short O_FREE = O_COUNT + Magic.SZ_SHORT; - static final short ELEMS_PER_PAGE = - (RecordFile.BLOCK_SIZE - O_FREE) / FreePhysicalRowId.SIZE; - - // slots we returned. 
- FreePhysicalRowId[] slots = new FreePhysicalRowId[ELEMS_PER_PAGE]; + // offsets + private static final short O_COUNT = PageHeader.SIZE; // short count + static final short O_FREE = O_COUNT + Magic.SZ_SHORT; + static final short ELEMS_PER_PAGE = (RecordFile.BLOCK_SIZE - O_FREE) + / FreePhysicalRowId.SIZE; - /** - * Constructs a data page view from the indicated block. - */ - FreePhysicalRowIdPage(BlockIo block) { - super(block); - } + // slots we returned. + FreePhysicalRowId[] slots = new FreePhysicalRowId[ELEMS_PER_PAGE]; - /** - * Factory method to create or return a data page for the - * indicated block. - */ - static FreePhysicalRowIdPage getFreePhysicalRowIdPageView(BlockIo block) { - BlockView view = block.getView(); - if (view != null && view instanceof FreePhysicalRowIdPage) + /** + * Constructs a data page view from the indicated block. + */ + FreePhysicalRowIdPage(BlockIo block) { + super(block); + } + + /** + * Factory method to create or return a data page for the indicated block. + */ + static FreePhysicalRowIdPage getFreePhysicalRowIdPageView(BlockIo block) { + BlockView view = block.getView(); + if (view != null && view instanceof FreePhysicalRowIdPage) { return (FreePhysicalRowIdPage) view; - else + } else { return new FreePhysicalRowIdPage(block); } + } - /** Returns the number of free rowids */ - short getCount() { - return block.readShort(O_COUNT); - } + /** Returns the number of free rowids */ + short getCount() { + return block.readShort(O_COUNT); + } - /** Sets the number of free rowids */ - private void setCount(short i) { - block.writeShort(O_COUNT, i); - } + /** Sets the number of free rowids */ + private void setCount(short i) { + block.writeShort(O_COUNT, i); + } - /** Frees a slot */ - void free(int slot) { - get(slot).setSize(0); - setCount((short) (getCount() - 1)); - } + /** Frees a slot */ + void free(int slot) { + get(slot).setSize(0); + setCount((short) (getCount() - 1)); + } - /** Allocates a slot */ - FreePhysicalRowId alloc(int slot) { - setCount((short) (getCount() + 1)); - return get(slot); - } + /** Allocates a slot */ + FreePhysicalRowId alloc(int slot) { + setCount((short) (getCount() + 1)); + return get(slot); + } - /** Returns true if a slot is allocated */ - boolean isAllocated(int slot) { - return get(slot).getSize() != 0; - } + /** Returns true if a slot is allocated */ + boolean isAllocated(int slot) { + return get(slot).getSize() != 0; + } - /** Returns true if a slot is free */ - boolean isFree(int slot) { - return !isAllocated(slot); + /** Returns true if a slot is free */ + boolean isFree(int slot) { + return !isAllocated(slot); + } + + /** Returns the value of the indicated slot */ + FreePhysicalRowId get(int slot) { + if (slots[slot] == null) { + slots[slot] = new FreePhysicalRowId(block, slotToOffset(slot)); } - - - /** Returns the value of the indicated slot */ - FreePhysicalRowId get(int slot) { - if (slots[slot] == null) - slots[slot] = new FreePhysicalRowId(block, slotToOffset(slot));; - return slots[slot]; - } + ; + return slots[slot]; + } - /** Converts slot to offset */ - short slotToOffset(int slot) { - return (short) (O_FREE + - (slot * FreePhysicalRowId.SIZE)); + /** Converts slot to offset */ + short slotToOffset(int slot) { + return (short) (O_FREE + (slot * FreePhysicalRowId.SIZE)); + } + + /** + * Returns first free slot, -1 if no slots are available + */ + int getFirstFree() { + for (int i = 0; i < ELEMS_PER_PAGE; i++) { + if (isFree(i)) { + return i; + } } - - /** - * Returns first free slot, -1 if no slots are 
available - */ - int getFirstFree() { - for (int i = 0; i < ELEMS_PER_PAGE; i++) { - if (isFree(i)) - return i; + return -1; } - return -1; + + /** + * Returns first slot with available size >= indicated size, or -1 if no slots + * are available. + **/ + int getFirstLargerThan(int size) { + for (int i = 0; i < ELEMS_PER_PAGE; i++) { + if (isAllocated(i) && get(i).getSize() >= size) { + return i; + } } - - /** - * Returns first slot with available size >= indicated size, - * or -1 if no slots are available. - **/ - int getFirstLargerThan(int size) { - for (int i = 0; i < ELEMS_PER_PAGE; i++) { - if (isAllocated(i) && get(i).getSize() >= size) - return i; + return -1; } - return -1; - } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Magic.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Magic.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Magic.java (working copy) @@ -66,40 +66,40 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * This interface contains magic cookies. + * This interface contains magic cookies. */ public interface Magic { - /** Magic cookie at start of file */ - public short FILE_HEADER = 0x1350; + /** Magic cookie at start of file */ + public short FILE_HEADER = 0x1350; - /** Magic for blocks. They're offset by the block type magic codes. */ - public short BLOCK = 0x1351; + /** Magic for blocks. They're offset by the block type magic codes. */ + public short BLOCK = 0x1351; - /** Magics for blocks in certain lists. Offset by baseBlockMagic */ - short FREE_PAGE = 0; - short USED_PAGE = 1; - short TRANSLATION_PAGE = 2; - short FREELOGIDS_PAGE = 3; - short FREEPHYSIDS_PAGE = 4; + /** Magics for blocks in certain lists. Offset by baseBlockMagic */ + short FREE_PAGE = 0; + short USED_PAGE = 1; + short TRANSLATION_PAGE = 2; + short FREELOGIDS_PAGE = 3; + short FREEPHYSIDS_PAGE = 4; - /** Number of lists in a file */ - public short NLISTS = 5; + /** Number of lists in a file */ + public short NLISTS = 5; - /** - * Maximum number of blocks in a file, leaving room for a 16 bit - * offset encoded within a long. - */ - long MAX_BLOCKS = 0x7FFFFFFFFFFFL; + /** + * Maximum number of blocks in a file, leaving room for a 16 bit offset + * encoded within a long. 
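The MAX_BLOCKS comment above is about how jdbm squeezes a (block, offset) pair into a single long. The packing itself lives elsewhere (in the Location helper), so treat this sketch as an assumption about that layout rather than code from this patch:

// 47 usable block bits (MAX_BLOCKS = 0x7FFFFFFFFFFFL) plus a 16 bit offset
// still fit in the 63 value bits of a signed long.
static long toLocation(long block, short offset) {
  return (block << 16) | (offset & 0xffffL);
}

static long blockOf(long location) {
  return location >>> 16;
}

static short offsetOf(long location) {
  return (short) (location & 0xffff);
}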
+ */ + long MAX_BLOCKS = 0x7FFFFFFFFFFFL; - /** Magic for transaction file */ - short LOGFILE_HEADER = 0x1360; + /** Magic for transaction file */ + short LOGFILE_HEADER = 0x1360; - /** Size of an externalized byte */ - public short SZ_BYTE = 1; - /** Size of an externalized short */ - public short SZ_SHORT = 2; - /** Size of an externalized int */ - public short SZ_INT = 4; - /** Size of an externalized long */ - public short SZ_LONG = 8; + /** Size of an externalized byte */ + public short SZ_BYTE = 1; + /** Size of an externalized short */ + public short SZ_SHORT = 2; + /** Size of an externalized int */ + public short SZ_INT = 4; + /** Size of an externalized long */ + public short SZ_LONG = 8; } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowId.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowId.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PhysicalRowId.java (working copy) @@ -66,48 +66,48 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * A physical rowid is nothing else than a pointer to a physical location - * in a file - a (block, offset) tuple. - *
- * Note: The fact that the offset is modelled as a short limits - * the block size to 32k. + * A physical rowid is nothing else than a pointer to a physical location in a + * file - a (block, offset) tuple. + *
+ * Note: The fact that the offset is modelled as a short limits the block + * size to 32k. */ class PhysicalRowId { - // offsets - private static final short O_BLOCK = 0; // long block - private static final short O_OFFSET = Magic.SZ_LONG; // short offset - static final int SIZE = O_OFFSET + Magic.SZ_SHORT; - - // my block and the position within the block - BlockIo block; - short pos; + // offsets + private static final short O_BLOCK = 0; // long block + private static final short O_OFFSET = Magic.SZ_LONG; // short offset + static final int SIZE = O_OFFSET + Magic.SZ_SHORT; - /** - * Constructs a physical rowid from the indicated data starting at - * the indicated position. - */ - PhysicalRowId(BlockIo block, short pos) { - this.block = block; - this.pos = pos; - } - - /** Returns the block number */ - long getBlock() { - return block.readLong(pos + O_BLOCK); - } - - /** Sets the block number */ - void setBlock(long value) { - block.writeLong(pos + O_BLOCK, value); - } - - /** Returns the offset */ - short getOffset() { - return block.readShort(pos + O_OFFSET); - } - - /** Sets the offset */ - void setOffset(short value) { - block.writeShort(pos + O_OFFSET, value); - } + // my block and the position within the block + BlockIo block; + short pos; + + /** + * Constructs a physical rowid from the indicated data starting at the + * indicated position. + */ + PhysicalRowId(BlockIo block, short pos) { + this.block = block; + this.pos = pos; + } + + /** Returns the block number */ + long getBlock() { + return block.readLong(pos + O_BLOCK); + } + + /** Sets the block number */ + void setBlock(long value) { + block.writeLong(pos + O_BLOCK, value); + } + + /** Returns the offset */ + short getOffset() { + return block.readShort(pos + O_OFFSET); + } + + /** Sets the offset */ + void setOffset(short value) { + block.writeShort(pos + O_OFFSET, value); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java (working copy) @@ -65,261 +65,249 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.*; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; /** - * This class wraps a page-sized byte array and provides methods - * to read and write data to and from it. The readers and writers - * are just the ones that the rest of the toolkit needs, nothing else. - * Values written are compatible with java.io routines. - * - * @see java.io.DataInput - * @see java.io.DataOutput + * This class wraps a page-sized byte array and provides methods to read and + * write data to and from it. The readers and writers are just the ones that the + * rest of the toolkit needs, nothing else. Values written are compatible with + * java.io routines. 
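The claim that values are compatible with java.io routines can be checked directly: BlockIo writes the high byte first, exactly like DataOutputStream. A small self-contained check, not part of the patch:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

public class BlockIoEndianCheck {
  public static void main(String[] args) throws IOException {
    short value = 0x1234;
    // The same byte layout BlockIo.writeShort produces.
    byte[] manual = { (byte) (0xff & (value >> 8)), (byte) (0xff & value) };
    // What java.io produces for the same value.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    new DataOutputStream(bos).writeShort(value);
    System.out.println(Arrays.equals(manual, bos.toByteArray())); // true
  }
}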
+ * + * @see java.io.DataInput + * @see java.io.DataOutput */ public final class BlockIo implements java.io.Externalizable { - public final static long serialVersionUID = 2L; + public final static long serialVersionUID = 2L; - private long blockId; + private long blockId; - private transient byte[] data; // work area - private transient BlockView view = null; - private transient boolean dirty = false; - private transient int transactionCount = 0; + private transient byte[] data; // work area + private transient BlockView view = null; + private transient boolean dirty = false; + private transient int transactionCount = 0; - /** - * Default constructor for serialization - */ - public BlockIo() { - // empty - } + /** + * Default constructor for serialization + */ + public BlockIo() { + // empty + } - /** - * Constructs a new BlockIo instance working on the indicated - * buffer. - */ - BlockIo(long blockId, byte[] data) { - // removeme for production version - if (blockId > 10000000000L) - throw new Error("bogus block id " + blockId); - this.blockId = blockId; - this.data = data; + /** + * Constructs a new BlockIo instance working on the indicated buffer. + */ + BlockIo(long blockId, byte[] data) { + // removeme for production version + if (blockId > 10000000000L) { + throw new Error("bogus block id " + blockId); } + this.blockId = blockId; + this.data = data; + } - /** - * Returns the underlying array - */ - byte[] getData() { - return data; - } + /** + * Returns the underlying array + */ + byte[] getData() { + return data; + } - /** - * Sets the block number. Should only be called by RecordFile. - */ - void setBlockId(long id) { - if (isInTransaction()) - throw new Error("BlockId assigned for transaction block"); - // removeme for production version - if (id > 10000000000L) - throw new Error("bogus block id " + id); - blockId = id; + /** + * Sets the block number. Should only be called by RecordFile. + */ + void setBlockId(long id) { + if (isInTransaction()) { + throw new Error("BlockId assigned for transaction block"); } - - /** - * Returns the block number. - */ - long getBlockId() { - return blockId; + // removeme for production version + if (id > 10000000000L) { + throw new Error("bogus block id " + id); } + blockId = id; + } - /** - * Returns the current view of the block. - */ - public BlockView getView() { - return view; - } + /** + * Returns the block number. + */ + long getBlockId() { + return blockId; + } - /** - * Sets the current view of the block. - */ - public void setView(BlockView view) { - this.view = view; - } + /** + * Returns the current view of the block. + */ + public BlockView getView() { + return view; + } - /** - * Sets the dirty flag - */ - void setDirty() { - dirty = true; - } + /** + * Sets the current view of the block. + */ + public void setView(BlockView view) { + this.view = view; + } - /** - * Clears the dirty flag - */ - void setClean() { - dirty = false; - } + /** + * Sets the dirty flag + */ + void setDirty() { + dirty = true; + } - /** - * Returns true if the dirty flag is set. - */ - boolean isDirty() { - return dirty; - } + /** + * Clears the dirty flag + */ + void setClean() { + dirty = false; + } - /** - * Returns true if the block is still dirty with respect to the - * transaction log. - */ - boolean isInTransaction() { - return transactionCount != 0; - } + /** + * Returns true if the dirty flag is set. 
+ */ + boolean isDirty() { + return dirty; + } - /** - * Increments transaction count for this block, to signal that this - * block is in the log but not yet in the data file. The method also - * takes a snapshot so that the data may be modified in new transactions. - */ - synchronized void incrementTransactionCount() { - transactionCount++; - // @fixme(alex) - setClean(); - } + /** + * Returns true if the block is still dirty with respect to the transaction + * log. + */ + boolean isInTransaction() { + return transactionCount != 0; + } - /** - * Decrements transaction count for this block, to signal that this - * block has been written from the log to the data file. - */ - synchronized void decrementTransactionCount() { - transactionCount--; - if (transactionCount < 0) - throw new Error("transaction count on block " - + getBlockId() + " below zero!"); + /** + * Increments transaction count for this block, to signal that this block is + * in the log but not yet in the data file. The method also takes a snapshot + * so that the data may be modified in new transactions. + */ + synchronized void incrementTransactionCount() { + transactionCount++; + // @fixme(alex) + setClean(); + } + /** + * Decrements transaction count for this block, to signal that this block has + * been written from the log to the data file. + */ + synchronized void decrementTransactionCount() { + transactionCount--; + if (transactionCount < 0) { + throw new Error("transaction count on block " + getBlockId() + + " below zero!"); } - /** - * Reads a byte from the indicated position - */ - public byte readByte(int pos) { - return data[pos]; - } + } - /** - * Writes a byte to the indicated position - */ - public void writeByte(int pos, byte value) { - data[pos] = value; - setDirty(); - } + /** + * Reads a byte from the indicated position + */ + public byte readByte(int pos) { + return data[pos]; + } - /** - * Reads a short from the indicated position - */ - public short readShort(int pos) { - return (short) - (((short) (data[pos+0] & 0xff) << 8) | - ((short) (data[pos+1] & 0xff) << 0)); - } + /** + * Writes a byte to the indicated position + */ + public void writeByte(int pos, byte value) { + data[pos] = value; + setDirty(); + } - /** - * Writes a short to the indicated position - */ - public void writeShort(int pos, short value) { - data[pos+0] = (byte)(0xff & (value >> 8)); - data[pos+1] = (byte)(0xff & (value >> 0)); - setDirty(); - } + /** + * Reads a short from the indicated position + */ + public short readShort(int pos) { + return (short) (((short) (data[pos + 0] & 0xff) << 8) | ((short) (data[pos + 1] & 0xff) << 0)); + } - /** - * Reads an int from the indicated position - */ - public int readInt(int pos) { - return - (((int)(data[pos+0] & 0xff) << 24) | - ((int)(data[pos+1] & 0xff) << 16) | - ((int)(data[pos+2] & 0xff) << 8) | - ((int)(data[pos+3] & 0xff) << 0)); - } + /** + * Writes a short to the indicated position + */ + public void writeShort(int pos, short value) { + data[pos + 0] = (byte) (0xff & (value >> 8)); + data[pos + 1] = (byte) (0xff & (value >> 0)); + setDirty(); + } - /** - * Writes an int to the indicated position - */ - public void writeInt(int pos, int value) { - data[pos+0] = (byte)(0xff & (value >> 24)); - data[pos+1] = (byte)(0xff & (value >> 16)); - data[pos+2] = (byte)(0xff & (value >> 8)); - data[pos+3] = (byte)(0xff & (value >> 0)); - setDirty(); - } + /** + * Reads an int from the indicated position + */ + public int readInt(int pos) { + return (((data[pos + 0] & 0xff) << 24) | ((data[pos 
+ 1] & 0xff) << 16) + | ((data[pos + 2] & 0xff) << 8) | ((data[pos + 3] & 0xff) << 0)); + } - /** - * Reads a long from the indicated position - */ - public long readLong( int pos ) - { - // Contributed by Erwin Bolwidt - // Gives about 15% performance improvement - return - ( (long)( ((data[pos+0] & 0xff) << 24) | - ((data[pos+1] & 0xff) << 16) | - ((data[pos+2] & 0xff) << 8) | - ((data[pos+3] & 0xff) ) ) << 32 ) | - ( (long)( ((data[pos+4] & 0xff) << 24) | - ((data[pos+5] & 0xff) << 16) | - ((data[pos+6] & 0xff) << 8) | - ((data[pos+7] & 0xff) ) ) & 0xffffffff ); - /* Original version by Alex Boisvert. Might be faster on 64-bit JVMs. - return - (((long)(data[pos+0] & 0xff) << 56) | - ((long)(data[pos+1] & 0xff) << 48) | - ((long)(data[pos+2] & 0xff) << 40) | - ((long)(data[pos+3] & 0xff) << 32) | - ((long)(data[pos+4] & 0xff) << 24) | - ((long)(data[pos+5] & 0xff) << 16) | - ((long)(data[pos+6] & 0xff) << 8) | - ((long)(data[pos+7] & 0xff) << 0)); - */ - } + /** + * Writes an int to the indicated position + */ + public void writeInt(int pos, int value) { + data[pos + 0] = (byte) (0xff & (value >> 24)); + data[pos + 1] = (byte) (0xff & (value >> 16)); + data[pos + 2] = (byte) (0xff & (value >> 8)); + data[pos + 3] = (byte) (0xff & (value >> 0)); + setDirty(); + } - /** - * Writes a long to the indicated position + /** + * Reads a long from the indicated position + */ + public long readLong(int pos) { + // Contributed by Erwin Bolwidt + // Gives about 15% performance improvement + return ((long) (((data[pos + 0] & 0xff) << 24) + | ((data[pos + 1] & 0xff) << 16) | ((data[pos + 2] & 0xff) << 8) | ((data[pos + 3] & 0xff))) << 32) + | ((long) (((data[pos + 4] & 0xff) << 24) + | ((data[pos + 5] & 0xff) << 16) | ((data[pos + 6] & 0xff) << 8) | ((data[pos + 7] & 0xff))) & 0xffffffff); + /* + * Original version by Alex Boisvert. Might be faster on 64-bit JVMs. 
return + * (((long)(data[pos+0] & 0xff) << 56) | ((long)(data[pos+1] & 0xff) << 48) + * | ((long)(data[pos+2] & 0xff) << 40) | ((long)(data[pos+3] & 0xff) << 32) + * | ((long)(data[pos+4] & 0xff) << 24) | ((long)(data[pos+5] & 0xff) << 16) + * | ((long)(data[pos+6] & 0xff) << 8) | ((long)(data[pos+7] & 0xff) << 0)); */ - public void writeLong(int pos, long value) { - data[pos+0] = (byte)(0xff & (value >> 56)); - data[pos+1] = (byte)(0xff & (value >> 48)); - data[pos+2] = (byte)(0xff & (value >> 40)); - data[pos+3] = (byte)(0xff & (value >> 32)); - data[pos+4] = (byte)(0xff & (value >> 24)); - data[pos+5] = (byte)(0xff & (value >> 16)); - data[pos+6] = (byte)(0xff & (value >> 8)); - data[pos+7] = (byte)(0xff & (value >> 0)); - setDirty(); - } + } - // overrides java.lang.Object + /** + * Writes a long to the indicated position + */ + public void writeLong(int pos, long value) { + data[pos + 0] = (byte) (0xff & (value >> 56)); + data[pos + 1] = (byte) (0xff & (value >> 48)); + data[pos + 2] = (byte) (0xff & (value >> 40)); + data[pos + 3] = (byte) (0xff & (value >> 32)); + data[pos + 4] = (byte) (0xff & (value >> 24)); + data[pos + 5] = (byte) (0xff & (value >> 16)); + data[pos + 6] = (byte) (0xff & (value >> 8)); + data[pos + 7] = (byte) (0xff & (value >> 0)); + setDirty(); + } - public String toString() { - return "BlockIO(" - + blockId + "," - + dirty + "," - + view + ")"; - } + // overrides java.lang.Object - // implement externalizable interface - public void readExternal(ObjectInput in) - throws IOException, ClassNotFoundException { - blockId = in.readLong(); - int length = in.readInt(); - data = new byte[length]; - in.readFully(data); - } + @Override + public String toString() { + return "BlockIO(" + blockId + "," + dirty + "," + view + ")"; + } - // implement externalizable interface - public void writeExternal(ObjectOutput out) throws IOException { - out.writeLong(blockId); - out.writeInt(data.length); - out.write(data); - } + // implement externalizable interface + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + blockId = in.readLong(); + int length = in.readInt(); + data = new byte[length]; + in.readFully(data); + } + // implement externalizable interface + public void writeExternal(ObjectOutput out) throws IOException { + out.writeLong(blockId); + out.writeInt(data.length); + out.write(data); + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowId.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowId.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FreePhysicalRowId.java (working copy) @@ -66,30 +66,30 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * This class extends the physical rowid with a size value to indicated - * the size of a free rowid on the free rowid list. + * This class extends the physical rowid with a size value to indicated the size + * of a free rowid on the free rowid list. */ final class FreePhysicalRowId extends PhysicalRowId { - // offsets - private static final short O_SIZE = PhysicalRowId.SIZE; // int size - static final short SIZE = O_SIZE + Magic.SZ_INT; + // offsets + private static final short O_SIZE = PhysicalRowId.SIZE; // int size + static final short SIZE = O_SIZE + Magic.SZ_INT; - /** - * Constructs a physical rowid from the indicated data starting at - * the indicated position. 
- */ - FreePhysicalRowId(BlockIo block, short pos) { - super(block, pos); - } + /** + * Constructs a physical rowid from the indicated data starting at the + * indicated position. + */ + FreePhysicalRowId(BlockIo block, short pos) { + super(block, pos); + } - /** Returns the size */ - int getSize() { - return block.readInt(pos + O_SIZE); - } + /** Returns the size */ + int getSize() { + return block.readInt(pos + O_SIZE); + } - /** Sets the size */ - void setSize(int value) { - block.writeInt(pos + O_SIZE, value); - } + /** Sets the size */ + void setSize(int value) { + block.writeInt(pos + O_SIZE, value); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageHeader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageHeader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/PageHeader.java (working copy) @@ -65,110 +65,113 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; +import java.io.IOException; + /** - * This class represents a page header. It is the common superclass for - * all different page views. + * This class represents a page header. It is the common superclass for all + * different page views. */ public class PageHeader implements BlockView { - // offsets - private static final short O_MAGIC = 0; // short magic - private static final short O_NEXT = Magic.SZ_SHORT; // long next - private static final short O_PREV = O_NEXT + Magic.SZ_LONG; // long prev - protected static final short SIZE = O_PREV + Magic.SZ_LONG; + // offsets + private static final short O_MAGIC = 0; // short magic + private static final short O_NEXT = Magic.SZ_SHORT; // long next + private static final short O_PREV = O_NEXT + Magic.SZ_LONG; // long prev + protected static final short SIZE = O_PREV + Magic.SZ_LONG; - // my block - protected BlockIo block; + // my block + protected BlockIo block; - /** - * Constructs a PageHeader object from a block - * - * @param block The block that contains the file header - * @throws IOException if the block is too short to keep the file - * header. - */ - protected PageHeader(BlockIo block) { - initialize(block); - if (!magicOk()) - throw new Error("CRITICAL: page header magic for block " - + block.getBlockId() + " not OK " - + getMagic()); + /** + * Constructs a PageHeader object from a block + * + * @param block + * The block that contains the file header + * @throws IOException + * if the block is too short to keep the file header. + */ + protected PageHeader(BlockIo block) { + initialize(block); + if (!magicOk()) { + throw new Error("CRITICAL: page header magic for block " + + block.getBlockId() + " not OK " + getMagic()); } - - /** - * Constructs a new PageHeader of the indicated type. Used for newly - * created pages. - */ - PageHeader(BlockIo block, short type) { - initialize(block); - setType(type); + } + + /** + * Constructs a new PageHeader of the indicated type. Used for newly created + * pages. + */ + PageHeader(BlockIo block, short type) { + initialize(block); + setType(type); + } + + /** + * Factory method to create or return a page header for the indicated block. + */ + static PageHeader getView(BlockIo block) { + BlockView view = block.getView(); + if (view != null && view instanceof PageHeader) { + return (PageHeader) view; + } else { + return new PageHeader(block); } - - /** - * Factory method to create or return a page header for the - * indicated block. 
- */ - static PageHeader getView(BlockIo block) { - BlockView view = block.getView(); - if (view != null && view instanceof PageHeader) - return (PageHeader) view; - else - return new PageHeader(block); + } + + private void initialize(BlockIo block) { + this.block = block; + block.setView(this); + } + + /** + * Returns true if the magic corresponds with the fileHeader magic. + */ + private boolean magicOk() { + int magic = getMagic(); + return magic >= Magic.BLOCK + && magic <= (Magic.BLOCK + Magic.FREEPHYSIDS_PAGE); + } + + /** + * For paranoia mode + */ + protected void paranoiaMagicOk() { + if (!magicOk()) { + throw new Error("CRITICAL: page header magic not OK " + getMagic()); } - - private void initialize(BlockIo block) { - this.block = block; - block.setView(this); - } - - /** - * Returns true if the magic corresponds with the fileHeader magic. - */ - private boolean magicOk() { - int magic = getMagic(); - return magic >= Magic.BLOCK - && magic <= (Magic.BLOCK + Magic.FREEPHYSIDS_PAGE); - } - - /** - * For paranoia mode - */ - protected void paranoiaMagicOk() { - if (!magicOk()) - throw new Error("CRITICAL: page header magic not OK " - + getMagic()); - } - - /** Returns the magic code */ - short getMagic() { - return block.readShort(O_MAGIC); - } + } - /** Returns the next block. */ - long getNext() { - paranoiaMagicOk(); - return block.readLong(O_NEXT); - } - - /** Sets the next block. */ - void setNext(long next) { - paranoiaMagicOk(); - block.writeLong(O_NEXT, next); - } - - /** Returns the previous block. */ - long getPrev() { - paranoiaMagicOk(); - return block.readLong(O_PREV); - } - - /** Sets the previous block. */ - void setPrev(long prev) { - paranoiaMagicOk(); - block.writeLong(O_PREV, prev); - } - - /** Sets the type of the page header */ - void setType(short type) { - block.writeShort(O_MAGIC, (short) (Magic.BLOCK + type)); - } + /** Returns the magic code */ + short getMagic() { + return block.readShort(O_MAGIC); + } + + /** Returns the next block. */ + long getNext() { + paranoiaMagicOk(); + return block.readLong(O_NEXT); + } + + /** Sets the next block. */ + void setNext(long next) { + paranoiaMagicOk(); + block.writeLong(O_NEXT, next); + } + + /** Returns the previous block. */ + long getPrev() { + paranoiaMagicOk(); + return block.readLong(O_PREV); + } + + /** Sets the previous block. */ + void setPrev(long prev) { + paranoiaMagicOk(); + block.writeLong(O_PREV, prev); + } + + /** Sets the type of the page header */ + void setType(short type) { + block.writeShort(O_MAGIC, (short) (Magic.BLOCK + type)); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java (working copy) @@ -66,10 +66,10 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * This is a marker interface that is implemented by classes that - * interpret blocks of data by pretending to be an overlay. - * - * @see BlockIo#setView + * This is a marker interface that is implemented by classes that interpret + * blocks of data by pretending to be an overlay. 
+ * + * @see BlockIo#setView */ public interface BlockView { } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordHeader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordHeader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/RecordHeader.java (working copy) @@ -66,60 +66,59 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * The data that comes at the start of a record of data. It stores - * both the current size and the avaliable size for the record - the latter - * can be bigger than the former, which allows the record to grow without - * needing to be moved and which allows the system to put small records - * in larger free spots. + * The data that comes at the start of a record of data. It stores both the + * current size and the avaliable size for the record - the latter can be bigger + * than the former, which allows the record to grow without needing to be moved + * and which allows the system to put small records in larger free spots. */ class RecordHeader { - // offsets - private static final short O_CURRENTSIZE = 0; // int currentSize - private static final short O_AVAILABLESIZE = Magic.SZ_INT; // int availableSize - static final int SIZE = O_AVAILABLESIZE + Magic.SZ_INT; - - // my block and the position within the block - private BlockIo block; - private short pos; + // offsets + private static final short O_CURRENTSIZE = 0; // int currentSize + private static final short O_AVAILABLESIZE = Magic.SZ_INT; // int + // availableSize + static final int SIZE = O_AVAILABLESIZE + Magic.SZ_INT; - /** - * Constructs a record header from the indicated data starting at - * the indicated position. - */ - RecordHeader(BlockIo block, short pos) { - this.block = block; - this.pos = pos; - if (pos > (RecordFile.BLOCK_SIZE - SIZE)) - throw new Error("Offset too large for record header (" - + block.getBlockId() + ":" - + pos + ")"); - } + // my block and the position within the block + private final BlockIo block; + private final short pos; - /** Returns the current size */ - int getCurrentSize() { - return block.readInt(pos + O_CURRENTSIZE); + /** + * Constructs a record header from the indicated data starting at the + * indicated position. 
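The currentSize/availableSize split described in the RecordHeader comment is what lets a record grow without being relocated. Illustrative numbers only, no jdbm types involved:

public class RecordGrowthSketch {
  public static void main(String[] args) {
    int availableSize = 100; // bytes reserved on disk for this record
    int currentSize = 60;    // bytes the record actually uses today
    int newSize = 90;        // the record is rewritten with a bigger payload
    // newSize still fits in the reserved space, so only currentSize changes;
    // the record stays where it is and availableSize remains 100.
    System.out.println(newSize <= availableSize); // true
  }
}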
+ */ + RecordHeader(BlockIo block, short pos) { + this.block = block; + this.pos = pos; + if (pos > (RecordFile.BLOCK_SIZE - SIZE)) { + throw new Error("Offset too large for record header (" + + block.getBlockId() + ":" + pos + ")"); } - - /** Sets the current size */ - void setCurrentSize(int value) { - block.writeInt(pos + O_CURRENTSIZE, value); - } - - /** Returns the available size */ - int getAvailableSize() { - return block.readInt(pos + O_AVAILABLESIZE); - } - - /** Sets the available size */ - void setAvailableSize(int value) { - block.writeInt(pos + O_AVAILABLESIZE, value); - } + } - // overrides java.lang.Object - public String toString() { - return "RH(" + block.getBlockId() + ":" + pos - + ", avl=" + getAvailableSize() - + ", cur=" + getCurrentSize() - + ")"; - } + /** Returns the current size */ + int getCurrentSize() { + return block.readInt(pos + O_CURRENTSIZE); + } + + /** Sets the current size */ + void setCurrentSize(int value) { + block.writeInt(pos + O_CURRENTSIZE, value); + } + + /** Returns the available size */ + int getAvailableSize() { + return block.readInt(pos + O_AVAILABLESIZE); + } + + /** Sets the available size */ + void setAvailableSize(int value) { + block.writeInt(pos + O_AVAILABLESIZE, value); + } + + // overrides java.lang.Object + @Override + public String toString() { + return "RH(" + block.getBlockId() + ":" + pos + ", avl=" + + getAvailableSize() + ", cur=" + getCurrentSize() + ")"; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FileHeader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FileHeader.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/FileHeader.java (working copy) @@ -65,110 +65,112 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; +import java.io.IOException; + /** - * This class represents a file header. It is a 1:1 representation of - * the data that appears in block 0 of a file. + * This class represents a file header. It is a 1:1 representation of the data + * that appears in block 0 of a file. */ class FileHeader implements BlockView { - // offsets - private static final short O_MAGIC = 0; // short magic - private static final short O_LISTS = Magic.SZ_SHORT; // long[2*NLISTS] - private static final int O_ROOTS = - O_LISTS + (Magic.NLISTS * 2 * Magic.SZ_LONG); + // offsets + private static final short O_MAGIC = 0; // short magic + private static final short O_LISTS = Magic.SZ_SHORT; // long[2*NLISTS] + private static final int O_ROOTS = O_LISTS + + (Magic.NLISTS * 2 * Magic.SZ_LONG); - // my block - private BlockIo block; + // my block + private final BlockIo block; - /** The number of "root" rowids available in the file. */ - static final int NROOTS = - (RecordFile.BLOCK_SIZE - O_ROOTS) / Magic.SZ_LONG; + /** The number of "root" rowids available in the file. */ + static final int NROOTS = (RecordFile.BLOCK_SIZE - O_ROOTS) / Magic.SZ_LONG; - /** - * Constructs a FileHeader object from a block. - * - * @param block The block that contains the file header - * @param isNew If true, the file header is for a new file. - * @throws IOException if the block is too short to keep the file - * header. - */ - FileHeader(BlockIo block, boolean isNew) { - this.block = block; - if (isNew) - block.writeShort(O_MAGIC, Magic.FILE_HEADER); - else if (!magicOk()) - throw new Error("CRITICAL: file header magic not OK " - + block.readShort(O_MAGIC)); + /** + * Constructs a FileHeader object from a block. 
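The NROOTS computation above, worked through with concrete numbers. BLOCK_SIZE = 8192 is an assumption (RecordFile defines the real constant); the rest comes straight from the offsets declared in this class and in Magic:

public class FileHeaderMathSketch {
  static final int BLOCK_SIZE = 8192;                        // assumed RecordFile.BLOCK_SIZE
  static final int SZ_SHORT = 2, SZ_LONG = 8, NLISTS = 5;    // from Magic
  static final int O_LISTS = SZ_SHORT;                       // 2
  static final int O_ROOTS = O_LISTS + NLISTS * 2 * SZ_LONG; // 2 + 80 = 82
  static final int NROOTS = (BLOCK_SIZE - O_ROOTS) / SZ_LONG;

  public static void main(String[] args) {
    System.out.println(NROOTS); // 1013 root rowids fit in block 0
  }
}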
+ * + * @param block + * The block that contains the file header + * @param isNew + * If true, the file header is for a new file. + * @throws IOException + * if the block is too short to keep the file header. + */ + FileHeader(BlockIo block, boolean isNew) { + this.block = block; + if (isNew) { + block.writeShort(O_MAGIC, Magic.FILE_HEADER); + } else if (!magicOk()) { + throw new Error("CRITICAL: file header magic not OK " + + block.readShort(O_MAGIC)); } + } - /** Returns true if the magic corresponds with the fileHeader magic. */ - private boolean magicOk() { - return block.readShort(O_MAGIC) == Magic.FILE_HEADER; - } + /** Returns true if the magic corresponds with the fileHeader magic. */ + private boolean magicOk() { + return block.readShort(O_MAGIC) == Magic.FILE_HEADER; + } + /** Returns the offset of the "first" block of the indicated list */ + private short offsetOfFirst(int list) { + return (short) (O_LISTS + (2 * Magic.SZ_LONG * list)); + } - /** Returns the offset of the "first" block of the indicated list */ - private short offsetOfFirst(int list) { - return (short) (O_LISTS + (2 * Magic.SZ_LONG * list)); - } + /** Returns the offset of the "last" block of the indicated list */ + private short offsetOfLast(int list) { + return (short) (offsetOfFirst(list) + Magic.SZ_LONG); + } - /** Returns the offset of the "last" block of the indicated list */ - private short offsetOfLast(int list) { - return (short) (offsetOfFirst(list) + Magic.SZ_LONG); - } + /** Returns the offset of the indicated root */ + private short offsetOfRoot(int root) { + return (short) (O_ROOTS + (root * Magic.SZ_LONG)); + } - /** Returns the offset of the indicated root */ - private short offsetOfRoot(int root) { - return (short) (O_ROOTS + (root * Magic.SZ_LONG)); - } + /** + * Returns the first block of the indicated list + */ + long getFirstOf(int list) { + return block.readLong(offsetOfFirst(list)); + } - /** - * Returns the first block of the indicated list - */ - long getFirstOf(int list) { - return block.readLong(offsetOfFirst(list)); - } - - /** - * Sets the first block of the indicated list - */ - void setFirstOf(int list, long value) { - block.writeLong(offsetOfFirst(list), value); - } - - /** - * Returns the last block of the indicated list - */ - long getLastOf(int list) { - return block.readLong(offsetOfLast(list)); - } - - /** - * Sets the last block of the indicated list - */ - void setLastOf(int list, long value) { - block.writeLong(offsetOfLast(list), value); - } + /** + * Sets the first block of the indicated list + */ + void setFirstOf(int list, long value) { + block.writeLong(offsetOfFirst(list), value); + } - /** - * Returns the indicated root rowid. A root rowid is a special rowid - * that needs to be kept between sessions. It could conceivably be - * stored in a special file, but as a large amount of space in the - * block header is wasted anyway, it's more useful to store it where - * it belongs. - * - * @see #NROOTS - */ - long getRoot(int root) { - return block.readLong(offsetOfRoot(root)); - } + /** + * Returns the last block of the indicated list + */ + long getLastOf(int list) { + return block.readLong(offsetOfLast(list)); + } - /** - * Sets the indicated root rowid. 
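Root rowids are the hook for re-finding data across sessions. A minimal sketch using the RecordManager-level getRoot/setRoot that appear later in this patch (slot 0 and the stored object are illustrative; recman is assumed to be an open RecordManager):

// first session: remember where a long-lived object lives
long recid = recman.insert(someObject);   // someObject: any serializable value (assumed)
recman.setRoot(0, recid);                 // keep its rowid in root slot 0
recman.commit();

// later session: bootstrap from the same root slot
long saved = recman.getRoot(0);
Object restored = recman.fetch(saved);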
- * - * @see #getRoot - * @see #NROOTS - */ - void setRoot(int root, long rowid) { - block.writeLong(offsetOfRoot(root), rowid); - } + /** + * Sets the last block of the indicated list + */ + void setLastOf(int list, long value) { + block.writeLong(offsetOfLast(list), value); + } + + /** + * Returns the indicated root rowid. A root rowid is a special rowid that + * needs to be kept between sessions. It could conceivably be stored in a + * special file, but as a large amount of space in the block header is wasted + * anyway, it's more useful to store it where it belongs. + * + * @see #NROOTS + */ + long getRoot(int root) { + return block.readLong(offsetOfRoot(root)); + } + + /** + * Sets the indicated root rowid. + * + * @see #getRoot + * @see #NROOTS + */ + void setRoot(int root, long rowid) { + block.writeLong(offsetOfRoot(root), rowid); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Provider.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Provider.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/Provider.java (working copy) @@ -66,86 +66,84 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; -import java.io.IOException; import java.io.File; +import java.io.IOException; import java.util.Properties; import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerOptions; import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerProvider; - import org.apache.hadoop.hive.ql.util.jdbm.helper.MRU; /** * Provider of the default RecordManager implementation. - * + * * @author Alex Boisvert * @version $Id: Provider.java,v 1.3 2005/06/25 23:12:32 doomdark Exp $ */ -public final class Provider - implements RecordManagerProvider -{ +public final class Provider implements RecordManagerProvider { - /** - * Create a default implementation record manager. - * - * @param name Name of the record file. - * @param options Record manager options. - * @throws IOException if an I/O related exception occurs while creating - * or opening the record manager. - * @throws UnsupportedOperationException if some options are not supported by the - * implementation. - * @throws IllegalArgumentException if some options are invalid. - */ - public RecordManager createRecordManager( String name, - Properties options ) - throws IOException - { - RecordManager recman; + /** + * Create a default implementation record manager. + * + * @param name + * Name of the record file. + * @param options + * Record manager options. + * @throws IOException + * if an I/O related exception occurs while creating or opening the + * record manager. + * @throws UnsupportedOperationException + * if some options are not supported by the implementation. + * @throws IllegalArgumentException + * if some options are invalid. 
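A hedged usage sketch of the option handling implemented below; the class name, file name and sizes are illustrative, not part of this patch.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
import org.apache.hadoop.hive.ql.util.jdbm.RecordManagerOptions;
import org.apache.hadoop.hive.ql.util.jdbm.recman.Provider;

public class ProviderExample {
  public static void main(String[] args) throws IOException {
    Properties options = new Properties();
    options.setProperty(RecordManagerOptions.CACHE_TYPE,
        RecordManagerOptions.NORMAL_CACHE);              // MRU-backed CacheRecordManager
    options.setProperty(RecordManagerOptions.CACHE_SIZE, "500");
    options.setProperty(RecordManagerOptions.DISABLE_TRANSACTIONS, "false");

    // BaseRecordManager wrapped by getCachedRecordManager(), as shown below
    RecordManager recman = new Provider().createRecordManager("/tmp/example-db", options);
    recman.close();
  }
}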
+ */ + public RecordManager createRecordManager(String name, Properties options) + throws IOException { + RecordManager recman; - recman = new BaseRecordManager( name ); - recman = getCachedRecordManager(recman, options); - return recman; - } - - private RecordManager getCachedRecordManager(RecordManager recman, Properties options) - { - String value; - int cacheSize; - - value = options.getProperty( RecordManagerOptions.DISABLE_TRANSACTIONS, "false" ); - if ( value.equalsIgnoreCase( "TRUE" ) ) { - ( (BaseRecordManager) recman ).disableTransactions(); - } + recman = new BaseRecordManager(name); + recman = getCachedRecordManager(recman, options); + return recman; + } - value = options.getProperty( RecordManagerOptions.CACHE_SIZE, "1000" ); - cacheSize = Integer.parseInt( value ); + private RecordManager getCachedRecordManager(RecordManager recman, + Properties options) { + String value; + int cacheSize; - value = options.getProperty( RecordManagerOptions.CACHE_TYPE, - RecordManagerOptions.NORMAL_CACHE ); - if ( value.equalsIgnoreCase( RecordManagerOptions.NORMAL_CACHE ) ) { - MRU cache = new MRU( cacheSize ); - recman = new CacheRecordManager( recman, cache ); - } else if ( value.equalsIgnoreCase( RecordManagerOptions.SOFT_REF_CACHE ) ) { - throw new IllegalArgumentException( "Soft reference cache not implemented" ); - } else if ( value.equalsIgnoreCase( RecordManagerOptions.WEAK_REF_CACHE ) ) { - throw new IllegalArgumentException( "Weak reference cache not implemented" ); - } else if ( value.equalsIgnoreCase(RecordManagerOptions.NO_CACHE) ){ - // do nothing - } else { - throw new IllegalArgumentException( "Invalid cache type: " + value ); - } - - return recman; + value = options.getProperty(RecordManagerOptions.DISABLE_TRANSACTIONS, + "false"); + if (value.equalsIgnoreCase("TRUE")) { + ((BaseRecordManager) recman).disableTransactions(); } - public RecordManager createRecordManager ( File file, - Properties options ) - throws IOException - { - RecordManager recman = new BaseRecordManager(file); - recman = getCachedRecordManager(recman, options); - return recman; + value = options.getProperty(RecordManagerOptions.CACHE_SIZE, "1000"); + cacheSize = Integer.parseInt(value); + + value = options.getProperty(RecordManagerOptions.CACHE_TYPE, + RecordManagerOptions.NORMAL_CACHE); + if (value.equalsIgnoreCase(RecordManagerOptions.NORMAL_CACHE)) { + MRU cache = new MRU(cacheSize); + recman = new CacheRecordManager(recman, cache); + } else if (value.equalsIgnoreCase(RecordManagerOptions.SOFT_REF_CACHE)) { + throw new IllegalArgumentException("Soft reference cache not implemented"); + } else if (value.equalsIgnoreCase(RecordManagerOptions.WEAK_REF_CACHE)) { + throw new IllegalArgumentException("Weak reference cache not implemented"); + } else if (value.equalsIgnoreCase(RecordManagerOptions.NO_CACHE)) { + // do nothing + } else { + throw new IllegalArgumentException("Invalid cache type: " + value); } + return recman; + } + + public RecordManager createRecordManager(File file, Properties options) + throws IOException { + RecordManager recman = new BaseRecordManager(file); + recman = getCachedRecordManager(recman, options); + return recman; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TranslationPage.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TranslationPage.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/TranslationPage.java (working copy) @@ -66,44 +66,45 
@@ package org.apache.hadoop.hive.ql.util.jdbm.recman; /** - * Class describing a page that holds translations from physical rowids - * to logical rowids. In fact, the page just holds physical rowids - the - * page's block is the block for the logical rowid, the offset serve - * as offset for the rowids. + * Class describing a page that holds translations from physical rowids to + * logical rowids. In fact, the page just holds physical rowids - the page's + * block is the block for the logical rowid, the offset serve as offset for the + * rowids. */ final class TranslationPage extends PageHeader { - // offsets - static final short O_TRANS = PageHeader.SIZE; // short count - static final short ELEMS_PER_PAGE = - (RecordFile.BLOCK_SIZE - O_TRANS) / PhysicalRowId.SIZE; - - // slots we returned. - final PhysicalRowId[] slots = new PhysicalRowId[ELEMS_PER_PAGE]; + // offsets + static final short O_TRANS = PageHeader.SIZE; // short count + static final short ELEMS_PER_PAGE = (RecordFile.BLOCK_SIZE - O_TRANS) + / PhysicalRowId.SIZE; - /** - * Constructs a data page view from the indicated block. - */ - TranslationPage(BlockIo block) { - super(block); - } + // slots we returned. + final PhysicalRowId[] slots = new PhysicalRowId[ELEMS_PER_PAGE]; - /** - * Factory method to create or return a data page for the - * indicated block. - */ - static TranslationPage getTranslationPageView(BlockIo block) { - BlockView view = block.getView(); - if (view != null && view instanceof TranslationPage) - return (TranslationPage) view; - else - return new TranslationPage(block); + /** + * Constructs a data page view from the indicated block. + */ + TranslationPage(BlockIo block) { + super(block); + } + + /** + * Factory method to create or return a data page for the indicated block. + */ + static TranslationPage getTranslationPageView(BlockIo block) { + BlockView view = block.getView(); + if (view != null && view instanceof TranslationPage) { + return (TranslationPage) view; + } else { + return new TranslationPage(block); } + } - /** Returns the value of the indicated rowid on the page */ - PhysicalRowId get(short offset) { - int slot = (offset - O_TRANS) / PhysicalRowId.SIZE; - if (slots[slot] == null) - slots[slot] = new PhysicalRowId(block, offset); - return slots[slot]; + /** Returns the value of the indicated rowid on the page */ + PhysicalRowId get(short offset) { + int slot = (offset - O_TRANS) / PhysicalRowId.SIZE; + if (slots[slot] == null) { + slots[slot] = new PhysicalRowId(block, offset); } + return slots[slot]; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/CacheRecordManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/CacheRecordManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/CacheRecordManager.java (working copy) @@ -66,6 +66,9 @@ package org.apache.hadoop.hive.ql.util.jdbm.recman; +import java.io.IOException; +import java.util.Enumeration; + import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; import org.apache.hadoop.hive.ql.util.jdbm.helper.CacheEvictionException; import org.apache.hadoop.hive.ql.util.jdbm.helper.CachePolicy; @@ -74,402 +77,360 @@ import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer; import org.apache.hadoop.hive.ql.util.jdbm.helper.WrappedRuntimeException; -import java.io.IOException; -import java.util.Enumeration; - /** - * A RecordManager wrapping and caching another RecordManager. 
- * + * A RecordManager wrapping and caching another RecordManager. + * * @author Alex Boisvert * @author Cees de Groot - * @version $Id: CacheRecordManager.java,v 1.9 2005/06/25 23:12:32 doomdark Exp $ + * @version $Id: CacheRecordManager.java,v 1.9 2005/06/25 23:12:32 doomdark Exp + * $ */ -public class CacheRecordManager - implements RecordManager -{ +public class CacheRecordManager implements RecordManager { - /** - * Wrapped RecordManager - */ - protected RecordManager _recman; + /** + * Wrapped RecordManager + */ + protected RecordManager _recman; + /** + * Cache for underlying RecordManager + */ + protected CachePolicy _cache; - /** - * Cache for underlying RecordManager - */ - protected CachePolicy _cache; - - - /** - * Construct a CacheRecordManager wrapping another RecordManager and - * using a given cache policy. - * - * @param recman Wrapped RecordManager - * @param cache Cache policy - */ - public CacheRecordManager( RecordManager recman, CachePolicy cache ) - { - if ( recman == null ) { - throw new IllegalArgumentException( "Argument 'recman' is null" ); - } - if ( cache == null ) { - throw new IllegalArgumentException( "Argument 'cache' is null" ); - } - _recman = recman; - _cache = cache; - - _cache.addListener( new CacheListener() ); + /** + * Construct a CacheRecordManager wrapping another RecordManager and using a + * given cache policy. + * + * @param recman + * Wrapped RecordManager + * @param cache + * Cache policy + */ + public CacheRecordManager(RecordManager recman, CachePolicy cache) { + if (recman == null) { + throw new IllegalArgumentException("Argument 'recman' is null"); } - - - /** - * Get the underlying Record Manager. - * - * @return underlying RecordManager or null if CacheRecordManager has - * been closed. - */ - public RecordManager getRecordManager() - { - return _recman; + if (cache == null) { + throw new IllegalArgumentException("Argument 'cache' is null"); } + _recman = recman; + _cache = cache; - - /** - * Get the underlying cache policy - * - * @return underlying CachePolicy or null if CacheRecordManager has - * been closed. - */ - public CachePolicy getCachePolicy() - { - return _cache; - } + _cache.addListener(new CacheListener()); + } - - /** - * Inserts a new record using a custom serializer. - * - * @param obj the object for the new record. - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public long insert( Object obj ) - throws IOException - { - return insert( obj, DefaultSerializer.INSTANCE ); - } - - - /** - * Inserts a new record using a custom serializer. - * - * @param obj the object for the new record. - * @param serializer a custom serializer - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized long insert( Object obj, Serializer serializer ) - throws IOException - { - checkIfClosed(); + /** + * Get the underlying Record Manager. + * + * @return underlying RecordManager or null if CacheRecordManager has been + * closed. + */ + public RecordManager getRecordManager() { + return _recman; + } - long recid = _recman.insert( obj, serializer ); - try { - _cache.put( new Long( recid ), new CacheEntry( recid, obj, serializer, false ) ); - } catch ( CacheEvictionException except ) { - throw new WrappedRuntimeException( except ); - } - return recid; - } + /** + * Get the underlying cache policy + * + * @return underlying CachePolicy or null if CacheRecordManager has been + * closed. 
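The same wrapping can also be done by hand. A small fragment, assuming base is an already-open RecordManager and using the MRU cache that Provider wires up (the cache size 256 is illustrative):

import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
import org.apache.hadoop.hive.ql.util.jdbm.helper.MRU;
import org.apache.hadoop.hive.ql.util.jdbm.recman.CacheRecordManager;

// base: an already-open RecordManager (hypothetical variable)
RecordManager cached = new CacheRecordManager(base, new MRU(256));
// reads are served from the MRU cache; dirty entries are written back on
// eviction, commit() and close(), as implemented in this class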
+ */ + public CachePolicy getCachePolicy() { + return _cache; + } + /** + * Inserts a new record using a custom serializer. + * + * @param obj + * the object for the new record. + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public long insert(Object obj) throws IOException { + return insert(obj, DefaultSerializer.INSTANCE); + } - /** - * Deletes a record. - * - * @param recid the rowid for the record that should be deleted. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized void delete( long recid ) - throws IOException - { - checkIfClosed(); + /** + * Inserts a new record using a custom serializer. + * + * @param obj + * the object for the new record. + * @param serializer + * a custom serializer + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized long insert(Object obj, Serializer serializer) + throws IOException { + checkIfClosed(); - _recman.delete( recid ); - _cache.remove( new Long( recid ) ); + long recid = _recman.insert(obj, serializer); + try { + _cache + .put(new Long(recid), new CacheEntry(recid, obj, serializer, false)); + } catch (CacheEvictionException except) { + throw new WrappedRuntimeException(except); } + return recid; + } + /** + * Deletes a record. + * + * @param recid + * the rowid for the record that should be deleted. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void delete(long recid) throws IOException { + checkIfClosed(); - /** - * Updates a record using standard Java serialization. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public void update( long recid, Object obj ) - throws IOException - { - update( recid, obj, DefaultSerializer.INSTANCE ); - } - + _recman.delete(recid); + _cache.remove(new Long(recid)); + } - /** - * Updates a record using a custom serializer. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @param serializer a custom serializer - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized void update( long recid, Object obj, - Serializer serializer ) - throws IOException - { - CacheEntry entry; - Long id; - - checkIfClosed(); + /** + * Updates a record using standard Java serialization. + * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public void update(long recid, Object obj) throws IOException { + update(recid, obj, DefaultSerializer.INSTANCE); + } - id = new Long( recid ); - try { - entry = (CacheEntry) _cache.get( id ); - if ( entry != null ) { - // reuse existing cache entry - entry._obj = obj; - entry._serializer = serializer; - entry._isDirty = true; - } else { - _cache.put( id, new CacheEntry( recid, obj, serializer, true ) ); - } - } catch ( CacheEvictionException except ) { - throw new IOException( except.getMessage() ); - } - } + /** + * Updates a record using a custom serializer. + * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. 
+ * @param serializer + * a custom serializer + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void update(long recid, Object obj, Serializer serializer) + throws IOException { + CacheEntry entry; + Long id; + checkIfClosed(); - /** - * Fetches a record using standard Java serialization. - * - * @param recid the recid for the record that must be fetched. - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public Object fetch( long recid ) - throws IOException - { - return fetch( recid, DefaultSerializer.INSTANCE ); + id = new Long(recid); + try { + entry = (CacheEntry) _cache.get(id); + if (entry != null) { + // reuse existing cache entry + entry._obj = obj; + entry._serializer = serializer; + entry._isDirty = true; + } else { + _cache.put(id, new CacheEntry(recid, obj, serializer, true)); + } + } catch (CacheEvictionException except) { + throw new IOException(except.getMessage()); } + } - - /** - * Fetches a record using a custom serializer. - * - * @param recid the recid for the record that must be fetched. - * @param serializer a custom serializer - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public synchronized Object fetch( long recid, Serializer serializer ) - throws IOException - { - checkIfClosed(); + /** + * Fetches a record using standard Java serialization. + * + * @param recid + * the recid for the record that must be fetched. + * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public Object fetch(long recid) throws IOException { + return fetch(recid, DefaultSerializer.INSTANCE); + } - Long id = new Long( recid ); - CacheEntry entry = (CacheEntry) _cache.get( id ); - if ( entry == null ) { - entry = new CacheEntry( recid, null, serializer, false ); - entry._obj = _recman.fetch( recid, serializer ); - try { - _cache.put( id, entry ); - } catch ( CacheEvictionException except ) { - throw new WrappedRuntimeException( except ); - } - } - return entry._obj; + /** + * Fetches a record using a custom serializer. + * + * @param recid + * the recid for the record that must be fetched. + * @param serializer + * a custom serializer + * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized Object fetch(long recid, Serializer serializer) + throws IOException { + checkIfClosed(); + + Long id = new Long(recid); + CacheEntry entry = (CacheEntry) _cache.get(id); + if (entry == null) { + entry = new CacheEntry(recid, null, serializer, false); + entry._obj = _recman.fetch(recid, serializer); + try { + _cache.put(id, entry); + } catch (CacheEvictionException except) { + throw new WrappedRuntimeException(except); + } } + return entry._obj; + } + /** + * Closes the record manager. + * + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public synchronized void close() throws IOException { + checkIfClosed(); - /** - * Closes the record manager. - * - * @throws IOException when one of the underlying I/O operations fails. 
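Taken together, insert/fetch/update give the basic record lifecycle. A brief sketch with the default serializer (recman is assumed to be an open RecordManager; the stored strings are illustrative):

long recid = recman.insert("hello");            // new record, DefaultSerializer
String value = (String) recman.fetch(recid);    // served from the cache when present
recman.update(recid, value + ", world");        // marks the cache entry dirty
recman.commit();                                // flushes dirty entries, then commits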
- */ - public synchronized void close() - throws IOException - { - checkIfClosed(); + updateCacheEntries(); + _recman.close(); + _recman = null; + _cache = null; + } - updateCacheEntries(); - _recman.close(); - _recman = null; - _cache = null; - } + /** + * Returns the number of slots available for "root" rowids. These slots can be + * used to store special rowids, like rowids that point to other rowids. Root + * rowids are useful for bootstrapping access to a set of data. + */ + public synchronized int getRootCount() { + checkIfClosed(); + return _recman.getRootCount(); + } - /** - * Returns the number of slots available for "root" rowids. These slots - * can be used to store special rowids, like rowids that point to - * other rowids. Root rowids are useful for bootstrapping access to - * a set of data. - */ - public synchronized int getRootCount() - { - checkIfClosed(); + /** + * Returns the indicated root rowid. + * + * @see #getRootCount + */ + public synchronized long getRoot(int id) throws IOException { + checkIfClosed(); - return _recman.getRootCount(); - } + return _recman.getRoot(id); + } + /** + * Sets the indicated root rowid. + * + * @see #getRootCount + */ + public synchronized void setRoot(int id, long rowid) throws IOException { + checkIfClosed(); - /** - * Returns the indicated root rowid. - * - * @see #getRootCount - */ - public synchronized long getRoot( int id ) - throws IOException - { - checkIfClosed(); + _recman.setRoot(id, rowid); + } - return _recman.getRoot( id ); - } + /** + * Commit (make persistent) all changes since beginning of transaction. + */ + public synchronized void commit() throws IOException { + checkIfClosed(); + updateCacheEntries(); + _recman.commit(); + } + /** + * Rollback (cancel) all changes since beginning of transaction. + */ + public synchronized void rollback() throws IOException { + checkIfClosed(); - /** - * Sets the indicated root rowid. - * - * @see #getRootCount - */ - public synchronized void setRoot( int id, long rowid ) - throws IOException - { - checkIfClosed(); + _recman.rollback(); - _recman.setRoot( id, rowid ); - } + // discard all cache entries since we don't know which entries + // where part of the transaction + _cache.removeAll(); + } + /** + * Obtain the record id of a named object. Returns 0 if named object doesn't + * exist. + */ + public synchronized long getNamedObject(String name) throws IOException { + checkIfClosed(); - /** - * Commit (make persistent) all changes since beginning of transaction. - */ - public synchronized void commit() - throws IOException - { - checkIfClosed(); - updateCacheEntries(); - _recman.commit(); - } + return _recman.getNamedObject(name); + } + /** + * Set the record id of a named object. + */ + public synchronized void setNamedObject(String name, long recid) + throws IOException { + checkIfClosed(); - /** - * Rollback (cancel) all changes since beginning of transaction. - */ - public synchronized void rollback() - throws IOException - { - checkIfClosed(); + _recman.setNamedObject(name, recid); + } - _recman.rollback(); - - // discard all cache entries since we don't know which entries - // where part of the transaction - _cache.removeAll(); + /** + * Check if RecordManager has been closed. If so, throw an + * IllegalStateException + */ + private void checkIfClosed() throws IllegalStateException { + if (_recman == null) { + throw new IllegalStateException("RecordManager has been closed"); } + } - - /** - * Obtain the record id of a named object. Returns 0 if named object - * doesn't exist. 
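getNamedObject/setNamedObject are the usual bootstrap for structures that must survive restarts. A short sketch (the name "dimension-index" is illustrative; recman and someObject are assumed to exist):

// first run: register a record id under a stable name
long recid = recman.insert(someObject);
recman.setNamedObject("dimension-index", recid);
recman.commit();

// any later run: 0 means the name was never registered
long found = recman.getNamedObject("dimension-index");
if (found != 0) {
  Object restored = recman.fetch(found);
}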
- */ - public synchronized long getNamedObject( String name ) - throws IOException - { - checkIfClosed(); - - return _recman.getNamedObject( name ); + /** + * Update all dirty cache objects to the underlying RecordManager. + */ + protected void updateCacheEntries() throws IOException { + Enumeration enume = _cache.elements(); + while (enume.hasMoreElements()) { + CacheEntry entry = (CacheEntry) enume.nextElement(); + if (entry._isDirty) { + _recman.update(entry._recid, entry._obj, entry._serializer); + entry._isDirty = false; + } } + } + private class CacheEntry { - /** - * Set the record id of a named object. - */ - public synchronized void setNamedObject( String name, long recid ) - throws IOException - { - checkIfClosed(); + long _recid; + Object _obj; + Serializer _serializer; + boolean _isDirty; - _recman.setNamedObject( name, recid ); + CacheEntry(long recid, Object obj, Serializer serializer, boolean isDirty) { + _recid = recid; + _obj = obj; + _serializer = serializer; + _isDirty = isDirty; } + } // class CacheEntry - /** - * Check if RecordManager has been closed. If so, throw an - * IllegalStateException - */ - private void checkIfClosed() - throws IllegalStateException - { - if ( _recman == null ) { - throw new IllegalStateException( "RecordManager has been closed" ); - } - } + private class CacheListener implements CachePolicyListener { - /** - * Update all dirty cache objects to the underlying RecordManager. + * Notification that cache is evicting an object + * + * @arg obj object evited from cache + * */ - protected void updateCacheEntries() - throws IOException - { - Enumeration enume = _cache.elements(); - while ( enume.hasMoreElements() ) { - CacheEntry entry = (CacheEntry) enume.nextElement(); - if ( entry._isDirty ) { - _recman.update( entry._recid, entry._obj, entry._serializer ); - entry._isDirty = false; - } + public void cacheObjectEvicted(Object obj) throws CacheEvictionException { + CacheEntry entry = (CacheEntry) obj; + if (entry._isDirty) { + try { + _recman.update(entry._recid, entry._obj, entry._serializer); + } catch (IOException except) { + throw new CacheEvictionException(except); } + } } - - private class CacheEntry - { - - long _recid; - Object _obj; - Serializer _serializer; - boolean _isDirty; - - CacheEntry( long recid, Object obj, Serializer serializer, boolean isDirty ) - { - _recid = recid; - _obj = obj; - _serializer = serializer; - _isDirty = isDirty; - } - - } // class CacheEntry - - private class CacheListener - implements CachePolicyListener - { - - /** Notification that cache is evicting an object - * - * @arg obj object evited from cache - * - */ - public void cacheObjectEvicted( Object obj ) - throws CacheEvictionException - { - CacheEntry entry = (CacheEntry) obj; - if ( entry._isDirty ) { - try { - _recman.update( entry._recid, entry._obj, entry._serializer ); - } catch ( IOException except ) { - throw new CacheEvictionException( except ); - } - } - } - - } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerOptions.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerOptions.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManagerOptions.java (working copy) @@ -68,79 +68,71 @@ /** * Standard options for RecordManager. 
- * + * * @author Alex Boisvert * @author Cees de Groot - * @version $Id: RecordManagerOptions.java,v 1.1 2002/05/31 06:33:20 boisvert Exp $ + * @version $Id: RecordManagerOptions.java,v 1.1 2002/05/31 06:33:20 boisvert + * Exp $ */ -public class RecordManagerOptions -{ +public class RecordManagerOptions { - /** - * Option to create a thread-safe record manager. - */ - public static final String PROVIDER_FACTORY = "jdbm.provider"; + /** + * Option to create a thread-safe record manager. + */ + public static final String PROVIDER_FACTORY = "jdbm.provider"; + /** + * Option to create a thread-safe record manager. + */ + public static final String THREAD_SAFE = "jdbm.threadSafe"; - /** - * Option to create a thread-safe record manager. - */ - public static final String THREAD_SAFE = "jdbm.threadSafe"; + /** + * Option to automatically commit data after each operation. + */ + public static final String AUTO_COMMIT = "jdbm.autoCommit"; + /** + * Option to disable transaction (to increase performance at the cost of + * potential data loss). + */ + public static final String DISABLE_TRANSACTIONS = "jdbm.disableTransactions"; - /** - * Option to automatically commit data after each operation. - */ - public static final String AUTO_COMMIT = "jdbm.autoCommit"; + /** + * Cache type. + */ + public static final String CACHE_TYPE = "jdbm.cache.type"; + /** + * Cache size (when applicable) + */ + public static final String CACHE_SIZE = "jdbm.cache.size"; - /** - * Option to disable transaction (to increase performance at the cost of - * potential data loss). - */ - public static final String DISABLE_TRANSACTIONS = "jdbm.disableTransactions"; + /** + * Use normal (strong) object references for the record cache. + */ + public static final String NORMAL_CACHE = "normal"; + /** + * Use soft references {$link java.lang.ref.SoftReference} for the record + * cache instead of the default normal object references. + *
<p>
+ * Soft references are cleared at the discretion of the garbage collector in + * response to memory demand. + */ + public static final String SOFT_REF_CACHE = "soft"; - /** - * Cache type. - */ - public static final String CACHE_TYPE = "jdbm.cache.type"; + /** + * Use weak references {$link java.lang.ref.WeakReference} for the record + * cache instead of the default normal object references. + *
<p>
+ * Weak references do not prevent their referents from being made finalizable, + * finalized, and then reclaimed. + */ + public static final String WEAK_REF_CACHE = "weak"; + /** + * Disable cache. + */ + public static final String NO_CACHE = "nocache"; - /** - * Cache size (when applicable) - */ - public static final String CACHE_SIZE = "jdbm.cache.size"; - - - /** - * Use normal (strong) object references for the record cache. - */ - public static final String NORMAL_CACHE = "normal"; - - - /** - * Use soft references {$link java.lang.ref.SoftReference} for the record - * cache instead of the default normal object references. - *
<p>
- * Soft references are cleared at the discretion of the garbage collector - * in response to memory demand. - */ - public static final String SOFT_REF_CACHE = "soft"; - - - /** - * Use weak references {$link java.lang.ref.WeakReference} for the record - * cache instead of the default normal object references. - *
<p>
- * Weak references do not prevent their referents from being made - * finalizable, finalized, and then reclaimed. - */ - public static final String WEAK_REF_CACHE = "weak"; - - /** - * Disable cache. - */ - public static final String NO_CACHE = "nocache"; - } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java (working copy) @@ -68,265 +68,233 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; - import java.util.ArrayList; /** - * A bucket is a placeholder for multiple (key, value) pairs. Buckets - * are used to store collisions (same hash value) at all levels of an - * H*tree. - * + * A bucket is a placeholder for multiple (key, value) pairs. Buckets are used + * to store collisions (same hash value) at all levels of an H*tree. + * * There are two types of buckets: leaf and non-leaf. - * - * Non-leaf buckets are buckets which hold collisions which happen - * when the H*tree is not fully expanded. Keys in a non-leaf buckets - * can have different hash codes. Non-leaf buckets are limited to an - * arbitrary size. When this limit is reached, the H*tree should create - * a new Directory page and distribute keys of the non-leaf buckets into - * the newly created Directory. - * - * A leaf bucket is a bucket which contains keys which all have - * the same hashCode(). Leaf buckets stand at the - * bottom of an H*tree because the hashing algorithm cannot further - * discriminate between different keys based on their hash code. - * - * @author Alex Boisvert - * @version $Id: HashBucket.java,v 1.2 2005/06/25 23:12:32 doomdark Exp $ + * + * Non-leaf buckets are buckets which hold collisions which happen when the + * H*tree is not fully expanded. Keys in a non-leaf buckets can have different + * hash codes. Non-leaf buckets are limited to an arbitrary size. When this + * limit is reached, the H*tree should create a new Directory page and + * distribute keys of the non-leaf buckets into the newly created Directory. + * + * A leaf bucket is a bucket which contains keys which all have the same + * hashCode(). Leaf buckets stand at the bottom of an H*tree + * because the hashing algorithm cannot further discriminate between different + * keys based on their hash code. + * + * @author Alex Boisvert + * @version $Id: HashBucket.java,v 1.2 2005/06/25 23:12:32 doomdark Exp $ */ -final class HashBucket - extends HashNode - implements Externalizable -{ +final class HashBucket extends HashNode implements Externalizable { - final static long serialVersionUID = 1L; + final static long serialVersionUID = 1L; - /** - * The maximum number of elements (key, value) a non-leaf bucket - * can contain. - */ - public static final int OVERFLOW_SIZE = 8; + /** + * The maximum number of elements (key, value) a non-leaf bucket can contain. + */ + public static final int OVERFLOW_SIZE = 8; + /** + * Depth of this bucket. + */ + private int _depth; - /** - * Depth of this bucket. - */ - private int _depth; + /** + * Keys in this bucket. Keys are ordered to match their respective value in + * _values. + */ + private ArrayList _keys; + /** + * Values in this bucket. Values are ordered to match their respective key in + * _keys. + */ + private ArrayList _values; - /** - * Keys in this bucket. 
Keys are ordered to match their respective - * value in _values. - */ - private ArrayList _keys; + /** + * Public constructor for serialization. + */ + public HashBucket() { + // empty + } - - /** - * Values in this bucket. Values are ordered to match their respective - * key in _keys. - */ - private ArrayList _values; - - - /** - * Public constructor for serialization. - */ - public HashBucket() { - // empty + /** + * Construct a bucket with a given depth level. Depth level is the number of + * HashDirectory above this bucket. + */ + public HashBucket(int level) { + if (level > HashDirectory.MAX_DEPTH + 1) { + throw new IllegalArgumentException( + "Cannot create bucket with depth > MAX_DEPTH+1. " + "Depth=" + level); } + _depth = level; + _keys = new ArrayList(OVERFLOW_SIZE); + _values = new ArrayList(OVERFLOW_SIZE); + } + /** + * Returns the number of elements contained in this bucket. + */ + public int getElementCount() { + return _keys.size(); + } - /** - * Construct a bucket with a given depth level. Depth level is the - * number of HashDirectory above this bucket. - */ - public HashBucket( int level ) - { - if ( level > HashDirectory.MAX_DEPTH+1 ) { - throw new IllegalArgumentException( - "Cannot create bucket with depth > MAX_DEPTH+1. " - + "Depth=" + level ); - } - _depth = level; - _keys = new ArrayList( OVERFLOW_SIZE ); - _values = new ArrayList( OVERFLOW_SIZE ); - } + /** + * Returns whether or not this bucket is a "leaf bucket". + */ + public boolean isLeaf() { + return (_depth > HashDirectory.MAX_DEPTH); + } - - /** - * Returns the number of elements contained in this bucket. - */ - public int getElementCount() - { - return _keys.size(); + /** + * Returns true if bucket can accept at least one more element. + */ + public boolean hasRoom() { + if (isLeaf()) { + return true; // leaf buckets are never full + } else { + // non-leaf bucket + return (_keys.size() < OVERFLOW_SIZE); } + } - - /** - * Returns whether or not this bucket is a "leaf bucket". - */ - public boolean isLeaf() - { - return ( _depth > HashDirectory.MAX_DEPTH ); + /** + * Add an element (key, value) to this bucket. If an existing element has the + * same key, it is replaced silently. + * + * @return Object which was previously associated with the given key or + * null if no association existed. + */ + public Object addElement(Object key, Object value) { + int existing = _keys.indexOf(key); + if (existing != -1) { + // replace existing element + Object before = _values.get(existing); + _values.set(existing, value); + return before; + } else { + // add new (key, value) pair + _keys.add(key); + _values.add(value); + return null; } + } - - /** - * Returns true if bucket can accept at least one more element. - */ - public boolean hasRoom() - { - if ( isLeaf() ) { - return true; // leaf buckets are never full - } else { - // non-leaf bucket - return ( _keys.size() < OVERFLOW_SIZE ); - } + /** + * Remove an element, given a specific key. + * + * @param key + * Key of the element to remove + * + * @return Removed element value, or null if not found + */ + public Object removeElement(Object key) { + int existing = _keys.indexOf(key); + if (existing != -1) { + Object obj = _values.get(existing); + _keys.remove(existing); + _values.remove(existing); + return obj; + } else { + // not found + return null; } + } - - /** - * Add an element (key, value) to this bucket. If an existing element - * has the same key, it is replaced silently. 
- * - * @return Object which was previously associated with the given key - * or null if no association existed. - */ - public Object addElement( Object key, Object value ) - { - int existing = _keys.indexOf(key); - if ( existing != -1 ) { - // replace existing element - Object before = _values.get( existing ); - _values.set( existing, value ); - return before; - } else { - // add new (key, value) pair - _keys.add( key ); - _values.add( value ); - return null; - } + /** + * Returns the value associated with a given key. If the given key is not + * found in this bucket, returns null. + */ + public Object getValue(Object key) { + int existing = _keys.indexOf(key); + if (existing != -1) { + return _values.get(existing); + } else { + // key not found + return null; } + } + /** + * Obtain keys contained in this buckets. Keys are ordered to match their + * values, which be be obtained by calling getValues(). + * + * As an optimization, the Vector returned is the instance member of this + * class. Please don't modify outside the scope of this class. + */ + ArrayList getKeys() { + return _keys; + } - /** - * Remove an element, given a specific key. - * - * @param key Key of the element to remove - * - * @return Removed element value, or null if not found - */ - public Object removeElement( Object key ) - { - int existing = _keys.indexOf(key); - if ( existing != -1 ) { - Object obj = _values.get( existing ); - _keys.remove( existing ); - _values.remove( existing ); - return obj; - } else { - // not found - return null; - } - } + /** + * Obtain values contained in this buckets. Values are ordered to match their + * keys, which be be obtained by calling getKeys(). + * + * As an optimization, the Vector returned is the instance member of this + * class. Please don't modify outside the scope of this class. + */ + ArrayList getValues() { + return _values; + } + /** + * Implement Externalizable interface. + */ + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_depth); - /** - * Returns the value associated with a given key. If the given key - * is not found in this bucket, returns null. - */ - public Object getValue( Object key ) - { - int existing = _keys.indexOf(key); - if ( existing != -1 ) { - return _values.get( existing ); - } else { - // key not found - return null; - } - } + int entries = _keys.size(); + out.writeInt(entries); - - /** - * Obtain keys contained in this buckets. Keys are ordered to match - * their values, which be be obtained by calling getValues(). - * - * As an optimization, the Vector returned is the instance member - * of this class. Please don't modify outside the scope of this class. - */ - ArrayList getKeys() - { - return this._keys; + // write keys + for (int i = 0; i < entries; i++) { + out.writeObject(_keys.get(i)); } - - - /** - * Obtain values contained in this buckets. Values are ordered to match - * their keys, which be be obtained by calling getKeys(). - * - * As an optimization, the Vector returned is the instance member - * of this class. Please don't modify outside the scope of this class. - */ - ArrayList getValues() - { - return this._values; + // write values + for (int i = 0; i < entries; i++) { + out.writeObject(_values.get(i)); } + } + /** + * Implement Externalizable interface. + */ + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + _depth = in.readInt(); - /** - * Implement Externalizable interface. 
- */ - public void writeExternal( ObjectOutput out ) - throws IOException - { - out.writeInt( _depth ); + int entries = in.readInt(); - int entries = _keys.size(); - out.writeInt( entries ); + // prepare array lists + int size = Math.max(entries, OVERFLOW_SIZE); + _keys = new ArrayList(size); + _values = new ArrayList(size); - // write keys - for (int i=0; iAlex Boisvert - * @version $Id: HTree.java,v 1.3 2005/06/25 23:12:32 doomdark Exp $ + * Persistent hashtable implementation for PageManager. Implemented as an H*Tree + * structure. + * + * WARNING! If this instance is used in a transactional context, it *must* be + * discarded after a rollback. + * + * @author Alex Boisvert + * @version $Id: HTree.java,v 1.3 2005/06/25 23:12:32 doomdark Exp $ */ -public class HTree -{ +public class HTree { - /** - * Root hash directory. - */ - private HashDirectory _root; + /** + * Root hash directory. + */ + private final HashDirectory _root; + /** + * Private constructor + * + * @param root + * Root hash directory. + */ + private HTree(HashDirectory root) { + _root = root; + } - /** - * Private constructor - * - * @param root Root hash directory. - */ - private HTree( HashDirectory root ) { - _root = root; - } + /** + * Create a persistent hashtable. + * + * @param recman + * Record manager used for persistence. + */ + public static HTree createInstance(RecordManager recman) throws IOException { + HashDirectory root; + long recid; + root = new HashDirectory((byte) 0); + recid = recman.insert(root); + root.setPersistenceContext(recman, recid); - /** - * Create a persistent hashtable. - * - * @param recman Record manager used for persistence. - */ - public static HTree createInstance( RecordManager recman ) - throws IOException - { - HashDirectory root; - long recid; + return new HTree(root); + } - root = new HashDirectory( (byte) 0 ); - recid = recman.insert( root ); - root.setPersistenceContext( recman, recid ); + /** + * Load a persistent hashtable + * + * @param recman + * RecordManager used to store the persistent hashtable + * @param root_recid + * Record id of the root directory of the HTree + */ + public static HTree load(RecordManager recman, long root_recid) + throws IOException { + HTree tree; + HashDirectory root; - return new HTree( root ); - } + root = (HashDirectory) recman.fetch(root_recid); + root.setPersistenceContext(recman, root_recid); + tree = new HTree(root); + return tree; + } + /** + * Associates the specified value with the specified key. + * + * @param key + * key with which the specified value is to be assocated. + * @param value + * value to be associated with the specified key. + */ + public synchronized void put(Object key, Object value) throws IOException { + _root.put(key, value); + } - /** - * Load a persistent hashtable - * - * @param recman RecordManager used to store the persistent hashtable - * @param root_recid Record id of the root directory of the HTree - */ - public static HTree load( RecordManager recman, long root_recid ) - throws IOException - { - HTree tree; - HashDirectory root; + /** + * Returns the value which is associated with the given key. Returns + * null if there is not association for this key. 
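HTree is the public face of this package. A compact, self-contained usage sketch that combines it with the named-object pattern (the class name, file name and key/value are illustrative):

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
import org.apache.hadoop.hive.ql.util.jdbm.htree.HTree;
import org.apache.hadoop.hive.ql.util.jdbm.recman.Provider;

public class HTreeExample {
  public static void main(String[] args) throws IOException {
    RecordManager recman =
        new Provider().createRecordManager("/tmp/htree-example", new Properties());

    long recid = recman.getNamedObject("my-htree");
    HTree tree = (recid != 0)
        ? HTree.load(recman, recid)            // reopen an existing tree
        : HTree.createInstance(recman);        // or create a fresh one
    if (recid == 0) {
      recman.setNamedObject("my-htree", tree.getRecid());
    }

    tree.put("answer", Integer.valueOf(42));
    System.out.println(tree.get("answer"));    // -> 42
    recman.commit();
    recman.close();
  }
}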
+ * + * @param key + * key whose associated value is to be returned + */ + public synchronized Object get(Object key) throws IOException { + return _root.get(key); + } - root = (HashDirectory) recman.fetch( root_recid ); - root.setPersistenceContext( recman, root_recid ); - tree = new HTree( root ); - return tree; - } + /** + * Remove the value which is associated with the given key. If the key does + * not exist, this method simply ignores the operation. + * + * @param key + * key whose associated value is to be removed + */ + public synchronized void remove(Object key) throws IOException { + _root.remove(key); + } + /** + * Returns an enumeration of the keys contained in this + */ + public synchronized FastIterator keys() throws IOException { + return _root.keys(); + } - /** - * Associates the specified value with the specified key. - * - * @param key key with which the specified value is to be assocated. - * @param value value to be associated with the specified key. - */ - public synchronized void put(Object key, Object value) - throws IOException - { - _root.put(key, value); - } + /** + * Returns an enumeration of the values contained in this + */ + public synchronized FastIterator values() throws IOException { + return _root.values(); + } + /** + * Get the record identifier used to load this hashtable. + */ + public long getRecid() { + return _root.getRecid(); + } - /** - * Returns the value which is associated with the given key. Returns - * null if there is not association for this key. - * - * @param key key whose associated value is to be returned - */ - public synchronized Object get(Object key) - throws IOException - { - return _root.get(key); - } - - - /** - * Remove the value which is associated with the given key. If the - * key does not exist, this method simply ignores the operation. - * - * @param key key whose associated value is to be removed - */ - public synchronized void remove(Object key) - throws IOException - { - _root.remove(key); - } - - - /** - * Returns an enumeration of the keys contained in this - */ - public synchronized FastIterator keys() - throws IOException - { - return _root.keys(); - } - - - /** - * Returns an enumeration of the values contained in this - */ - public synchronized FastIterator values() - throws IOException - { - return _root.values(); - } - - - /** - * Get the record identifier used to load this hashtable. - */ - public long getRecid() - { - return _root.getRecid(); - } - } - Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java (working copy) @@ -64,503 +64,472 @@ package org.apache.hadoop.hive.ql.util.jdbm.htree; -import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; - -import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator; -import org.apache.hadoop.hive.ql.util.jdbm.helper.IterationException; - import java.io.Externalizable; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; - import java.util.ArrayList; import java.util.Iterator; +import org.apache.hadoop.hive.ql.util.jdbm.RecordManager; +import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator; +import org.apache.hadoop.hive.ql.util.jdbm.helper.IterationException; + /** - * Hashtable directory page. 
- * - * @author Alex Boisvert - * @version $Id: HashDirectory.java,v 1.5 2005/06/25 23:12:32 doomdark Exp $ + * Hashtable directory page. + * + * @author Alex Boisvert + * @version $Id: HashDirectory.java,v 1.5 2005/06/25 23:12:32 doomdark Exp $ */ -final class HashDirectory - extends HashNode - implements Externalizable -{ +final class HashDirectory extends HashNode implements Externalizable { - static final long serialVersionUID = 1L; + static final long serialVersionUID = 1L; + /** + * Maximum number of children in a directory. + * + * (Must be a power of 2 -- if you update this value, you must also update + * BIT_SIZE and MAX_DEPTH.) + */ + static final int MAX_CHILDREN = 256; - /** - * Maximum number of children in a directory. - * - * (Must be a power of 2 -- if you update this value, you must also - * update BIT_SIZE and MAX_DEPTH.) - */ - static final int MAX_CHILDREN = 256; + /** + * Number of significant bits per directory level. + */ + static final int BIT_SIZE = 8; // log2(256) = 8 + /** + * Maximum number of levels (zero-based) + * + * (4 * 8 bits = 32 bits, which is the size of an "int", and as you know, + * hashcodes in Java are "ints") + */ + static final int MAX_DEPTH = 3; // 4 levels - /** - * Number of significant bits per directory level. - */ - static final int BIT_SIZE = 8; // log2(256) = 8 + /** + * Record ids of children pages. + */ + private long[] _children; + /** + * Depth of this directory page, zero-based + */ + private byte _depth; - /** - * Maximum number of levels (zero-based) - * - * (4 * 8 bits = 32 bits, which is the size of an "int", and as - * you know, hashcodes in Java are "ints") - */ - static final int MAX_DEPTH = 3; // 4 levels + /** + * PageManager used to persist changes in directory and buckets + */ + private transient RecordManager _recman; + /** + * This directory's record ID in the PageManager. (transient) + */ + private transient long _recid; - /** - * Record ids of children pages. - */ - private long[] _children; + /** + * Public constructor used by serialization + */ + public HashDirectory() { + // empty + } + /** + * Construct a HashDirectory + * + * @param depth + * Depth of this directory page. + */ + HashDirectory(byte depth) { + _depth = depth; + _children = new long[MAX_CHILDREN]; + } - /** - * Depth of this directory page, zero-based - */ - private byte _depth; + /** + * Sets persistence context. This method must be called before any + * persistence-related operation. + * + * @param recman + * RecordManager which stores this directory + * @param recid + * Record id of this directory. + */ + void setPersistenceContext(RecordManager recman, long recid) { + _recman = recman; + _recid = recid; + } + /** + * Get the record identifier used to load this hashtable. + */ + long getRecid() { + return _recid; + } - /** - * PageManager used to persist changes in directory and buckets - */ - private transient RecordManager _recman; + /** + * Returns whether or not this directory is empty. A directory is empty when + * it no longer contains buckets or sub-directories. + */ + boolean isEmpty() { + for (long element : _children) { + if (element != 0) { + return false; + } + } + return true; + } + /** + * Returns the value which is associated with the given key. Returns + * null if there is not association for this key. 
+ * + * @param key + * key whose associated value is to be returned + */ + Object get(Object key) throws IOException { + int hash = hashCode(key); + long child_recid = _children[hash]; + if (child_recid == 0) { + // not bucket/page --> not found + return null; + } else { + HashNode node = (HashNode) _recman.fetch(child_recid); + // System.out.println("HashDirectory.get() child is : "+node); - /** - * This directory's record ID in the PageManager. (transient) - */ - private transient long _recid; - - - /** - * Public constructor used by serialization - */ - public HashDirectory() { - // empty + if (node instanceof HashDirectory) { + // recurse into next directory level + HashDirectory dir = (HashDirectory) node; + dir.setPersistenceContext(_recman, child_recid); + return dir.get(key); + } else { + // node is a bucket + HashBucket bucket = (HashBucket) node; + return bucket.getValue(key); + } } + } - /** - * Construct a HashDirectory - * - * @param depth Depth of this directory page. - */ - HashDirectory(byte depth) { - _depth = depth; - _children = new long[MAX_CHILDREN]; + /** + * Associates the specified value with the specified key. + * + * @param key + * key with which the specified value is to be assocated. + * @param value + * value to be associated with the specified key. + * @return object which was previously associated with the given key, or + * null if no association existed. + */ + Object put(Object key, Object value) throws IOException { + if (value == null) { + return remove(key); } + int hash = hashCode(key); + long child_recid = _children[hash]; + if (child_recid == 0) { + // no bucket/page here yet, let's create a bucket + HashBucket bucket = new HashBucket(_depth + 1); + // insert (key,value) pair in bucket + Object existing = bucket.addElement(key, value); - /** - * Sets persistence context. This method must be called before any - * persistence-related operation. - * - * @param recman RecordManager which stores this directory - * @param recid Record id of this directory. - */ - void setPersistenceContext( RecordManager recman, long recid ) - { - this._recman = recman; - this._recid = recid; - } + long b_recid = _recman.insert(bucket); + _children[hash] = b_recid; + _recman.update(_recid, this); - /** - * Get the record identifier used to load this hashtable. - */ - long getRecid() { - return _recid; - } + // System.out.println("Added: "+bucket); + return existing; + } else { + HashNode node = (HashNode) _recman.fetch(child_recid); + if (node instanceof HashDirectory) { + // recursive insert in next directory level + HashDirectory dir = (HashDirectory) node; + dir.setPersistenceContext(_recman, child_recid); + return dir.put(key, value); + } else { + // node is a bucket + HashBucket bucket = (HashBucket) node; + if (bucket.hasRoom()) { + Object existing = bucket.addElement(key, value); + _recman.update(child_recid, bucket); + // System.out.println("Added: "+bucket); + return existing; + } else { + // overflow, so create a new directory + if (_depth == MAX_DEPTH) { + throw new RuntimeException("Cannot create deeper directory. " + + "Depth=" + _depth); + } + HashDirectory dir = new HashDirectory((byte) (_depth + 1)); + long dir_recid = _recman.insert(dir); + dir.setPersistenceContext(_recman, dir_recid); - /** - * Returns whether or not this directory is empty. A directory - * is empty when it no longer contains buckets or sub-directories. 
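A note on the overflow branch above: once a non-leaf bucket holds OVERFLOW_SIZE (8) entries, put() creates a HashDirectory one level deeper, migrates the bucket's contents into it and deletes the old bucket. From the caller's side this is invisible. A hedged illustration using keys that deliberately collide (CollidingKey is made up for the example; tree is an HTree assumed to exist):

// every instance lands in the same slot at every level, so inserting more than
// OVERFLOW_SIZE of them exercises the split-and-migrate path (and finally a
// leaf bucket once the depth exceeds MAX_DEPTH)
final class CollidingKey {
  private final int id;
  CollidingKey(int id) { this.id = id; }
  @Override public int hashCode() { return 0x12345678; }   // deliberately constant
  @Override public boolean equals(Object o) {
    return (o instanceof CollidingKey) && (((CollidingKey) o).id == id);
  }
}

for (int i = 0; i < 9; i++) {                 // 9 > OVERFLOW_SIZE forces a split
  tree.put(new CollidingKey(i), Integer.valueOf(i));
}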
- */ - boolean isEmpty() { - for (int i=0; i<_children.length; i++) { - if (_children[i] != 0) { - return false; - } - } - return true; - } + _children[hash] = dir_recid; + _recman.update(_recid, this); - /** - * Returns the value which is associated with the given key. Returns - * null if there is not association for this key. - * - * @param key key whose associated value is to be returned - */ - Object get(Object key) - throws IOException - { - int hash = hashCode( key ); - long child_recid = _children[ hash ]; - if ( child_recid == 0 ) { - // not bucket/page --> not found - return null; - } else { - HashNode node = (HashNode) _recman.fetch( child_recid ); - // System.out.println("HashDirectory.get() child is : "+node); + // discard overflown bucket + _recman.delete(child_recid); - if ( node instanceof HashDirectory ) { - // recurse into next directory level - HashDirectory dir = (HashDirectory) node; - dir.setPersistenceContext( _recman, child_recid ); - return dir.get( key ); - } else { - // node is a bucket - HashBucket bucket = (HashBucket) node; - return bucket.getValue( key ); - } + // migrate existing bucket elements + ArrayList keys = bucket.getKeys(); + ArrayList values = bucket.getValues(); + int entries = keys.size(); + for (int i = 0; i < entries; i++) { + dir.put(keys.get(i), values.get(i)); + } + + // (finally!) insert new element + return dir.put(key, value); } + } } + } + /** + * Remove the value which is associated with the given key. If the key does + * not exist, this method simply ignores the operation. + * + * @param key + * key whose associated value is to be removed + * @return object which was associated with the given key, or + * null if no association existed with given key. + */ + Object remove(Object key) throws IOException { + int hash = hashCode(key); + long child_recid = _children[hash]; + if (child_recid == 0) { + // not bucket/page --> not found + return null; + } else { + HashNode node = (HashNode) _recman.fetch(child_recid); + // System.out.println("HashDirectory.remove() child is : "+node); - /** - * Associates the specified value with the specified key. - * - * @param key key with which the specified value is to be assocated. - * @param value value to be associated with the specified key. - * @return object which was previously associated with the given key, - * or null if no association existed. 
- */ - Object put(Object key, Object value) - throws IOException { - if (value == null) { - return remove(key); + if (node instanceof HashDirectory) { + // recurse into next directory level + HashDirectory dir = (HashDirectory) node; + dir.setPersistenceContext(_recman, child_recid); + Object existing = dir.remove(key); + if (existing != null) { + if (dir.isEmpty()) { + // delete empty directory + _recman.delete(child_recid); + _children[hash] = 0; + _recman.update(_recid, this); + } } - int hash = hashCode(key); - long child_recid = _children[hash]; - if (child_recid == 0) { - // no bucket/page here yet, let's create a bucket - HashBucket bucket = new HashBucket(_depth+1); - - // insert (key,value) pair in bucket - Object existing = bucket.addElement(key, value); - - long b_recid = _recman.insert(bucket); - _children[hash] = b_recid; - + return existing; + } else { + // node is a bucket + HashBucket bucket = (HashBucket) node; + Object existing = bucket.removeElement(key); + if (existing != null) { + if (bucket.getElementCount() >= 1) { + _recman.update(child_recid, bucket); + } else { + // delete bucket, it's empty + _recman.delete(child_recid); + _children[hash] = 0; _recman.update(_recid, this); + } + } + return existing; + } + } + } - // System.out.println("Added: "+bucket); - return existing; - } else { - HashNode node = (HashNode) _recman.fetch( child_recid ); + /** + * Calculates the hashcode of a key, based on the current directory depth. + */ + private int hashCode(Object key) { + int hashMask = hashMask(); + int hash = key.hashCode(); + hash = hash & hashMask; + hash = hash >>> ((MAX_DEPTH - _depth) * BIT_SIZE); + hash = hash % MAX_CHILDREN; + /* + * System.out.println("HashDirectory.hashCode() is: 0x" + * +Integer.toHexString(hash) +" for object hashCode() 0x" + * +Integer.toHexString(key.hashCode())); + */ + return hash; + } - if ( node instanceof HashDirectory ) { - // recursive insert in next directory level - HashDirectory dir = (HashDirectory) node; - dir.setPersistenceContext( _recman, child_recid ); - return dir.put( key, value ); - } else { - // node is a bucket - HashBucket bucket = (HashBucket)node; - if (bucket.hasRoom()) { - Object existing = bucket.addElement(key, value); - _recman.update(child_recid, bucket); - // System.out.println("Added: "+bucket); - return existing; - } else { - // overflow, so create a new directory - if (_depth == MAX_DEPTH) { - throw new RuntimeException( "Cannot create deeper directory. " - + "Depth=" + _depth ); - } - HashDirectory dir = new HashDirectory( (byte) (_depth+1) ); - long dir_recid = _recman.insert( dir ); - dir.setPersistenceContext( _recman, dir_recid ); + /** + * Calculates the hashmask of this directory. The hashmask is the bit mask + * applied to a hashcode to retain only bits that are relevant to this + * directory level. 
+ */ + int hashMask() { + int bits = MAX_CHILDREN - 1; + int hashMask = bits << ((MAX_DEPTH - _depth) * BIT_SIZE); + /* + * System.out.println("HashDirectory.hashMask() is: 0x" + * +Integer.toHexString(hashMask)); + */ + return hashMask; + } - _children[hash] = dir_recid; - _recman.update( _recid, this ); + /** + * Returns an enumeration of the keys contained in this + */ + FastIterator keys() throws IOException { + return new HDIterator(true); + } - // discard overflown bucket - _recman.delete( child_recid ); + /** + * Returns an enumeration of the values contained in this + */ + FastIterator values() throws IOException { + return new HDIterator(false); + } - // migrate existing bucket elements - ArrayList keys = bucket.getKeys(); - ArrayList values = bucket.getValues(); - int entries = keys.size(); - for ( int i=0; inull if no association existed with given key. + * True if we're iterating on keys, False if enumerating on values. */ - Object remove(Object key) throws IOException { - int hash = hashCode(key); - long child_recid = _children[hash]; - if (child_recid == 0) { - // not bucket/page --> not found - return null; - } else { - HashNode node = (HashNode) _recman.fetch( child_recid ); - // System.out.println("HashDirectory.remove() child is : "+node); + private final boolean _iterateKeys; - if (node instanceof HashDirectory) { - // recurse into next directory level - HashDirectory dir = (HashDirectory)node; - dir.setPersistenceContext( _recman, child_recid ); - Object existing = dir.remove(key); - if (existing != null) { - if (dir.isEmpty()) { - // delete empty directory - _recman.delete(child_recid); - _children[hash] = 0; - _recman.update(_recid, this); - } - } - return existing; - } else { - // node is a bucket - HashBucket bucket = (HashBucket)node; - Object existing = bucket.removeElement(key); - if (existing != null) { - if (bucket.getElementCount() >= 1) { - _recman.update(child_recid, bucket); - } else { - // delete bucket, it's empty - _recman.delete(child_recid); - _children[hash] = 0; - _recman.update(_recid, this); - } - } - return existing; - } - } - } - /** - * Calculates the hashcode of a key, based on the current directory - * depth. + * Stacks of directories & last enumerated child position */ - private int hashCode(Object key) { - int hashMask = hashMask(); - int hash = key.hashCode(); - hash = hash & hashMask; - hash = hash >>> ((MAX_DEPTH - _depth) * BIT_SIZE); - hash = hash % MAX_CHILDREN; - /* - System.out.println("HashDirectory.hashCode() is: 0x" - +Integer.toHexString(hash) - +" for object hashCode() 0x" - +Integer.toHexString(key.hashCode())); - */ - return hash; - } + private final ArrayList _dirStack; + private final ArrayList _childStack; /** - * Calculates the hashmask of this directory. The hashmask is the - * bit mask applied to a hashcode to retain only bits that are - * relevant to this directory level. 
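hashMask() simply positions an 8-bit window over the hashcode: depth 0 keeps the top byte, depth 3 the bottom byte. A small sketch of the masks it produces, with the constant values assumed from the definitions above:

// Masks produced by hashMask() at each directory depth, assuming
// MAX_CHILDREN = 256, BIT_SIZE = 8, MAX_DEPTH = 3 as defined above.
public class HashMaskSketch {
  public static void main(String[] args) {
    final int bits = 256 - 1; // MAX_CHILDREN - 1
    for (int depth = 0; depth <= 3; depth++) {
      int mask = bits << ((3 - depth) * 8); // (MAX_DEPTH - depth) * BIT_SIZE
      System.out.printf("depth %d -> 0x%08X%n", depth, mask);
    }
    // depth 0 -> 0xFF000000, depth 1 -> 0x00FF0000,
    // depth 2 -> 0x0000FF00, depth 3 -> 0x000000FF
  }
}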
+ * Current HashDirectory in the hierarchy */ - int hashMask() { - int bits = MAX_CHILDREN-1; - int hashMask = bits << ((MAX_DEPTH - _depth) * BIT_SIZE); - /* - System.out.println("HashDirectory.hashMask() is: 0x" - +Integer.toHexString(hashMask)); - */ - return hashMask; - } + private HashDirectory _dir; /** - * Returns an enumeration of the keys contained in this + * Current child position */ - FastIterator keys() - throws IOException - { - return new HDIterator( true ); - } + private int _child; /** - * Returns an enumeration of the values contained in this + * Current bucket iterator */ - FastIterator values() - throws IOException - { - return new HDIterator( false ); - } + private Iterator _iter; - /** - * Implement Externalizable interface + * Construct an iterator on this directory. + * + * @param iterateKeys + * True if iteration supplies keys, False if iterateKeys supplies + * values. */ - public void writeExternal(ObjectOutput out) - throws IOException { - out.writeByte(_depth); - out.writeObject(_children); + HDIterator(boolean iterateKeys) throws IOException { + _dirStack = new ArrayList(); + _childStack = new ArrayList(); + _dir = HashDirectory.this; + _child = -1; + _iterateKeys = iterateKeys; + + prepareNext(); } - /** - * Implement Externalizable interface + * Returns the next object. */ - public synchronized void readExternal(ObjectInput in) - throws IOException, ClassNotFoundException { - _depth = in.readByte(); - _children = (long[])in.readObject(); + @Override + public Object next() { + Object next = null; + if (_iter != null && _iter.hasNext()) { + next = _iter.next(); + } else { + try { + prepareNext(); + } catch (IOException except) { + throw new IterationException(except); + } + if (_iter != null && _iter.hasNext()) { + return next(); + } + } + return next; } - - //////////////////////////////////////////////////////////////////////// - // INNER CLASS - //////////////////////////////////////////////////////////////////////// - /** - * Utility class to enumerate keys/values in a HTree + * Prepare internal state so we can answer hasMoreElements + * + * Actually, this code prepares an Enumeration on the next Bucket to + * enumerate. If no following bucket is found, the next Enumeration is set + * to null. */ - public class HDIterator - extends FastIterator - { + private void prepareNext() throws IOException { + long child_recid = 0; - /** - * True if we're iterating on keys, False if enumerating on values. - */ - private boolean _iterateKeys; + // find next bucket/directory to enumerate + do { + _child++; + if (_child >= MAX_CHILDREN) { - /** - * Stacks of directories & last enumerated child position - */ - private ArrayList _dirStack; - private ArrayList _childStack; + if (_dirStack.isEmpty()) { + // no more directory in the stack, we're finished + return; + } - /** - * Current HashDirectory in the hierarchy - */ - private HashDirectory _dir; - - /** - * Current child position - */ - private int _child; - - /** - * Current bucket iterator - */ - private Iterator _iter; - - - /** - * Construct an iterator on this directory. - * - * @param iterateKeys True if iteration supplies keys, False - * if iterateKeys supplies values. 
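Callers of keys()/values() see this iterator through the FastIterator contract: there is no hasNext(), and next() returns null once prepareNext() finds no further bucket. A hedged usage sketch (the import path is assumed from this fork's package layout):

import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator;

// Hypothetical helper illustrating how HDIterator is consumed: loop until
// next() returns null, because FastIterator exposes no hasNext().
final class FastIteratorDrainSketch {
  static int countEntries(FastIterator it) {
    int count = 0;
    for (Object o = it.next(); o != null; o = it.next()) {
      count++; // process the key or value here
    }
    return count;
  }
}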
- */ - HDIterator( boolean iterateKeys ) - throws IOException - { - _dirStack = new ArrayList(); - _childStack = new ArrayList(); - _dir = HashDirectory.this; - _child = -1; - _iterateKeys = iterateKeys; - - prepareNext(); + // try next page + _dir = (HashDirectory) _dirStack.remove(_dirStack.size() - 1); + _child = ((Integer) _childStack.remove(_childStack.size() - 1)) + .intValue(); + continue; } + child_recid = _dir._children[_child]; + } while (child_recid == 0); + if (child_recid == 0) { + throw new Error("child_recid cannot be 0"); + } - /** - * Returns the next object. - */ - public Object next() - { - Object next = null; - if( _iter != null && _iter.hasNext() ) { - next = _iter.next(); - } else { - try { - prepareNext(); - } catch ( IOException except ) { - throw new IterationException( except ); - } - if ( _iter != null && _iter.hasNext() ) { - return next(); - } - } - return next; - } + HashNode node = (HashNode) _recman.fetch(child_recid); + // System.out.println("HDEnumeration.get() child is : "+node); + if (node instanceof HashDirectory) { + // save current position + _dirStack.add(_dir); + _childStack.add(new Integer(_child)); - /** - * Prepare internal state so we can answer hasMoreElements - * - * Actually, this code prepares an Enumeration on the next - * Bucket to enumerate. If no following bucket is found, - * the next Enumeration is set to null. - */ - private void prepareNext() throws IOException { - long child_recid = 0; + _dir = (HashDirectory) node; + _child = -1; - // find next bucket/directory to enumerate - do { - _child++; - if (_child >= MAX_CHILDREN) { - - if (_dirStack.isEmpty()) { - // no more directory in the stack, we're finished - return; - } - - // try next page - _dir = (HashDirectory) _dirStack.remove( _dirStack.size()-1 ); - _child = ( (Integer) _childStack.remove( _childStack.size()-1 ) ).intValue(); - continue; - } - child_recid = _dir._children[_child]; - } while (child_recid == 0); - - if (child_recid == 0) { - throw new Error("child_recid cannot be 0"); - } - - HashNode node = (HashNode) _recman.fetch( child_recid ); - // System.out.println("HDEnumeration.get() child is : "+node); - - if ( node instanceof HashDirectory ) { - // save current position - _dirStack.add( _dir ); - _childStack.add( new Integer( _child ) ); - - _dir = (HashDirectory)node; - _child = -1; - - // recurse into - _dir.setPersistenceContext( _recman, child_recid ); - prepareNext(); - } else { - // node is a bucket - HashBucket bucket = (HashBucket)node; - if ( _iterateKeys ) { - _iter = bucket.getKeys().iterator(); - } else { - _iter = bucket.getValues().iterator(); - } - } + // recurse into + _dir.setPersistenceContext(_recman, child_recid); + prepareNext(); + } else { + // node is a bucket + HashBucket bucket = (HashBucket) node; + if (_iterateKeys) { + _iter = bucket.getKeys().iterator(); + } else { + _iter = bucket.getValues().iterator(); } + } } + } } - Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java (working copy) @@ -67,14 +67,14 @@ import java.io.Serializable; /** - * Abstract class for Hashtable directory nodes - * - * @author Alex Boisvert - * @version $Id: HashNode.java,v 1.2 2003/03/21 02:54:58 boisvert Exp $ + * Abstract class for Hashtable directory nodes + * + * @author Alex Boisvert + * @version $Id: 
HashNode.java,v 1.2 2003/03/21 02:54:58 boisvert Exp $ */ class HashNode implements Serializable { - // Empty, there's no common functionality. We use this abstract - // class for typing only. + // Empty, there's no common functionality. We use this abstract + // class for typing only. } Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManager.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManager.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/RecordManager.java (working copy) @@ -67,174 +67,165 @@ package org.apache.hadoop.hive.ql.util.jdbm; import java.io.IOException; + import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer; /** - * An interface to manages records, which are uninterpreted blobs of data. - *

- * The set of record operations is simple: fetch, insert, update and delete. - * Each record is identified using a "rowid" and contains a byte[] data block. - * Rowids are returned on inserts and you can store them someplace safe - * to be able to get back to them. Data blocks can be as long as you wish, - * and may have lengths different from the original when updating. - * + * An interface to manages records, which are uninterpreted blobs of data. + *

+ * The set of record operations is simple: fetch, insert, update and delete. + * Each record is identified using a "rowid" and contains a byte[] data block. + * Rowids are returned on inserts and you can store them someplace safe to be + * able to get back to them. Data blocks can be as long as you wish, and may + * have lengths different from the original when updating. + * * @author Alex Boisvert * @author Cees de Groot * @version $Id: RecordManager.java,v 1.3 2005/06/25 23:12:31 doomdark Exp $ */ -public interface RecordManager -{ +public interface RecordManager { - /** - * Reserved slot for name directory. - */ - public static final int NAME_DIRECTORY_ROOT = 0; + /** + * Reserved slot for name directory. + */ + public static final int NAME_DIRECTORY_ROOT = 0; + /** + * Inserts a new record using standard java object serialization. + * + * @param obj + * the object for the new record. + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract long insert(Object obj) throws IOException; - /** - * Inserts a new record using standard java object serialization. - * - * @param obj the object for the new record. - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract long insert( Object obj ) - throws IOException; + /** + * Inserts a new record using a custom serializer. + * + * @param obj + * the object for the new record. + * @param serializer + * a custom serializer + * @return the rowid for the new record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract long insert(Object obj, Serializer serializer) + throws IOException; - - /** - * Inserts a new record using a custom serializer. - * - * @param obj the object for the new record. - * @param serializer a custom serializer - * @return the rowid for the new record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract long insert( Object obj, Serializer serializer ) - throws IOException; + /** + * Deletes a record. + * + * @param recid + * the rowid for the record that should be deleted. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract void delete(long recid) throws IOException; + /** + * Updates a record using standard java object serialization. + * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract void update(long recid, Object obj) throws IOException; - /** - * Deletes a record. - * - * @param recid the rowid for the record that should be deleted. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract void delete( long recid ) - throws IOException; + /** + * Updates a record using a custom serializer. + * + * @param recid + * the recid for the record that is to be updated. + * @param obj + * the new object for the record. + * @param serializer + * a custom serializer + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract void update(long recid, Object obj, Serializer serializer) + throws IOException; + /** + * Fetches a record using standard java object serialization. + * + * @param recid + * the recid for the record that must be fetched. 
+ * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract Object fetch(long recid) throws IOException; - /** - * Updates a record using standard java object serialization. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract void update( long recid, Object obj ) - throws IOException; + /** + * Fetches a record using a custom serializer. + * + * @param recid + * the recid for the record that must be fetched. + * @param serializer + * a custom serializer + * @return the object contained in the record. + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract Object fetch(long recid, Serializer serializer) + throws IOException; + /** + * Closes the record manager. + * + * @throws IOException + * when one of the underlying I/O operations fails. + */ + public abstract void close() throws IOException; - /** - * Updates a record using a custom serializer. - * - * @param recid the recid for the record that is to be updated. - * @param obj the new object for the record. - * @param serializer a custom serializer - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract void update( long recid, Object obj, Serializer serializer ) - throws IOException; + /** + * Returns the number of slots available for "root" rowids. These slots can be + * used to store special rowids, like rowids that point to other rowids. Root + * rowids are useful for bootstrapping access to a set of data. + */ + public abstract int getRootCount(); - - /** - * Fetches a record using standard java object serialization. - * - * @param recid the recid for the record that must be fetched. - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract Object fetch( long recid ) - throws IOException; + /** + * Returns the indicated root rowid. + * + * @see #getRootCount + */ + public abstract long getRoot(int id) throws IOException; + /** + * Sets the indicated root rowid. + * + * @see #getRootCount + */ + public abstract void setRoot(int id, long rowid) throws IOException; - /** - * Fetches a record using a custom serializer. - * - * @param recid the recid for the record that must be fetched. - * @param serializer a custom serializer - * @return the object contained in the record. - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract Object fetch( long recid, Serializer serializer ) - throws IOException; + /** + * Commit (make persistent) all changes since beginning of transaction. + */ + public abstract void commit() throws IOException; + /** + * Rollback (cancel) all changes since beginning of transaction. + */ + public abstract void rollback() throws IOException; - /** - * Closes the record manager. - * - * @throws IOException when one of the underlying I/O operations fails. - */ - public abstract void close() - throws IOException; + /** + * Obtain the record id of a named object. Returns 0 if named object doesn't + * exist. + */ + public abstract long getNamedObject(String name) throws IOException; + /** + * Set the record id of a named object. 
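Taken together, the interface describes a simple lifecycle: insert() hands back a rowid, fetch()/update()/delete() work against that rowid, commit()/rollback() bound a transaction, and the named-object calls let callers bootstrap without remembering raw rowids. A hedged usage sketch against the methods declared above (how the RecordManager instance is obtained is out of scope here):

import java.io.IOException;

import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;

// Sketch of the basic RecordManager lifecycle using only the methods declared
// in the interface; 'recman' is assumed to be an already-created instance.
final class RecordManagerUsageSketch {
  static void roundTrip(RecordManager recman) throws IOException {
    long recid = recman.insert("hello");         // rowid of the new record
    recman.setNamedObject("greeting", recid);    // remember it under a name
    recman.commit();                             // make both changes persistent

    long found = recman.getNamedObject("greeting");
    String value = (String) recman.fetch(found); // standard java serialization
    recman.update(found, value + ", world");     // replace the record contents
    recman.rollback();                           // discard the uncommitted update

    recman.delete(found);                        // remove the record for good
    recman.commit();
    recman.close();
  }
}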
+ */ + public abstract void setNamedObject(String name, long recid) + throws IOException; - /** - * Returns the number of slots available for "root" rowids. These slots - * can be used to store special rowids, like rowids that point to - * other rowids. Root rowids are useful for bootstrapping access to - * a set of data. - */ - public abstract int getRootCount(); - - - /** - * Returns the indicated root rowid. - * - * @see #getRootCount - */ - public abstract long getRoot( int id ) - throws IOException; - - - /** - * Sets the indicated root rowid. - * - * @see #getRootCount - */ - public abstract void setRoot( int id, long rowid ) - throws IOException; - - - /** - * Commit (make persistent) all changes since beginning of transaction. - */ - public abstract void commit() - throws IOException; - - - /** - * Rollback (cancel) all changes since beginning of transaction. - */ - public abstract void rollback() - throws IOException; - - - - - /** - * Obtain the record id of a named object. Returns 0 if named object - * doesn't exist. - */ - public abstract long getNamedObject( String name ) - throws IOException; - - - /** - * Set the record id of a named object. - */ - public abstract void setNamedObject( String name, long recid ) - throws IOException; - } - Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java (working copy) @@ -30,38 +30,37 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "year", - value = "_FUNC_(date) - Returns the year of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + - "'yyyy-MM-dd'.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + - " 2009" - ) +@description(name = "year", value = "_FUNC_(date) - Returns the year of date", extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'yyyy-MM-dd'.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 2009") public class UDFYear extends UDF { private static Log LOG = LogFactory.getLog(UDFYear.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFYear() { } /** * Get the year from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 1 to 12. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 1 to 12. null if the dateString is not a valid date + * string. 
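The UDF itself is the usual SimpleDateFormat-plus-Calendar pattern; a minimal standalone sketch of that pattern follows (the input value is illustrative, not taken from the Hive tests). SimpleDateFormat.parse(String) only consumes the leading "yyyy-MM-dd" characters, which is why both date and datetime strings are accepted.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;

// Minimal sketch of the parse-then-Calendar lookup UDFYear performs.
public class YearLookupSketch {
  public static void main(String[] args) throws ParseException {
    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(formatter.parse("2009-07-30 04:17:52")); // time part is ignored
    System.out.println(calendar.get(Calendar.YEAR)); // 2009
  }
}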
*/ - public IntWritable evaluate(Text dateString) { - + public IntWritable evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = formatter.parse(dateString.toString()); calendar.setTime(date); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java (working copy) @@ -32,33 +32,34 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToInteger extends UDF { private static Log LOG = LogFactory.getLog(UDFToInteger.class.getName()); IntWritable intWritable = new IntWritable(); - + public UDFToInteger() { } /** * Convert from void to an integer. This is called for CAST(... AS INT) - * - * @param i The void value to convert + * + * @param i + * The void value to convert * @return Integer */ - public IntWritable evaluate(NullWritable i) { + public IntWritable evaluate(NullWritable i) { return null; } /** * Convert from boolean to an integer. This is called for CAST(... AS INT) - * - * @param i The boolean value to convert + * + * @param i + * The boolean value to convert * @return IntWritable */ - public IntWritable evaluate(BooleanWritable i) { + public IntWritable evaluate(BooleanWritable i) { if (i == null) { return null; } else { @@ -66,94 +67,101 @@ return intWritable; } } - + /** * Convert from byte to an integer. This is called for CAST(... AS INT) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return IntWritable */ - public IntWritable evaluate(ByteWritable i) { + public IntWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { - intWritable.set((int)i.get()); + intWritable.set(i.get()); return intWritable; } } - + /** * Convert from short to an integer. This is called for CAST(... AS INT) - * - * @param i The short value to convert + * + * @param i + * The short value to convert * @return IntWritable */ - public IntWritable evaluate(ShortWritable i) { + public IntWritable evaluate(ShortWritable i) { if (i == null) { return null; } else { - intWritable.set((int)i.get()); + intWritable.set(i.get()); return intWritable; } } - + /** * Convert from long to an integer. This is called for CAST(... AS INT) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return IntWritable */ - public IntWritable evaluate(LongWritable i) { + public IntWritable evaluate(LongWritable i) { if (i == null) { return null; } else { - intWritable.set((int)i.get()); + intWritable.set((int) i.get()); return intWritable; } } - + /** * Convert from float to an integer. This is called for CAST(... AS INT) - * - * @param i The float value to convert + * + * @param i + * The float value to convert * @return IntWritable */ - public IntWritable evaluate(FloatWritable i) { + public IntWritable evaluate(FloatWritable i) { if (i == null) { return null; } else { - intWritable.set((int)i.get()); + intWritable.set((int) i.get()); return intWritable; } } - + /** * Convert from double to an integer. This is called for CAST(... 
AS INT) - * - * @param i The double value to convert + * + * @param i + * The double value to convert * @return IntWritable */ - public IntWritable evaluate(DoubleWritable i) { + public IntWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - intWritable.set((int)i.get()); + intWritable.set((int) i.get()); return intWritable; } } - + /** * Convert from string to an integer. This is called for CAST(... AS INT) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return IntWritable */ - public IntWritable evaluate(Text i) { + public IntWritable evaluate(Text i) { if (i == null) { return null; } else { try { - intWritable.set(LazyInteger.parseInt(i.getBytes(), 0 , i.getLength(), 10)); + intWritable.set(LazyInteger + .parseInt(i.getBytes(), 0, i.getLength(), 10)); return intWritable; } catch (NumberFormatException e) { // MySQL returns 0 if the string is not a well-formed numeric value. @@ -163,5 +171,5 @@ } } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java (working copy) @@ -30,24 +30,20 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "yearweek", - value = "_FUNC_(date) - Returns the week of the year of the given date. A week " + - "is considered to start on a Monday and week 1 is the first week with >3 days.", - extended = "Examples:\n" + - " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + - " 8\n" + - " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "yearweek", value = "_FUNC_(date) - Returns the week of the year of the given date. A week " + + "is considered to start on a Monday and week 1 is the first week with >3 days.", extended = "Examples:\n" + + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + + " 8\n" + + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1") public class UDFWeekOfYear extends UDF { private static Log LOG = LogFactory.getLog(UDFWeekOfYear.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFWeekOfYear() { calendar.setFirstDayOfWeek(Calendar.MONDAY); calendar.setMinimalDaysInFirstWeek(4); @@ -56,10 +52,13 @@ /** * Get the week of the year from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 1 to 53. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 1 to 53. null if the dateString is not a valid date + * string. 
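The two Calendar settings made in the constructor (first day of week = Monday, minimal days in first week = 4) are what produce the week numbers documented in the @description above; a standalone sketch reproducing those two examples:

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;

// Sketch of the Calendar configuration UDFWeekOfYear relies on: weeks start on
// Monday and week 1 is the first week with at least 4 days.
public class WeekOfYearSketch {
  public static void main(String[] args) throws ParseException {
    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    Calendar calendar = Calendar.getInstance();
    calendar.setFirstDayOfWeek(Calendar.MONDAY);
    calendar.setMinimalDaysInFirstWeek(4);

    calendar.setTime(formatter.parse("2008-02-20"));
    System.out.println(calendar.get(Calendar.WEEK_OF_YEAR)); // 8

    calendar.setTime(formatter.parse("1980-12-31 12:59:59"));
    System.out.println(calendar.get(Calendar.WEEK_OF_YEAR)); // 1
  }
}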
*/ - public IntWritable evaluate(Text dateString) { + public IntWritable evaluate(Text dateString) { if (dateString == null) { return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java (working copy) @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -29,10 +28,7 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "positive", - value = "_FUNC_ a - Returns a" -) +@description(name = "positive", value = "_FUNC_ a - Returns a") public class UDFOPPositive extends UDFBaseNumericUnaryOp { private static Log LOG = LogFactory.getLog(UDFOPPositive.class.getName()); @@ -40,7 +36,6 @@ public UDFOPPositive() { } - @Override public ByteWritable evaluate(ByteWritable a) { return a; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java (working copy) @@ -32,138 +32,147 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToLong extends UDF { private static Log LOG = LogFactory.getLog(UDFToLong.class.getName()); LongWritable longWritable = new LongWritable(); - + public UDFToLong() { } /** * Convert from void to a long. This is called for CAST(... AS BIGINT) - * - * @param i The void value to convert + * + * @param i + * The void value to convert * @return LongWritable */ - public LongWritable evaluate(NullWritable i) { + public LongWritable evaluate(NullWritable i) { return null; } /** * Convert from boolean to a long. This is called for CAST(... AS BIGINT) - * - * @param i The boolean value to convert + * + * @param i + * The boolean value to convert * @return LongWritable */ - public LongWritable evaluate(BooleanWritable i) { + public LongWritable evaluate(BooleanWritable i) { if (i == null) { return null; } else { - longWritable.set(i.get()? (long)1 : (long) 0); + longWritable.set(i.get() ? (long) 1 : (long) 0); return longWritable; } } /** * Convert from byte to a long. This is called for CAST(... AS BIGINT) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return LongWritable */ - public LongWritable evaluate(ByteWritable i) { + public LongWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { - longWritable.set((long)i.get()); + longWritable.set(i.get()); return longWritable; } } - + /** * Convert from short to a long. This is called for CAST(... AS BIGINT) - * - * @param i The short value to convert + * + * @param i + * The short value to convert * @return LongWritable */ - public LongWritable evaluate(ShortWritable i) { + public LongWritable evaluate(ShortWritable i) { if (i == null) { return null; } else { - longWritable.set((long)i.get()); + longWritable.set(i.get()); return longWritable; } } - + /** * Convert from integer to a long. This is called for CAST(... 
AS BIGINT) - * - * @param i The integer value to convert + * + * @param i + * The integer value to convert * @return LongWritable */ - public LongWritable evaluate(IntWritable i) { + public LongWritable evaluate(IntWritable i) { if (i == null) { return null; } else { - longWritable.set((long)i.get()); + longWritable.set(i.get()); return longWritable; } } /** * Convert from long to a long. This is called for CAST(... AS BIGINT) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return LongWritable */ - public LongWritable evaluate(LongWritable i) { + public LongWritable evaluate(LongWritable i) { return i; } /** * Convert from float to a long. This is called for CAST(... AS BIGINT) - * - * @param i The float value to convert + * + * @param i + * The float value to convert * @return LongWritable */ - public LongWritable evaluate(FloatWritable i) { + public LongWritable evaluate(FloatWritable i) { if (i == null) { return null; } else { - longWritable.set((long)i.get()); + longWritable.set((long) i.get()); return longWritable; } } - + /** * Convert from double to a long. This is called for CAST(... AS BIGINT) - * - * @param i The double value to convert + * + * @param i + * The double value to convert * @return LongWritable */ - public LongWritable evaluate(DoubleWritable i) { + public LongWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - longWritable.set((long)i.get()); + longWritable.set((long) i.get()); return longWritable; } } - + /** * Convert from string to a long. This is called for CAST(... AS BIGINT) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return LongWritable */ - public LongWritable evaluate(Text i) { + public LongWritable evaluate(Text i) { if (i == null) { return null; } else { try { - longWritable.set(LazyLong.parseLong(i.getBytes(), 0 , i.getLength(), 10)); + longWritable + .set(LazyLong.parseLong(i.getBytes(), 0, i.getLength(), 10)); return longWritable; } catch (NumberFormatException e) { // MySQL returns 0 if the string is not a well-formed numeric value. @@ -173,5 +182,5 @@ } } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java (working copy) @@ -24,13 +24,8 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "log2", - value = "_FUNC_(x) - Returns the logarithm of x with base 2", - extended = "Example:\n" + - " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "log2", value = "_FUNC_(x) - Returns the logarithm of x with base 2", extended = "Example:\n" + + " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + " 1") public class UDFLog2 extends UDF { private static Log LOG = LogFactory.getLog(UDFLog2.class.getName()); @@ -38,18 +33,18 @@ private static double log2 = Math.log(2.0); DoubleWritable result = new DoubleWritable(); - + public UDFLog2() { } /** * Returns the logarithm of "a" with base 2. 
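The implementation relies on the change-of-base identity log2(x) = ln(x) / ln(2), with ln(2) precomputed once. A small sketch of that identity (expect ordinary floating-point rounding for inputs that are not exact powers of two):

// Change-of-base identity UDFLog2 uses: log2(x) = ln(x) / ln(2).
public class Log2Sketch {
  private static final double LOG2 = Math.log(2.0);

  static double log2(double x) {
    return Math.log(x) / LOG2;
  }

  public static void main(String[] args) {
    System.out.println(log2(2.0));    // 1.0, matching the documented example
    System.out.println(log2(1024.0)); // ~10.0, up to floating-point rounding
  }
}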
*/ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null || a.get() <= 0.0) { return null; } else { - result.set(Math.log(a.get())/log2); + result.set(Math.log(a.get()) / log2); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPGreaterThan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPGreaterThan.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPGreaterThan.java (working copy) @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.udf; -import java.sql.Date; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.description; @@ -33,21 +31,19 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = ">", - value = "a _FUNC_ b - Returns TRUE if a is greater than b" -) +@description(name = ">", value = "a _FUNC_ b - Returns TRUE if a is greater than b") public class UDFOPGreaterThan extends UDFBaseCompare { private static Log LOG = LogFactory.getLog(UDFOPGreaterThan.class.getName()); BooleanWritable resultCache; + public UDFOPGreaterThan() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -57,8 +53,8 @@ return r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -68,8 +64,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -79,8 +75,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -89,9 +85,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -100,9 +96,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -110,10 +106,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java (working copy) @@ -24,48 +24,43 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "abs", - value = "_FUNC_(x) - returns the absolute value of x", - extended = "Example:\n" + - " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + - " 0\n" + - " > SELECT _FUNC_(-5) FROM src LIMIT 1;\n" + - " 5" - ) -public class UDFAbs extends UDF { - - private DoubleWritable resultDouble = new DoubleWritable(); - private LongWritable resultLong = new LongWritable(); - private IntWritable resultInt = new IntWritable(); - +@description(name = "abs", value = "_FUNC_(x) - returns the absolute value of x", extended = "Example:\n" + + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + + " 0\n" + + " > SELECT _FUNC_(-5) FROM src LIMIT 1;\n" + " 5") +public class UDFAbs extends UDF { + + private final DoubleWritable resultDouble = new DoubleWritable(); + private final LongWritable resultLong = new LongWritable(); + private final IntWritable resultInt = new IntWritable(); + public DoubleWritable evaluate(DoubleWritable n) { if (n == null) { return null; } - + resultDouble.set(Math.abs(n.get())); - + return resultDouble; } - + public LongWritable evaluate(LongWritable n) { if (n == null) { return null; } - + resultLong.set(Math.abs(n.get())); - + return resultLong; } - + public IntWritable evaluate(IntWritable n) { if (n == null) { return null; } - + resultInt.set(Math.abs(n.get())); - + return resultInt; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java (working copy) @@ -22,170 +22,171 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "conv", - value="_FUNC_(num, from_base, to_base) - convert num from from_base to" + - " to_base", - extended="If to_base is negative, treat num as a signed integer," + - "otherwise, treat it as an unsigned integer.\n" + - "Example:\n" + - " > SELECT _FUNC_('100', 2, 10) FROM src LIMIT 1;\n" + - " '4'\n" + - " > SELECT _FUNC_(-10, 16, -10) FROM src LIMIT 1;\n" + - " '16'" - ) +@description(name = "conv", value = "_FUNC_(num, from_base, to_base) - convert num from from_base to" + + " to_base", extended = "If to_base is negative, treat num as a signed integer," + + "otherwise, treat it as an unsigned integer.\n" + + "Example:\n" + + " > SELECT _FUNC_('100', 2, 10) FROM src LIMIT 1;\n" + + " '4'\n" + + " > SELECT _FUNC_(-10, 16, -10) FROM src LIMIT 1;\n" + " '16'") public class UDFConv extends UDF { - private Text result = new Text(); - private byte[] value = new byte[64]; - + private final Text result = new Text(); + private final byte[] value = new byte[64]; + /** - * Divide x by m as if x is an unsigned 64-bit integer. - * Examples: - * unsignedLongDiv(-1, 2) == Long.MAX_VALUE - * unsignedLongDiv(6, 3) == 2 - * unsignedLongDiv(0, 5) == 0 - * - * @param x is treated as unsigned - * @param m is treated as signed + * Divide x by m as if x is an unsigned 64-bit integer. 
Examples: + * unsignedLongDiv(-1, 2) == Long.MAX_VALUE unsignedLongDiv(6, 3) == 2 + * unsignedLongDiv(0, 5) == 0 + * + * @param x + * is treated as unsigned + * @param m + * is treated as signed */ private long unsignedLongDiv(long x, int m) { - if(x >= 0) { + if (x >= 0) { return x / m; } - + // Let uval be the value of the unsigned long with the same bits as x // Two's complement => x = uval - 2*MAX - 2 // => uval = x + 2*MAX + 2 // Now, use the fact: (a+b)/c = a/c + b/c + (a%c+b%c)/c - return x/m + 2*(Long.MAX_VALUE/m) + 2/m - + (x%m + 2*(Long.MAX_VALUE%m) + 2%m) / m; + return x / m + 2 * (Long.MAX_VALUE / m) + 2 / m + + (x % m + 2 * (Long.MAX_VALUE % m) + 2 % m) / m; } - + /** * Decode val into value[] * - * @param val is treated as an unsigned 64-bit integer - * @param radix must be between MIN_RADIX and MAX_RADIX + * @param val + * is treated as an unsigned 64-bit integer + * @param radix + * must be between MIN_RADIX and MAX_RADIX */ private void decode(long val, int radix) { - Arrays.fill(value, (byte)0); - for (int i = value.length-1; val != 0; i--) { + Arrays.fill(value, (byte) 0); + for (int i = value.length - 1; val != 0; i--) { long q = unsignedLongDiv(val, radix); - value[i] = (byte)(val - q*radix); + value[i] = (byte) (val - q * radix); val = q; } } - + /** * Convert value[] into a long. On overflow, return -1 (as mySQL does). If a * negative digit is found, ignore the suffix starting there. * - * @param radix must be between MIN_RADIX and MAX_RADIX + * @param radix + * must be between MIN_RADIX and MAX_RADIX * @return the result should be treated as an unsigned 64-bit integer. */ private long encode(int radix) { long val = 0; - long bound = unsignedLongDiv(-1-radix, radix); // Possible overflow once val - // exceeds this value - for(int i = 0; i=0; i++) { - if(val >= bound) { + long bound = unsignedLongDiv(-1 - radix, radix); // Possible overflow once + // val + // exceeds this value + for (int i = 0; i < value.length && value[i] >= 0; i++) { + if (val >= bound) { // Check for overflow - if(unsignedLongDiv(-1-value[i], radix) < val) { + if (unsignedLongDiv(-1 - value[i], radix) < val) { return -1; } } - val = val*radix + value[i]; + val = val * radix + value[i]; } return val; } - + /** * Convert the bytes in value[] to the corresponding chars. * - * @param radix must be between MIN_RADIX and MAX_RADIX - * @param fromPos is the first nonzero element + * @param radix + * must be between MIN_RADIX and MAX_RADIX + * @param fromPos + * is the first nonzero element */ - private void byte2char(int radix, int fromPos) - { - for(int i=fromPos; i < value.length; i++) { - value[i] = (byte)Character.toUpperCase( - Character.forDigit(value[i], radix)); + private void byte2char(int radix, int fromPos) { + for (int i = fromPos; i < value.length; i++) { + value[i] = (byte) Character.toUpperCase(Character.forDigit(value[i], + radix)); } } - + /** * Convert the chars in value[] to the corresponding integers. Convert invalid * characters to -1. * - * @param radix must be between MIN_RADIX and MAX_RADIX - * @param fromPos is the first nonzero element + * @param radix + * must be between MIN_RADIX and MAX_RADIX + * @param fromPos + * is the first nonzero element */ - private void char2byte(int radix, int fromPos) - { - for(int i=fromPos; i0 the result is + * Convert numbers between different number bases. If toBase>0 the result is * unsigned, otherwise it is signed. 
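unsignedLongDiv() is the heart of the conversion: it divides a long as if it were unsigned, using only signed 64-bit arithmetic. Below is a standalone copy of the method for experimentation, checked against the examples in its javadoc; on Java 8 and later the same result can also be cross-checked with Long.divideUnsigned (not part of this patch):

// Standalone copy of UDFConv.unsignedLongDiv: divide x by m as if x were an
// unsigned 64-bit integer, without any wider-than-64-bit arithmetic.
public class UnsignedDivSketch {
  static long unsignedLongDiv(long x, int m) {
    if (x >= 0) {
      return x / m;
    }
    // Let uval be the unsigned value with the same bits as x:
    // uval = x + 2*Long.MAX_VALUE + 2, and (a+b)/c = a/c + b/c + (a%c + b%c)/c.
    return x / m + 2 * (Long.MAX_VALUE / m) + 2 / m
        + (x % m + 2 * (Long.MAX_VALUE % m) + 2 % m) / m;
  }

  public static void main(String[] args) {
    System.out.println(unsignedLongDiv(-1, 2) == Long.MAX_VALUE); // true
    System.out.println(unsignedLongDiv(6, 3));                    // 2
    System.out.println(unsignedLongDiv(0, 5));                    // 0
  }
}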
* */ - public Text evaluate(Text n, IntWritable fromBase, IntWritable toBase) - { + public Text evaluate(Text n, IntWritable fromBase, IntWritable toBase) { if (n == null || fromBase == null || toBase == null) { return null; } - + int fromBs = fromBase.get(); int toBs = toBase.get(); - if(fromBs < Character.MIN_RADIX || fromBs > Character.MAX_RADIX - || Math.abs(toBs) < Character.MIN_RADIX - || Math.abs(toBs) > Character.MAX_RADIX ) { + if (fromBs < Character.MIN_RADIX || fromBs > Character.MAX_RADIX + || Math.abs(toBs) < Character.MIN_RADIX + || Math.abs(toBs) > Character.MAX_RADIX) { return null; } - + byte[] num = n.getBytes(); - boolean negative = (num[0]=='-'); + boolean negative = (num[0] == '-'); int first = 0; - if(negative) { + if (negative) { first = 1; } - + // Copy the digits in the right side of the array - for(int i = 1; i <= n.getLength()-first; i++) { + for (int i = 1; i <= n.getLength() - first; i++) { value[value.length - i] = num[n.getLength() - i]; } char2byte(fromBs, value.length - n.getLength() + first); - + // Do the conversion by going through a 64 bit integer long val = encode(fromBs); - if(negative && toBs > 0) { - if(val < 0) { + if (negative && toBs > 0) { + if (val < 0) { val = -1; } else { val = -val; } } - if(toBs < 0 && val <0 ) { + if (toBs < 0 && val < 0) { val = -val; negative = true; } decode(val, Math.abs(toBs)); - + // Find the first non-zero digit or the last digits if all are zero. - for(first=0; first SELECT _FUNC_(13) FROM src LIMIT 1\n" + - " '1101'" - ) -public class UDFBin extends UDF { - private Text result = new Text(); +@description(name = "bin", value = "_FUNC_(n) - returns n in binary", extended = "n is a BIGINT. Returns NULL if n is NULL.\n" + + "Example:\n" + " > SELECT _FUNC_(13) FROM src LIMIT 1\n" + " '1101'") +public class UDFBin extends UDF { + private final Text result = new Text(); byte[] value = new byte[64]; - + public Text evaluate(LongWritable n) { if (n == null) { return null; } - + long num = n.get(); // Extract the bits of num into value[] from right to left int len = 0; do { len++; - value[value.length-len] = (byte)('0' + (num & 1)); + value[value.length - len] = (byte) ('0' + (num & 1)); num >>>= 1; - } while(num != 0); - - result.set(value, value.length-len, len); + } while (num != 0); + + result.set(value, value.length - len, len); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrLessThan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrLessThan.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrLessThan.java (working copy) @@ -31,21 +31,20 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "<=", - value = "a _FUNC_ b - Returns TRUE if b is not greater than a" -) +@description(name = "<=", value = "a _FUNC_ b - Returns TRUE if b is not greater than a") public class UDFOPEqualOrLessThan extends UDFBaseCompare { - private static Log LOG = LogFactory.getLog(UDFOPEqualOrLessThan.class.getName()); + private static Log LOG = LogFactory.getLog(UDFOPEqualOrLessThan.class + .getName()); BooleanWritable resultCache; + public UDFOPEqualOrLessThan() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -55,8 +54,8 @@ return 
r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -66,8 +65,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -77,8 +76,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -87,9 +86,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -98,9 +97,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -108,10 +107,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java (working copy) @@ -31,33 +31,34 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToDouble extends UDF { private static Log LOG = LogFactory.getLog(UDFToDouble.class.getName()); DoubleWritable doubleWritable = new DoubleWritable(); - + public UDFToDouble() { } /** * Convert from void to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The void value to convert + * + * @param i + * The void value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(NullWritable i) { + public DoubleWritable evaluate(NullWritable i) { return null; } /** * Convert from boolean to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The boolean value to convert + * + * @param i + * The boolean value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(BooleanWritable i) { + public DoubleWritable evaluate(BooleanWritable i) { if (i == null) { return null; } else { @@ -68,11 +69,12 @@ /** * Convert from boolean to a double. This is called for CAST(... 
AS DOUBLE) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(ByteWritable i) { + public DoubleWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { @@ -80,14 +82,15 @@ return doubleWritable; } } - + /** * Convert from short to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The short value to convert + * + * @param i + * The short value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(ShortWritable i) { + public DoubleWritable evaluate(ShortWritable i) { if (i == null) { return null; } else { @@ -95,14 +98,15 @@ return doubleWritable; } } - + /** * Convert from integer to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The integer value to convert + * + * @param i + * The integer value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(IntWritable i) { + public DoubleWritable evaluate(IntWritable i) { if (i == null) { return null; } else { @@ -110,14 +114,15 @@ return doubleWritable; } } - + /** * Convert from long to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(LongWritable i) { + public DoubleWritable evaluate(LongWritable i) { if (i == null) { return null; } else { @@ -125,14 +130,15 @@ return doubleWritable; } } - + /** * Convert from float to a double. This is called for CAST(... AS DOUBLE) - * - * @param i The float value to convert + * + * @param i + * The float value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(FloatWritable i) { + public DoubleWritable evaluate(FloatWritable i) { if (i == null) { return null; } else { @@ -140,14 +146,15 @@ return doubleWritable; } } - + /** * Convert from string to a double. This is called for CAST(... 
AS DOUBLE) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return DoubleWritable */ - public DoubleWritable evaluate(Text i) { + public DoubleWritable evaluate(Text i) { if (i == null) { return null; } else { @@ -161,6 +168,5 @@ } } } - - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDate.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDate.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDate.java (working copy) @@ -20,7 +20,6 @@ import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; import java.util.Date; import org.apache.commons.logging.Log; @@ -29,36 +28,35 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -@description( - name = "to_date", - value = "_FUNC_(expr) - Extracts the date part of the date or datetime " + - "expression expr", - extended = "Example:\n " + - " > SELECT _FUNC_('2009-30-07 04:17:52') FROM src LIMIT 1;\n" + - " '2009-30-07'" - ) +@description(name = "to_date", value = "_FUNC_(expr) - Extracts the date part of the date or datetime " + + "expression expr", extended = "Example:\n " + + " > SELECT _FUNC_('2009-30-07 04:17:52') FROM src LIMIT 1;\n" + + " '2009-30-07'") public class UDFDate extends UDF { private static Log LOG = LogFactory.getLog(UDFDate.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); Text t = new Text(); + public UDFDate() { } /** * Get the date part of a date time string. * - * @param dateString the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". + * @param dateString + * the date string in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". * @return the date in the format of "yyyy-MM-dd". 
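The to_date implementation above leans on a detail worth spelling out: SimpleDateFormat.parse stops at the end of its pattern, so a "yyyy-MM-dd" formatter accepts a full timestamp and silently drops the time part. A minimal standalone sketch of that idiom (class name and sample value are illustrative, not part of the patch):

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class ToDateSketch {
  public static void main(String[] args) throws ParseException {
    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    // parse() consumes only the "yyyy-MM-dd" prefix; the trailing time is ignored.
    Date date = formatter.parse("2009-07-30 04:17:52");
    // Re-formatting with the same pattern yields just the date part.
    System.out.println(formatter.format(date)); // 2009-07-30
  }
}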
*/ - public Text evaluate(Text dateString) { - + public Text evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = formatter.parse(dateString.toString()); t.set(formatter.format(date)); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitOr.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitOr.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitOr.java (working copy) @@ -26,16 +26,12 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "|", - value = "a _FUNC_ b - Bitwise or", - extended = "Example:\n" + - " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + - " 7" -) +@description(name = "|", value = "a _FUNC_ b - Bitwise or", extended = "Example:\n" + + " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + " 7") public class UDFOPBitOr extends UDFBaseBitOP { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPBitOr"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPBitOr"); public UDFOPBitOr() { } @@ -44,7 +40,7 @@ if (a == null || b == null) { return null; } - byteWritable.set((byte)(a.get() | b.get())); + byteWritable.set((byte) (a.get() | b.get())); return byteWritable; } @@ -52,10 +48,10 @@ if (a == null || b == null) { return null; } - shortWritable.set((short)(a.get() | b.get())); + shortWritable.set((short) (a.get() | b.get())); return shortWritable; } - + public IntWritable evaluate(IntWritable a, IntWritable b) { if (a == null || b == null) { return null; @@ -72,5 +68,4 @@ return longWritable; } - } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java (working copy) @@ -28,72 +28,82 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "*", - value = "a _FUNC_ b - Multiplies a by b" -) +@description(name = "*", value = "a _FUNC_ b - Multiplies a by b") public class UDFOPMultiply extends UDFBaseNumericOp { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPMultiply"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPMultiply"); public UDFOPMultiply() { } @Override - public ByteWritable evaluate(ByteWritable a, ByteWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ByteWritable evaluate(ByteWritable a, ByteWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - byteWritable.set((byte)(a.get() * b.get())); + byteWritable.set((byte) (a.get() * b.get())); return byteWritable; } @Override - public ShortWritable evaluate(ShortWritable a, ShortWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ShortWritable evaluate(ShortWritable a, ShortWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - shortWritable.set((short)(a.get() * b.get())); + shortWritable.set((short) (a.get() * b.get())); return shortWritable; } 
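The binary operators in this patch (UDFOPBitOr, UDFOPMultiply, and the rest below) share one pattern: any NULL operand yields NULL, and the result is written into a single Writable field that is reused across rows instead of allocating a new object per call. A stripped-down sketch of that pattern; the class and the operation are hypothetical, not an existing Hive UDF:

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;

public class UDFExampleAdd extends UDF {
  // Reused for every row: the caller consumes the value before the next
  // evaluate() call, so one mutable Writable avoids per-row allocation.
  private final IntWritable result = new IntWritable();

  public IntWritable evaluate(IntWritable a, IntWritable b) {
    if (a == null || b == null) {
      return null; // SQL semantics: NULL operand gives a NULL result
    }
    result.set(a.get() + b.get());
    return result;
  }
}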
@Override - public IntWritable evaluate(IntWritable a, IntWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public IntWritable evaluate(IntWritable a, IntWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - intWritable.set((int)(a.get() * b.get())); + intWritable.set((a.get() * b.get())); return intWritable; } @Override - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } longWritable.set(a.get() * b.get()); return longWritable; } @Override - public FloatWritable evaluate(FloatWritable a, FloatWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public FloatWritable evaluate(FloatWritable a, FloatWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } floatWritable.set(a.get() * b.get()); return floatWritable; } - + @Override - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } doubleWritable.set(a.get() * b.get()); return doubleWritable; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java (working copy) @@ -23,33 +23,30 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "substr,substring", - value = "_FUNC_(str, pos[, len]) - returns the substring of str that" + - " starts at pos and is of length len", - extended = "pos is a 1-based index. If pos<0 the starting position is" + - " determined by counting backwards from the end of str.\n" + - "Example:\n " + - " > SELECT _FUNC_('Facebook', 5) FROM src LIMIT 1;\n" + - " 'book'\n" + - " > SELECT _FUNC_('Facebook', -5) FROM src LIMIT 1;\n" + - " 'ebook'\n" + - " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n" + - " 'b'" - ) +@description(name = "substr,substring", value = "_FUNC_(str, pos[, len]) - returns the substring of str that" + + " starts at pos and is of length len", extended = "pos is a 1-based index. 
If pos<0 the starting position is" + + " determined by counting backwards from the end of str.\n" + + "Example:\n " + + " > SELECT _FUNC_('Facebook', 5) FROM src LIMIT 1;\n" + + " 'book'\n" + + " > SELECT _FUNC_('Facebook', -5) FROM src LIMIT 1;\n" + + " 'ebook'\n" + + " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n" + + " 'b'") public class UDFSubstr extends UDF { Text r; + public UDFSubstr() { r = new Text(); } - - public Text evaluate(Text t, IntWritable pos, IntWritable len) { - + + public Text evaluate(Text t, IntWritable pos, IntWritable len) { + if ((t == null) || (pos == null) || (len == null)) { return null; } - + r.clear(); if ((len.get() <= 0)) { return r; @@ -59,7 +56,7 @@ if ((Math.abs(pos.get()) > s.length())) { return r; } - + int start, end; if (pos.get() > 0) { @@ -69,20 +66,20 @@ } else { start = 0; } - + if ((s.length() - start) < len.get()) { end = s.length(); } else { end = start + len.get(); } - + r.set(s.substring(start, end)); return r; } IntWritable maxValue = new IntWritable(Integer.MAX_VALUE); - - public Text evaluate(Text s, IntWritable pos) { + + public Text evaluate(Text s, IntWritable pos) { return evaluate(s, pos, maxValue); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCos.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCos.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCos.java (working copy) @@ -24,26 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "cos", - value = "_FUNC_(x) - returns the cosine of x (x is in radians)", - extended = "Example:\n " + - " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "cos", value = "_FUNC_(x) - returns the cosine of x (x is in radians)", extended = "Example:\n " + + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + " 1") public class UDFCos extends UDF { private static Log LOG = LogFactory.getLog(UDFCos.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFCos() { } /** * Take Cosine of a */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java (working copy) @@ -23,22 +23,19 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "hex", - value = "_FUNC_(n or str) - Convert the argument to hexadecimal ", - extended = "If the argument is a string, returns two hex digits for each " + - "character in the string.\n" + - "If the argument is a number, returns the hexadecimal representation.\n" + - "Example:\n" + - " > SELECT _FUNC_(17) FROM src LIMIT 1;\n" + - " 'H1'\n" + - " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + - " '46616365626F6F6B'" - ) -public class UDFHex extends UDF { - private Text result = new Text(); + +@description(name = "hex", value = "_FUNC_(n or str) - Convert the argument to hexadecimal ", extended = "If the argument is a string, returns two hex digits for each " + + "character in the string.\n" + + "If the argument is a number, returns the hexadecimal representation.\n" + + "Example:\n" + + " > SELECT _FUNC_(17) FROM 
src LIMIT 1;\n" + + " 'H1'\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + + " '46616365626F6F6B'") +public class UDFHex extends UDF { + private final Text result = new Text(); byte[] value = new byte[16]; - + /** * Convert num to hex. * @@ -48,29 +45,29 @@ int len = 0; do { len++; - value[value.length-len] = (byte)Character.toUpperCase( - Character.forDigit((int)(num & 0xF), 16)); + value[value.length - len] = (byte) Character.toUpperCase(Character + .forDigit((int) (num & 0xF), 16)); num >>>= 4; - } while(num != 0); - - result.set(value, value.length-len, len); + } while (num != 0); + + result.set(value, value.length - len, len); return result; } - + public Text evaluate(LongWritable n) { if (n == null) { return null; } - return evaluate((long)n.get()); - } - + return evaluate(n.get()); + } + public Text evaluate(IntWritable n) { if (n == null) { return null; } return evaluate(n.get()); } - + /** * Convert every character in s to two hex digits. * @@ -79,20 +76,20 @@ if (s == null) { return null; } - - if(value.length < s.getLength()*2) { - value = new byte[s.getLength()*2]; + + if (value.length < s.getLength() * 2) { + value = new byte[s.getLength() * 2]; } - + byte[] str = s.getBytes(); - for(int i = 0; i < s.getLength(); i++) { - value[i*2] = (byte)Character.toUpperCase( - Character.forDigit((str[i]&0xF0)>>>4, 16)); - value[i*2 + 1] = (byte)Character.toUpperCase( - Character.forDigit(str[i]&0x0F, 16)); + for (int i = 0; i < s.getLength(); i++) { + value[i * 2] = (byte) Character.toUpperCase(Character.forDigit( + (str[i] & 0xF0) >>> 4, 16)); + value[i * 2 + 1] = (byte) Character.toUpperCase(Character.forDigit( + str[i] & 0x0F, 16)); } - - result.set(value, 0, s.getLength()*2); + + result.set(value, 0, s.getLength() * 2); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java (working copy) @@ -27,18 +27,13 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "round", - value = "_FUNC_(x[, d]) - round x to d decimal places", - extended = "Example:\n" + - " > SELECT _FUNC_(12.3456, 1) FROM src LIMIT 1;\n" + - " 12.3'" - ) +@description(name = "round", value = "_FUNC_(x[, d]) - round x to d decimal places", extended = "Example:\n" + + " > SELECT _FUNC_(12.3456, 1) FROM src LIMIT 1;\n" + " 12.3'") public class UDFRound extends UDF { DoubleWritable doubleWritable = new DoubleWritable(); LongWritable longWritable = new LongWritable(); - + public UDFRound() { } @@ -46,7 +41,8 @@ if (n == null) { return null; } - longWritable.set(BigDecimal.valueOf(n.get()).setScale(0, RoundingMode.HALF_UP).longValue()); + longWritable.set(BigDecimal.valueOf(n.get()).setScale(0, + RoundingMode.HALF_UP).longValue()); return longWritable; } @@ -54,8 +50,9 @@ if ((n == null) || (i == null)) { return null; } - doubleWritable.set(BigDecimal.valueOf(n.get()).setScale(i.get(), RoundingMode.HALF_UP).doubleValue()); + doubleWritable.set(BigDecimal.valueOf(n.get()).setScale(i.get(), + RoundingMode.HALF_UP).doubleValue()); return doubleWritable; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java 
(working copy) @@ -22,19 +22,12 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "lower,lcase", - value = "_FUNC_(str) - Returns str with all characters changed to lowercase", - extended = "Example:\n" + - " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + - " 'facebook'" - ) +@description(name = "lower,lcase", value = "_FUNC_(str) - Returns str with all characters changed to lowercase", extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'facebook'") public class UDFLower extends UDF { Text t = new Text(); + public UDFLower() { } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java (working copy) @@ -24,30 +24,24 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; - /** * Implementation of the SQRT UDF found in many databases. */ -@description( - name = "sqrt", - value = "_FUNC_(x) - returns the square root of x", - extended = "Example:\n " + - " > SELECT _FUNC_(4) FROM src LIMIT 1;\n" + - " 2" - ) +@description(name = "sqrt", value = "_FUNC_(x) - returns the square root of x", extended = "Example:\n " + + " > SELECT _FUNC_(4) FROM src LIMIT 1;\n" + " 2") public class UDFSqrt extends UDF { private static Log LOG = LogFactory.getLog(UDFSqrt.class.getName()); DoubleWritable result = new DoubleWritable(); + public UDFSqrt() { } /** - * Return NULL for NULL or negative inputs; otherwise, return - * the square root. + * Return NULL for NULL or negative inputs; otherwise, return the square root. 
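The conversion loop in UDFHex, a few hunks up, is the one non-obvious piece among these string UDFs: it peels off the lowest nibble each round and writes digits from the tail of a fixed 16-byte buffer, so no reversal step is needed. The same loop in isolation (the input value is just an example):

public class HexSketch {
  public static void main(String[] args) {
    long num = 0x46L;            // 70 decimal, example input
    byte[] value = new byte[16]; // 16 nibbles are enough for any 64-bit value
    int len = 0;
    do {
      len++;
      // Lowest nibble first, written at the tail of the buffer so the
      // finished digits are already in the correct order.
      value[value.length - len] =
          (byte) Character.toUpperCase(Character.forDigit((int) (num & 0xF), 16));
      num >>>= 4;
    } while (num != 0);
    System.out.println(new String(value, value.length - len, len)); // prints 46
  }
}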
*/ - public DoubleWritable evaluate(DoubleWritable i) { + public DoubleWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else if (i.get() < 0) { @@ -57,5 +51,5 @@ return result; } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (working copy) @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; @@ -25,26 +28,19 @@ import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "rlike,regexp", - value = "str _FUNC_ regexp - Returns true if str matches regexp and " + - "false otherwise", - extended = "Example:\n" + - " > SELECT 'fb' _FUNC_ '.*' FROM src LIMIT 1;\n" + - " true" - ) +@description(name = "rlike,regexp", value = "str _FUNC_ regexp - Returns true if str matches regexp and " + + "false otherwise", extended = "Example:\n" + + " > SELECT 'fb' _FUNC_ '.*' FROM src LIMIT 1;\n" + " true") public class UDFRegExp extends UDF { static final Log LOG = LogFactory.getLog(UDFRegExp.class.getName()); - - private Text lastRegex = new Text(); + + private final Text lastRegex = new Text(); private Pattern p = null; boolean warned = false; BooleanWritable result = new BooleanWritable(); + public UDFRegExp() { } @@ -52,11 +48,11 @@ if (s == null || regex == null) { return null; } - if(regex.getLength()==0) { - if(!warned) { + if (regex.getLength() == 0) { + if (!warned) { warned = true; - LOG.warn(getClass().getSimpleName() + " regex is empty. Additional " + - "warnings for an empty regex will be suppressed."); + LOG.warn(getClass().getSimpleName() + " regex is empty. 
Additional " + + "warnings for an empty regex will be suppressed."); } result.set(false); return result; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java (working copy) @@ -22,16 +22,12 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -@description( - name = "upper,ucase", - value = "_FUNC_(str) - Returns str with all characters changed to uppercase", - extended = "Example:\n" + - " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + - " 'FACEBOOK'" - ) +@description(name = "upper,ucase", value = "_FUNC_(str) - Returns str with all characters changed to uppercase", extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'FACEBOOK'") public class UDFUpper extends UDF { Text t = new Text(); + public UDFUpper() { } @@ -42,5 +38,5 @@ t.set(s.toString().toUpperCase()); return t; } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java (working copy) @@ -24,25 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "power,pow", - value = "_FUNC_(x1, x2) - raise x1 to the power of x2", - extended = "Example:\n" + - " > SELECT _FUNC_(2, 3) FROM src LIMIT 1;\n" + - " 8" - ) +@description(name = "power,pow", value = "_FUNC_(x1, x2) - raise x1 to the power of x2", extended = "Example:\n" + + " > SELECT _FUNC_(2, 3) FROM src LIMIT 1;\n" + " 8") public class UDFPower extends UDF { private static Log LOG = LogFactory.getLog(UDFPower.class.getName()); DoubleWritable result = new DoubleWritable(); + public UDFPower() { } /** - * Raise a to the power of b. + * Raise a to the power of b. 
*/ - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { if (a == null || b == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrGreaterThan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrGreaterThan.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqualOrGreaterThan.java (working copy) @@ -31,21 +31,20 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = ">=", - value = "a _FUNC_ b - Returns TRUE if b is not smaller than a" -) +@description(name = ">=", value = "a _FUNC_ b - Returns TRUE if b is not smaller than a") public class UDFOPEqualOrGreaterThan extends UDFBaseCompare { - private static Log LOG = LogFactory.getLog(UDFOPEqualOrGreaterThan.class.getName()); + private static Log LOG = LogFactory.getLog(UDFOPEqualOrGreaterThan.class + .getName()); BooleanWritable resultCache; + public UDFOPEqualOrGreaterThan() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -55,8 +54,8 @@ return r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -66,8 +65,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -77,8 +76,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -87,9 +86,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -98,9 +97,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -108,10 +107,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java 
(revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java (working copy) @@ -28,15 +28,14 @@ import org.apache.hadoop.io.LongWritable; /** - * Base class for numeric operators like +, -, / etc. All these operators - * share a common method resolver (NumericOpMethodResolver). + * Base class for numeric operators like +, -, / etc. All these operators share + * a common method resolver (NumericOpMethodResolver). */ public abstract class UDFBaseNumericOp extends UDF { /** - * Constructor. - * This constructor sets the resolver to be used for comparison operators. - * See {@link org.apache.hadoop.hive.ql.exec.UDFMethodResolver} + * Constructor. This constructor sets the resolver to be used for comparison + * operators. See {@link org.apache.hadoop.hive.ql.exec.UDFMethodResolver} */ public UDFBaseNumericOp() { super(null); @@ -49,12 +48,17 @@ protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); - - public abstract ByteWritable evaluate(ByteWritable a, ByteWritable b); - public abstract ShortWritable evaluate(ShortWritable a, ShortWritable b); - public abstract IntWritable evaluate(IntWritable a, IntWritable b); - public abstract LongWritable evaluate(LongWritable a, LongWritable b); - public abstract FloatWritable evaluate(FloatWritable a, FloatWritable b); - public abstract DoubleWritable evaluate(DoubleWritable a, DoubleWritable b); + public abstract ByteWritable evaluate(ByteWritable a, ByteWritable b); + + public abstract ShortWritable evaluate(ShortWritable a, ShortWritable b); + + public abstract IntWritable evaluate(IntWritable a, IntWritable b); + + public abstract LongWritable evaluate(LongWritable a, LongWritable b); + + public abstract FloatWritable evaluate(FloatWritable a, FloatWritable b); + + public abstract DoubleWritable evaluate(DoubleWritable a, DoubleWritable b); + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitNot.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitNot.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitNot.java (working copy) @@ -20,20 +20,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "~", - value = "_FUNC_ n - Bitwise not", - extended = "Example:\n" + - " > SELECT _FUNC_ 0 FROM src LIMIT 1;\n" + - " -1" -) +@description(name = "~", value = "_FUNC_ n - Bitwise not", extended = "Example:\n" + + " > SELECT _FUNC_ 0 FROM src LIMIT 1;\n" + " -1") public class UDFOPBitNot extends UDFBaseBitOP { private static Log LOG = LogFactory.getLog(UDFOPBitNot.class.getName()); @@ -45,7 +39,7 @@ if (a == null) { return null; } - byteWritable.set((byte)(~a.get())); + byteWritable.set((byte) (~a.get())); return byteWritable; } @@ -53,10 +47,10 @@ if (a == null) { return null; } - shortWritable.set((short)(~a.get())); + shortWritable.set((short) (~a.get())); return shortWritable; } - + public IntWritable evaluate(IntWritable a) { if (a == null) { return null; @@ -73,5 +67,4 @@ return longWritable; } - } Index: 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNot.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNot.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNot.java (working copy) @@ -24,20 +24,19 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.BooleanWritable; -@description( - name = "not,!", - value = "_FUNC_ a - Logical not" -) +@description(name = "not,!", value = "_FUNC_ a - Logical not") public class UDFOPNot extends UDF { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPNot"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPNot"); BooleanWritable result = new BooleanWritable(); + public UDFOPNot() { } // Three-value Boolean: NULL stands for unknown - public BooleanWritable evaluate(BooleanWritable a) { + public BooleanWritable evaluate(BooleanWritable a) { if (a == null) { return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java (working copy) @@ -28,80 +28,87 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; - /** - * class for computing positive modulo. - * Used for positive_mod command in Cli - * See {org.apache.hadoop.hive.ql.udf.UDFOPMod} - * See {org.apache.hadoop.hive.ql.exec.FunctionRegistry} + * class for computing positive modulo. Used for positive_mod command in Cli See + * {org.apache.hadoop.hive.ql.udf.UDFOPMod} See + * {org.apache.hadoop.hive.ql.exec.FunctionRegistry} */ -@description( - name = "pmod", - value = "a _FUNC_ b - Compute the positive modulo" -) +@description(name = "pmod", value = "a _FUNC_ b - Compute the positive modulo") public class UDFPosMod extends UDFBaseNumericOp { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFPosMod"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFPosMod"); public UDFPosMod() { } - @Override - public ByteWritable evaluate(ByteWritable a, ByteWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ByteWritable evaluate(ByteWritable a, ByteWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - byteWritable.set((byte)(((a.get() % b.get()) + b.get()) % b.get())); + byteWritable.set((byte) (((a.get() % b.get()) + b.get()) % b.get())); return byteWritable; } @Override - public ShortWritable evaluate(ShortWritable a, ShortWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ShortWritable evaluate(ShortWritable a, ShortWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - shortWritable.set((short)(((a.get() % b.get()) + b.get()) % b.get())); + shortWritable.set((short) (((a.get() % b.get()) + b.get()) % b.get())); return shortWritable; } @Override - public IntWritable evaluate(IntWritable a, IntWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public 
IntWritable evaluate(IntWritable a, IntWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - intWritable.set((int)(((a.get() % b.get()) + b.get()) % b.get())); + intWritable.set((((a.get() % b.get()) + b.get()) % b.get())); return intWritable; } @Override - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } longWritable.set(((a.get() % b.get()) + b.get()) % b.get()); return longWritable; } @Override - public FloatWritable evaluate(FloatWritable a, FloatWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public FloatWritable evaluate(FloatWritable a, FloatWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } floatWritable.set(((a.get() % b.get()) + b.get()) % b.get()); return floatWritable; } - + @Override - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } doubleWritable.set(((a.get() % b.get()) + b.get()) % b.get()); return doubleWritable; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java (working copy) @@ -30,41 +30,41 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "minute", - value = "_FUNC_(date) - Returns the minute of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + - "'HH:mm:ss'.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + - " 58\n" + - " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + - " 58" - ) +@description(name = "minute", value = "_FUNC_(date) - Returns the minute of date", extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'HH:mm:ss'.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + + " 58\n" + + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 58") public class UDFMinute extends UDF { private static Log LOG = LogFactory.getLog(UDFMinute.class.getName()); - private SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - private SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter1 = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFMinute() { } /** * Get the minute from a 
date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 0 to 59. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 0 to 59. null if the dateString is not a valid date + * string. */ - public IntWritable evaluate(Text dateString) { - + public IntWritable evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = null; try { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java (working copy) @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -28,12 +27,8 @@ import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -@description( - name = "-", - value = "_FUNC_ a - Returns -a" -) +@description(name = "-", value = "_FUNC_ a - Returns -a") public class UDFOPNegative extends UDFBaseNumericUnaryOp { private static Log LOG = LogFactory.getLog(UDFOPNegative.class.getName()); @@ -43,25 +38,25 @@ @Override public ByteWritable evaluate(ByteWritable a) { - if ( a == null ) { + if (a == null) { return null; } - byteWritable.set((byte)-a.get()); + byteWritable.set((byte) -a.get()); return byteWritable; } @Override public ShortWritable evaluate(ShortWritable a) { - if ( a == null ) { + if (a == null) { return null; } - shortWritable.set((short)-a.get()); + shortWritable.set((short) -a.get()); return shortWritable; } @Override public IntWritable evaluate(IntWritable a) { - if ( a == null ) { + if (a == null) { return null; } intWritable.set(-a.get()); @@ -70,7 +65,7 @@ @Override public LongWritable evaluate(LongWritable a) { - if ( a == null ) { + if (a == null) { return null; } longWritable.set(-a.get()); @@ -79,7 +74,7 @@ @Override public FloatWritable evaluate(FloatWritable a) { - if ( a == null ) { + if (a == null) { return null; } floatWritable.set(-a.get()); @@ -88,12 +83,11 @@ @Override public DoubleWritable evaluate(DoubleWritable a) { - if ( a == null ) { + if (a == null) { return null; } doubleWritable.set(-a.get()); return doubleWritable; } - } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java (working copy) @@ -22,21 +22,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; -import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.hive.ql.exec.NumericOpMethodResolver; -@description( - name = "/", - 
value = "a _FUNC_ b - Divide a by b", - extended = "Example:\n" + - " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + - " 1.5" -) +@description(name = "/", value = "a _FUNC_ b - Divide a by b", extended = "Example:\n" + + " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + " 1.5") /** * Note that in SQL, the return type of divide is not necessarily the same * as the parameters. For example, 3 / 2 = 1.5, not 1. To follow SQL, we always @@ -44,16 +33,19 @@ */ public class UDFOPDivide extends UDF { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPDivide"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPDivide"); protected DoubleWritable doubleWritable = new DoubleWritable(); - - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - doubleWritable.set(a.get()/b.get()); + doubleWritable.set(a.get() / b.get()); return doubleWritable; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java (working copy) @@ -29,51 +29,51 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "datediff", - value = "_FUNC_(date1, date2) - Returns the number of days between date1 " + - "and date2", - extended = "date1 and date2 are strings in the format " + - "'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. The time parts are ignored." + - "If date1 is earlier than date2, the result is negative.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-30-07', '2009-31-07') FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "datediff", value = "_FUNC_(date1, date2) - Returns the number of days between date1 " + + "and date2", extended = "date1 and date2 are strings in the format " + + "'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. The time parts are ignored." + + "If date1 is earlier than date2, the result is negative.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-30-07', '2009-31-07') FROM src LIMIT 1;\n" + + " 1") public class UDFDateDiff extends UDF { private static Log LOG = LogFactory.getLog(UDFDateDiff.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); IntWritable result = new IntWritable(); - + public UDFDateDiff() { formatter.setTimeZone(TimeZone.getTimeZone("UTC")); } /** - * Calculate the difference in the number of days. - * The time part of the string will be ignored. - * If dateString1 is earlier than dateString2, then the result can be negative. + * Calculate the difference in the number of days. The time part of the string + * will be ignored. If dateString1 is earlier than dateString2, then the + * result can be negative. * - * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @param dateString2 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". + * @param dateString1 + * the date string in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". 
+ * @param dateString2 + * the date string in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". * @return the difference in days. */ - public IntWritable evaluate(Text dateString1, Text dateString2) { - + public IntWritable evaluate(Text dateString1, Text dateString2) { + if (dateString1 == null || dateString2 == null) { return null; } - + try { // NOTE: This implementation avoids the extra-second problem // by comparing with UTC epoch and integer division. - long diffInMilliSeconds = (formatter.parse(dateString1.toString()).getTime() - - formatter.parse(dateString2.toString()).getTime()); + long diffInMilliSeconds = (formatter.parse(dateString1.toString()) + .getTime() - formatter.parse(dateString2.toString()).getTime()); // 86400 is the number of seconds in a day - result.set((int)(diffInMilliSeconds / (86400 * 1000))); + result.set((int) (diffInMilliSeconds / (86400 * 1000))); return result; } catch (ParseException e) { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitXor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitXor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitXor.java (working copy) @@ -20,20 +20,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "^", - value = "a _FUNC_ b - Bitwise exclusive or", - extended = "Example:\n" + - " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + - " 2" -) +@description(name = "^", value = "a _FUNC_ b - Bitwise exclusive or", extended = "Example:\n" + + " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + " 2") public class UDFOPBitXor extends UDFBaseBitOP { private static Log LOG = LogFactory.getLog(UDFOPBitXor.class.getName()); @@ -45,7 +39,7 @@ if (a == null || b == null) { return null; } - byteWritable.set((byte)(a.get() ^ b.get())); + byteWritable.set((byte) (a.get() ^ b.get())); return byteWritable; } @@ -53,10 +47,10 @@ if (a == null || b == null) { return null; } - shortWritable.set((short)(a.get() ^ b.get())); + shortWritable.set((short) (a.get() ^ b.get())); return shortWritable; } - + public IntWritable evaluate(IntWritable a, IntWritable b) { if (a == null || b == null) { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqual.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqual.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPEqual.java (working copy) @@ -31,25 +31,23 @@ import org.apache.hadoop.io.Text; /** - * The reason that we list evaluate methods with all numeric types is for - * better performance; otherwise a single method that takes (Number a, Number b) - * and use a.doubleValue() == b.doubleValue() is enough. + * The reason that we list evaluate methods with all numeric types is for better + * performance; otherwise a single method that takes (Number a, Number b) and + * use a.doubleValue() == b.doubleValue() is enough. 
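UDFDateDiff above avoids the daylight-saving pitfall its NOTE comment mentions by parsing both dates in UTC and integer-dividing the millisecond difference by a whole day. The same arithmetic in isolation (the sample dates are illustrative):

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class DateDiffSketch {
  public static void main(String[] args) throws ParseException {
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
    // In UTC every day is exactly 86400 * 1000 ms, so integer division
    // by that constant cannot be skewed by DST transitions.
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"));
    long diffMillis = fmt.parse("2009-08-01").getTime()
        - fmt.parse("2009-07-30").getTime();
    System.out.println(diffMillis / (86400L * 1000)); // 2
  }
}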
*/ -@description( - name = "=,==", - value= "a _FUNC_ b - Returns TRUE if a equals b and false otherwise" -) +@description(name = "=,==", value = "a _FUNC_ b - Returns TRUE if a equals b and false otherwise") public class UDFOPEqual extends UDFBaseCompare { private static Log LOG = LogFactory.getLog(UDFOPEqual.class.getName()); BooleanWritable resultCache; + public UDFOPEqual() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -59,8 +57,8 @@ return r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -70,8 +68,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -81,8 +79,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -91,9 +89,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -102,9 +100,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -112,10 +110,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java (working copy) @@ -22,14 +22,10 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -@description( - name = "concat", - value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN", - extended = "Returns NULL if any argument is NULL.\n" + - "Example:\n" + - " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" + - " 'abcdef'" - ) +@description(name = "concat", value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... 
strN", extended = "Returns NULL if any argument is NULL.\n" + + "Example:\n" + + " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" + + " 'abcdef'") public class UDFConcat extends UDF { public UDFConcat() { @@ -37,14 +33,13 @@ Text text = new Text(); - public Text evaluate(Text... args) { text.clear(); - for(int i=0; i SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + - " 59\n" + - " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + - " 59" - ) +@description(name = "second", value = "_FUNC_(date) - Returns the second of date", extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'HH:mm:ss'.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + + " 59\n" + + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 59") public class UDFSecond extends UDF { private static Log LOG = LogFactory.getLog(UDFSecond.class.getName()); - private SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - private SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter1 = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFSecond() { } /** * Get the minute from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 0 to 59. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 0 to 59. null if the dateString is not a valid date + * string. */ - public IntWritable evaluate(Text dateString) { - + public IntWritable evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = null; try { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java (working copy) @@ -27,8 +27,8 @@ import org.apache.hadoop.io.LongWritable; /** - * Base class for numeric operators like +, -, / etc. All these operators - * share a common method resolver (NumericOpMethodResolver). + * Base class for numeric operators like +, -, / etc. All these operators share + * a common method resolver (NumericOpMethodResolver). 
*/ public abstract class UDFBaseNumericUnaryOp extends UDF { @@ -45,12 +45,17 @@ protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); - - public abstract ByteWritable evaluate(ByteWritable a); - public abstract ShortWritable evaluate(ShortWritable a); - public abstract IntWritable evaluate(IntWritable a); - public abstract LongWritable evaluate(LongWritable a); - public abstract FloatWritable evaluate(FloatWritable a); - public abstract DoubleWritable evaluate(DoubleWritable a); + public abstract ByteWritable evaluate(ByteWritable a); + + public abstract ShortWritable evaluate(ShortWritable a); + + public abstract IntWritable evaluate(IntWritable a); + + public abstract LongWritable evaluate(LongWritable a); + + public abstract FloatWritable evaluate(FloatWritable a); + + public abstract DoubleWritable evaluate(DoubleWritable a); + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java (working copy) @@ -25,30 +25,26 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "ceil,ceiling", - value = "_FUNC_(x) - Find the smallest integer not smaller than x", - extended = "Example:\n" + - " > SELECT _FUNC_(-0.1) FROM src LIMIT 1;\n" + - " 0\n" + - " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + - " 5" - ) +@description(name = "ceil,ceiling", value = "_FUNC_(x) - Find the smallest integer not smaller than x", extended = "Example:\n" + + " > SELECT _FUNC_(-0.1) FROM src LIMIT 1;\n" + + " 0\n" + + " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + " 5") public class UDFCeil extends UDF { private static Log LOG = LogFactory.getLog(UDFCeil.class.getName()); LongWritable longWritable = new LongWritable(); + public UDFCeil() { } - public LongWritable evaluate(DoubleWritable i) { + public LongWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - longWritable.set((long)Math.ceil(i.get())); + longWritable.set((long) Math.ceil(i.get())); return longWritable; } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java (working copy) @@ -28,73 +28,82 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "%", - value = "a _FUNC_ b - Returns the remainder when dividing a by b" -) +@description(name = "%", value = "a _FUNC_ b - Returns the remainder when dividing a by b") public class UDFOPMod extends UDFBaseNumericOp { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPMod"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPMod"); public UDFOPMod() { } - @Override - public ByteWritable evaluate(ByteWritable a, ByteWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ByteWritable evaluate(ByteWritable a, ByteWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - 
byteWritable.set((byte)(a.get() % b.get())); + byteWritable.set((byte) (a.get() % b.get())); return byteWritable; } @Override - public ShortWritable evaluate(ShortWritable a, ShortWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ShortWritable evaluate(ShortWritable a, ShortWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - shortWritable.set((short)(a.get() % b.get())); + shortWritable.set((short) (a.get() % b.get())); return shortWritable; } @Override - public IntWritable evaluate(IntWritable a, IntWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public IntWritable evaluate(IntWritable a, IntWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - intWritable.set((int)(a.get() % b.get())); + intWritable.set((a.get() % b.get())); return intWritable; } @Override - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } longWritable.set(a.get() % b.get()); return longWritable; } @Override - public FloatWritable evaluate(FloatWritable a, FloatWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public FloatWritable evaluate(FloatWritable a, FloatWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } floatWritable.set(a.get() % b.get()); return floatWritable; } - + @Override - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } doubleWritable.set(a.get() % b.get()); return doubleWritable; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (working copy) @@ -1,9 +1,12 @@ package org.apache.hadoop.hive.ql.udf; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; -import java.lang.annotation.*; - -@Target(ElementType.TYPE) +@Target(ElementType.TYPE) @Retention(RetentionPolicy.RUNTIME) @Inherited public @interface UDFType { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java 
(working copy) @@ -31,34 +31,34 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - - public class UDFToBoolean extends UDF { private static Log LOG = LogFactory.getLog(UDFToBoolean.class.getName()); BooleanWritable booleanWritable = new BooleanWritable(); - + public UDFToBoolean() { } /** * Convert a void to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The value of a void type + * + * @param i + * The value of a void type * @return BooleanWritable */ - public BooleanWritable evaluate(NullWritable i) { - return null; + public BooleanWritable evaluate(NullWritable i) { + return null; } /** * Convert from a byte to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(ByteWritable i) { + public BooleanWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { @@ -69,11 +69,12 @@ /** * Convert from a short to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The short value to convert + * + * @param i + * The short value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(ShortWritable i) { + public BooleanWritable evaluate(ShortWritable i) { if (i == null) { return null; } else { @@ -84,11 +85,12 @@ /** * Convert from a integer to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The integer value to convert + * + * @param i + * The integer value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(IntWritable i) { + public BooleanWritable evaluate(IntWritable i) { if (i == null) { return null; } else { @@ -99,11 +101,12 @@ /** * Convert from a long to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(LongWritable i) { + public BooleanWritable evaluate(LongWritable i) { if (i == null) { return null; } else { @@ -114,11 +117,12 @@ /** * Convert from a float to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The float value to convert + * + * @param i + * The float value to convert * @return BooleanWritable - */ - public BooleanWritable evaluate(FloatWritable i) { + */ + public BooleanWritable evaluate(FloatWritable i) { if (i == null) { return null; } else { @@ -129,11 +133,12 @@ /** * Convert from a double to boolean. This is called for CAST(... AS BOOLEAN) - * - * @param i The double value to convert + * + * @param i + * The double value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(DoubleWritable i) { + public BooleanWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { @@ -144,11 +149,12 @@ /** * Convert from a string to boolean. This is called for CAST(... 
AS BOOLEAN) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return BooleanWritable */ - public BooleanWritable evaluate(Text i) { + public BooleanWritable evaluate(Text i) { if (i == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLongDivide.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLongDivide.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLongDivide.java (working copy) @@ -22,32 +22,25 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "div", - value = "a _FUNC_ b - Divide a by b rounded to the long integer", - extended = "Example:\n" + - " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + - " 1" -) +@description(name = "div", value = "a _FUNC_ b - Divide a by b rounded to the long integer", extended = "Example:\n" + + " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + " 1") public class UDFOPLongDivide extends UDF { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPLongDivide"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPLongDivide"); protected LongWritable longWritable = new LongWritable(); - - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - longWritable.set((long)a.get()/b.get()); + longWritable.set(a.get() / b.get()); return longWritable; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMin.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMin.java (working copy) @@ -29,10 +29,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "min", - value = "_FUNC_(expr) - Returns the minimum value of expr" - ) +@description(name = "min", value = "_FUNC_(expr) - Returns the minimum value of expr") public class UDAFMin extends UDAF { static public class MinShortEvaluator implements UDAFEvaluator { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpExtract.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpExtract.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpExtract.java (working copy) @@ -28,25 +28,22 @@ import org.apache.hadoop.hive.ql.exec.description; /** - * UDF to extract a specific group identified by a java regex. - * Note that if a regexp has a backslash ('\'), then need to specify '\\' - * For example, regexp_extract('100-200', '(\\d+)-(\\d+)', 1) will return '100' + * UDF to extract a specific group identified by a java regex. 
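// Hedged sketch (class name illustrative) of the group extraction this javadoc
// describes, using only java.util.regex; the UDF below additionally caches the
// compiled Pattern in its lastRegex/p fields between calls.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexpExtractSketch {
  public static void main(String[] args) {
    Matcher m = Pattern.compile("(\\d+)-(\\d+)").matcher("100-200");
    if (m.find()) {
      System.out.println(m.group(1)); // "100", matching the javadoc example
    }
  }
}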
Note that if a + * regexp has a backslash ('\'), then need to specify '\\' For example, + * regexp_extract('100-200', '(\\d+)-(\\d+)', 1) will return '100' */ -@description( - name = "regexp_extract", - value = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp", - extended = "Example:\n" + - " > SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1) FROM src LIMIT 1;\n" + - " '100'" - ) +@description(name = "regexp_extract", value = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp", extended = "Example:\n" + + " > SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1) FROM src LIMIT 1;\n" + + " '100'") public class UDFRegExpExtract extends UDF { private static Log LOG = LogFactory.getLog(UDFRegExpExtract.class.getName()); private String lastRegex = null; private Pattern p = null; + public UDFRegExpExtract() { } - + public String evaluate(String s, String regex, Integer extractIndex) { if (s == null || regex == null) { return null; @@ -67,5 +64,4 @@ return this.evaluate(s, regex, 1); } - } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java (working copy) @@ -29,64 +29,76 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "from_unixtime", - value = "_FUNC_(unix_time, format) - returns unix_time in the specified " + - "format", - extended = "Example:\n" + - " > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;\n" + - " '1970-01-01 00:00:00'" - ) +@description(name = "from_unixtime", value = "_FUNC_(unix_time, format) - returns unix_time in the specified " + + "format", extended = "Example:\n" + + " > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;\n" + + " '1970-01-01 00:00:00'") public class UDFFromUnixTime extends UDF { private static Log LOG = LogFactory.getLog(UDFFromUnixTime.class.getName()); private SimpleDateFormat formatter; - + Text result = new Text(); Text lastFormat = new Text(); - + public UDFFromUnixTime() { } Text defaultFormat = new Text("yyyy-MM-dd HH:mm:ss"); - public Text evaluate(IntWritable unixtime) { + + public Text evaluate(IntWritable unixtime) { return evaluate(unixtime, defaultFormat); } - + /** * Convert UnixTime to a string format. - * @param unixtime The number of seconds from 1970-01-01 00:00:00 - * @param format See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html + * + * @param unixtime + * The number of seconds from 1970-01-01 00:00:00 + * @param format + * See + * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat + * .html * @return a String in the format specified. */ - public Text evaluate(LongWritable unixtime, Text format) { + public Text evaluate(LongWritable unixtime, Text format) { if (unixtime == null || format == null) { return null; } - + return eval(unixtime.get(), format); } /** * Convert UnixTime to a string format. - * @param unixtime The number of seconds from 1970-01-01 00:00:00 - * @param format See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html + * + * @param unixtime + * The number of seconds from 1970-01-01 00:00:00 + * @param format + * See + * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat + * .html * @return a String in the format specified. 
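// Small sketch (class name illustrative) of the seconds-to-string conversion
// documented above: multiply the epoch seconds by 1000L and format with
// java.text.SimpleDateFormat. The UDF itself relies on the JVM default time
// zone; UTC is forced here only so the printed value is deterministic.
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class FromUnixTimeSketch {
  public static void main(String[] args) {
    long unixtime = 0L; // seconds since 1970-01-01 00:00:00
    SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    f.setTimeZone(TimeZone.getTimeZone("UTC"));
    System.out.println(f.format(new Date(unixtime * 1000L))); // 1970-01-01 00:00:00
  }
}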
*/ - public Text evaluate(IntWritable unixtime, Text format) { + public Text evaluate(IntWritable unixtime, Text format) { if (unixtime == null || format == null) { return null; } - - return eval((long)unixtime.get(), format); + + return eval(unixtime.get(), format); } /** - * Internal evaluation function given the seconds from 1970-01-01 00:00:00 - * and the output text format. - * @param unixtime seconds of type long from 1970-01-01 00:00:00 - * @param format display format. See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html + * Internal evaluation function given the seconds from 1970-01-01 00:00:00 and + * the output text format. + * + * @param unixtime + * seconds of type long from 1970-01-01 00:00:00 + * @param format + * display format. See + * http://java.sun.com/j2se/1.4.2/docs/api/java/text + * /SimpleDateFormat.html * @return elapsed time in the given format. */ private Text eval(long unixtime, Text format) { @@ -94,7 +106,7 @@ formatter = new SimpleDateFormat(format.toString()); lastFormat.set(format); } - + // convert seconds to milliseconds Date date = new Date(unixtime * 1000L); result.set(formatter.format(date)); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java (working copy) @@ -31,45 +31,47 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "date_sub", - value = "_FUNC_(start_date, num_days) - Returns the date that is num_days" + - " before start_date.", - extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" + - " 'yyyy-MM-dd'. num_days is a number. The time part of start_date is " + - "ignored.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + - " '2009-29-07'" - ) +@description(name = "date_sub", value = "_FUNC_(start_date, num_days) - Returns the date that is num_days" + + " before start_date.", extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" + + " 'yyyy-MM-dd'. num_days is a number. The time part of start_date is " + + "ignored.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + + " '2009-29-07'") public class UDFDateSub extends UDF { private static Log LOG = LogFactory.getLog(UDFDateSub.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(TimeZone + .getTimeZone("UTC")); Text result = new Text(); + public UDFDateSub() { } /** - * Subtract a number of days to the date. - * The time part of the string will be ignored. + * Subtract a number of days to the date. The time part of the string will be + * ignored. * * NOTE: This is a subset of what MySQL offers as: - * http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_date-sub + * http://dev.mysql.com/doc/refman + * /5.1/en/date-and-time-functions.html#function_date-sub * - * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @param days the number of days to subtract. + * @param dateString1 + * the date string in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". 
+ * @param days + * the number of days to subtract. * @return the date in the format of "yyyy-MM-dd". */ - public Text evaluate(Text dateString1, IntWritable days) { - + public Text evaluate(Text dateString1, IntWritable days) { + if (dateString1 == null || days == null) { return null; } - + try { calendar.setTime(formatter.parse(dateString1.toString())); calendar.add(Calendar.DAY_OF_MONTH, -days.get()); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNotEqual.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNotEqual.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNotEqual.java (working copy) @@ -30,21 +30,19 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "<>", - value = "a _FUNC_ b - Returns TRUE if a is not equal to b" -) +@description(name = "<>", value = "a _FUNC_ b - Returns TRUE if a is not equal to b") public class UDFOPNotEqual extends UDFBaseCompare { private static Log LOG = LogFactory.getLog(UDFOPNotEqual.class.getName()); BooleanWritable resultCache; + public UDFOPNotEqual() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -54,8 +52,8 @@ return r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -65,8 +63,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -76,8 +74,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -86,9 +84,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -97,9 +95,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -107,10 +105,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAsin.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAsin.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAsin.java (working copy) @@ -24,28 +24,23 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "asin", - value = "_FUNC_(x) - returns the arc sine of x if -1<=x<=1 or NULL otherwise", - extended = "Example:\n" + - " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + - " 0\n" + - " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + - " NULL" - ) +@description(name = "asin", value = "_FUNC_(x) - returns the arc sine of x if -1<=x<=1 or NULL otherwise", extended = "Example:\n" + + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + + " 0\n" + + " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + " NULL") public class UDFAsin extends UDF { private static Log LOG = LogFactory.getLog(UDFAsin.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFAsin() { } /** * Take Arc Sine of a in radians. */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java (working copy) @@ -24,26 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "exp", - value = "_FUNC_(x) - Returns e to the power of x", - extended = "Example:\n " + - " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "exp", value = "_FUNC_(x) - Returns e to the power of x", extended = "Example:\n " + + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + " 1") public class UDFExp extends UDF { private static Log LOG = LogFactory.getLog(UDFExp.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFExp() { } /** - * Raise e (the base of natural logarithm) to the power of a. + * Raise e (the base of natural logarithm) to the power of a. */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTF.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTF.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTF.java (working copy) @@ -32,34 +32,35 @@ public abstract class GenericUDTF { Collector collector = null; - + /** - * Initialize this GenericUDTF. This will be called only once per - * instance. + * Initialize this GenericUDTF. This will be called only once per instance. * - * @param args An array of ObjectInspectors for the arguments - * @return A StructObjectInspector for output. The output struct - * represents a row of the table where the fields of the stuct - * are the columns. The field names are unimportant as they - * will be overridden by user supplied column aliases. + * @param args + * An array of ObjectInspectors for the arguments + * @return A StructObjectInspector for output. The output struct represents a + * row of the table where the fields of the stuct are the columns. The + * field names are unimportant as they will be overridden by user + * supplied column aliases. 
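// Hedged sketch of the lifecycle this javadoc describes: initialize() declares
// the output struct, process() forwards one row per call, close() forwards
// nothing. The class name, the "sketch_col" column name, and the use of the
// protected forward() helper are illustrative assumptions, not part of this patch.
import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class IdentityUDTFSketch extends GenericUDTF {
  @Override
  public StructObjectInspector initialize(ObjectInspector[] argOIs)
      throws UDFArgumentException {
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("sketch_col"); // overridden by user-supplied column aliases
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
        fieldOIs);
  }

  @Override
  public void process(Object[] args) throws HiveException {
    forward(new Object[] { String.valueOf(args[0]) }); // one output row per input row
  }

  @Override
  public void close() throws HiveException {
    // per the contract above: clean-up only, no forward() calls here
  }
}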
*/ - public abstract StructObjectInspector initialize(ObjectInspector [] argOIs) - throws UDFArgumentException; - + public abstract StructObjectInspector initialize(ObjectInspector[] argOIs) + throws UDFArgumentException; + /** * Give a set of arguments for the UDTF to process. * - * @param o object array of arguments + * @param o + * object array of arguments */ - public abstract void process(Object [] args) throws HiveException; - + public abstract void process(Object[] args) throws HiveException; + /** - * Called to notify the UDTF that there are no more rows to process. Note - * that forward() should not be called in this function. Only clean up code - * should be run. + * Called to notify the UDTF that there are no more rows to process. Note that + * forward() should not be called in this function. Only clean up code should + * be run. */ public abstract void close() throws HiveException; - + /** * Associates a collector with this UDTF. Can't be specified in the * constructor as the UDTF may be initialized before the collector has been @@ -70,7 +71,7 @@ public final void setCollector(Collector collector) { this.collector = collector; } - + /** * Passes an output row to the collector * Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (working copy) @@ -33,55 +33,54 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.util.StringUtils; -@description( - name = "sum", - value = "_FUNC_(x) - Returns the sum of a set of numbers" -) +@description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers") public class GenericUDAFSum implements GenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName()); - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - return new GenericUDAFSumLong(); - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFSumDouble(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + return new GenericUDAFSumLong(); + case FLOAT: + case DOUBLE: + case STRING: + return new GenericUDAFSumDouble(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + public static class GenericUDAFSumDouble extends GenericUDAFEvaluator { PrimitiveObjectInspector inputOI; 
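// Plain-Java sketch (class name illustrative) of the double-sum path these
// fields support: values, including strings, are coerced to double, a
// NumberFormatException is warned about once and otherwise ignored, and
// partials merge by simple addition.
public class SumDoubleSketch {
  public static void main(String[] args) {
    String[] values = { "1.5", "2.5", "oops" };
    double sum = 0;
    for (String v : values) {
      try {
        sum += Double.parseDouble(v);
      } catch (NumberFormatException e) {
        // the evaluator warns once and skips similar bad values
      }
    }
    System.out.println(sum); // 4.0
  }
}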
DoubleWritable result; - + @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert(parameters.length == 1); + assert (parameters.length == 1); super.init(m, parameters); result = new DoubleWritable(0); - inputOI = (PrimitiveObjectInspector)parameters[0]; + inputOI = (PrimitiveObjectInspector) parameters[0]; return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; } @@ -90,7 +89,7 @@ boolean empty; double sum; } - + @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { SumDoubleAgg result = new SumDoubleAgg(); @@ -100,23 +99,27 @@ @Override public void reset(AggregationBuffer agg) throws HiveException { - SumDoubleAgg myagg = (SumDoubleAgg)agg; + SumDoubleAgg myagg = (SumDoubleAgg) agg; myagg.empty = true; - myagg.sum = 0; + myagg.sum = 0; } boolean warned = false; - + @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert(parameters.length == 1); + public void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException { + assert (parameters.length == 1); try { merge(agg, parameters[0]); } catch (NumberFormatException e) { if (!warned) { warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions."); + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG + .warn(getClass().getSimpleName() + + " ignoring similar exceptions."); } } } @@ -127,17 +130,18 @@ } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { + public void merge(AggregationBuffer agg, Object partial) + throws HiveException { if (partial != null) { - SumDoubleAgg myagg = (SumDoubleAgg)agg; + SumDoubleAgg myagg = (SumDoubleAgg) agg; myagg.empty = false; - myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI); + myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI); } } @Override public Object terminate(AggregationBuffer agg) throws HiveException { - SumDoubleAgg myagg = (SumDoubleAgg)agg; + SumDoubleAgg myagg = (SumDoubleAgg) agg; if (myagg.empty) { return null; } @@ -146,20 +150,19 @@ } } - public static class GenericUDAFSumLong extends GenericUDAFEvaluator { PrimitiveObjectInspector inputOI; LongWritable result; - + @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert(parameters.length == 1); + assert (parameters.length == 1); super.init(m, parameters); result = new LongWritable(0); - inputOI = (PrimitiveObjectInspector)parameters[0]; + inputOI = (PrimitiveObjectInspector) parameters[0]; return PrimitiveObjectInspectorFactory.writableLongObjectInspector; } @@ -168,7 +171,7 @@ boolean empty; long sum; } - + @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { SumLongAgg result = new SumLongAgg(); @@ -178,22 +181,24 @@ @Override public void reset(AggregationBuffer agg) throws HiveException { - SumLongAgg myagg = (SumLongAgg)agg; + SumLongAgg myagg = (SumLongAgg) agg; myagg.empty = true; - myagg.sum = 0; + myagg.sum = 0; } boolean warned = false; - + @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert(parameters.length == 1); + public void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException { + assert (parameters.length == 1); try { merge(agg, parameters[0]); } catch (NumberFormatException e) { 
if (!warned) { warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); } } } @@ -204,17 +209,18 @@ } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { + public void merge(AggregationBuffer agg, Object partial) + throws HiveException { if (partial != null) { - SumLongAgg myagg = (SumLongAgg)agg; - myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI); + SumLongAgg myagg = (SumLongAgg) agg; + myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI); myagg.empty = false; } } @Override public Object terminate(AggregationBuffer agg) throws HiveException { - SumLongAgg myagg = (SumLongAgg)agg; + SumLongAgg myagg = (SumLongAgg) agg; if (myagg.empty) { return null; } @@ -223,6 +229,5 @@ } } - } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java (working copy) @@ -23,27 +23,22 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.IntWritable; -@description( - name = "index", - value = "_FUNC_(a, n) - Returns the n-th element of a " -) +@description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ") public class GenericUDFIndex extends GenericUDF { private MapObjectInspector mapOI; private boolean mapKeyPreferWritable; private ListObjectInspector listOI; private PrimitiveObjectInspector indexOI; private ObjectInspector returnOI; - private IntWritable result = new IntWritable(-1); + private final IntWritable result = new IntWritable(-1); @Override public ObjectInspector initialize(ObjectInspector[] arguments) @@ -52,51 +47,51 @@ throw new UDFArgumentLengthException( "The function INDEX accepts exactly 2 arguments."); } - + if (arguments[0] instanceof MapObjectInspector) { // index into a map - mapOI = (MapObjectInspector)arguments[0]; + mapOI = (MapObjectInspector) arguments[0]; listOI = null; } else if (arguments[0] instanceof ListObjectInspector) { // index into a list - listOI = (ListObjectInspector)arguments[0]; + listOI = (ListObjectInspector) arguments[0]; mapOI = null; } else { - throw new UDFArgumentTypeException(0, - "\"" + Category.MAP.toString().toLowerCase() - + "\" or \"" + Category.LIST.toString().toLowerCase() - + "\" is expected at function INDEX, but \"" + throw new UDFArgumentTypeException(0, "\"" + + Category.MAP.toString().toLowerCase() + "\" or \"" + + Category.LIST.toString().toLowerCase() + + "\" is expected at function INDEX, but \"" + 
arguments[0].getTypeName() + "\" is found"); } - + // index has to be a primitive if (arguments[1] instanceof PrimitiveObjectInspector) { indexOI = (PrimitiveObjectInspector) arguments[1]; } else { - throw new UDFArgumentTypeException(1, - "Primitive Type is expected but " + arguments[1].getTypeName() - + "\" is found"); + throw new UDFArgumentTypeException(1, "Primitive Type is expected but " + + arguments[1].getTypeName() + "\" is found"); } if (mapOI != null) { returnOI = mapOI.getMapValueObjectInspector(); ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); - mapKeyPreferWritable = ((PrimitiveObjectInspector)keyOI).preferWritable(); + mapKeyPreferWritable = ((PrimitiveObjectInspector) keyOI) + .preferWritable(); } else { returnOI = listOI.getListElementObjectInspector(); } - + return returnOI; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - assert(arguments.length == 2); + assert (arguments.length == 2); Object main = arguments[0].get(); Object index = arguments[1].get(); if (mapOI != null) { - + Object indexObject; if (mapKeyPreferWritable) { indexObject = indexOI.getPrimitiveWritableObject(index); @@ -104,10 +99,10 @@ indexObject = indexOI.getPrimitiveJavaObject(index); } return mapOI.getMapValueElement(main, indexObject); - + } else { - - assert(listOI != null); + + assert (listOI != null); int intIndex = 0; try { intIndex = PrimitiveObjectInspectorUtils.getInt(index, indexOI); @@ -119,13 +114,13 @@ return null; } return listOI.getListElement(main, intIndex); - + } } @Override public String getDisplayString(String[] children) { - assert(children.length == 2); + assert (children.length == 2); return children[0] + "[" + children[1] + "]"; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNull.java (working copy) @@ -26,10 +26,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; -@description( - name = "isnull", - value = "_FUNC_ a - Returns true if a is NULL and false otherwise" -) +@description(name = "isnull", value = "_FUNC_ a - Returns true if a is NULL and false otherwise") public class GenericUDFOPNull extends GenericUDF { BooleanWritable result = new BooleanWritable(); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java (working copy) @@ -31,13 +31,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; -@description( - name = "size", - value = "_FUNC_(a) - Returns the size of a" -) +@description(name = "size", value = "_FUNC_(a) - Returns the size of a") public class GenericUDFSize extends GenericUDF { private ObjectInspector returnOI; - private IntWritable result = new IntWritable(-1); + private final IntWritable result = new IntWritable(-1); @Override public ObjectInspector initialize(ObjectInspector[] arguments) @@ -48,15 +45,15 @@ } Category category = arguments[0].getCategory(); String typeName = arguments[0].getTypeName(); - if (category 
!= Category.MAP - && category != Category.LIST + if (category != Category.MAP && category != Category.LIST && !typeName.equals(Constants.VOID_TYPE_NAME)) { - throw new UDFArgumentTypeException(0 , - "\"" + Category.MAP.toString().toLowerCase() - + "\" or \"" + Category.LIST.toString().toLowerCase() + "\" is expected at function SIZE, " - + "but \"" + arguments[0].getTypeName() + "\" is found"); + throw new UDFArgumentTypeException(0, "\"" + + Category.MAP.toString().toLowerCase() + "\" or \"" + + Category.LIST.toString().toLowerCase() + + "\" is expected at function SIZE, " + "but \"" + + arguments[0].getTypeName() + "\" is found"); } - + returnOI = arguments[0]; return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } @@ -64,11 +61,11 @@ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { Object data = arguments[0].get(); - if(returnOI.getCategory() == Category.MAP){ + if (returnOI.getCategory() == Category.MAP) { result.set(((MapObjectInspector) returnOI).getMapSize(data)); - } else if(returnOI.getCategory() == Category.LIST){ + } else if (returnOI.getCategory() == Category.LIST) { result.set(((ListObjectInspector) returnOI).getListLength(data)); - } else if(returnOI.getTypeName().equals(Constants.VOID_TYPE_NAME)) { + } else if (returnOI.getTypeName().equals(Constants.VOID_TYPE_NAME)) { // null result.set(-1); } @@ -77,7 +74,7 @@ @Override public String getDisplayString(String[] children) { - assert(children.length == 1); + assert (children.length == 1); return "size(" + children[0] + ")"; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java (working copy) @@ -40,82 +40,82 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.util.StringUtils; -@description( - name = "avg", - value = "_FUNC_(x) - Returns the mean of a set of numbers" -) +@description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers") public class GenericUDAFAverage implements GenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFAverage.class.getName()); - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFAverageEvaluator(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + return new 
GenericUDAFAverageEvaluator(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + public static class GenericUDAFAverageEvaluator extends GenericUDAFEvaluator { // For PARTIAL1 and COMPLETE PrimitiveObjectInspector inputOI; - + // For PARTIAL2 and FINAL StructObjectInspector soi; StructField countField; StructField sumField; LongObjectInspector countFieldOI; DoubleObjectInspector sumFieldOI; - + // For PARTIAL1 and PARTIAL2 Object[] partialResult; - + // For FINAL and COMPLETE DoubleWritable result; - + @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert(parameters.length == 1); + assert (parameters.length == 1); super.init(m, parameters); - + // init input - if (mode == mode.PARTIAL1 || mode == mode.COMPLETE) { - inputOI = (PrimitiveObjectInspector)parameters[0]; + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; } else { - soi = (StructObjectInspector)parameters[0]; + soi = (StructObjectInspector) parameters[0]; countField = soi.getStructFieldRef("count"); sumField = soi.getStructFieldRef("sum"); - countFieldOI = (LongObjectInspector)countField.getFieldObjectInspector(); - sumFieldOI = (DoubleObjectInspector)sumField.getFieldObjectInspector(); + countFieldOI = (LongObjectInspector) countField + .getFieldObjectInspector(); + sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector(); } - + // init output - if (mode == mode.PARTIAL1 || mode == mode.PARTIAL2) { + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { // The output of a partial aggregation is a struct containing - // a "long" count and a "double" sum. - + // a "long" count and a "double" sum. 
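// Plain-Java sketch (class name illustrative) of the (count, sum) partial
// described in the comment above: merge() adds counts and sums from each
// partial, terminate() divides; the null result for an empty group is omitted.
public class AverageMergeSketch {
  public static void main(String[] args) {
    long count1 = 3, count2 = 2;     // counts from two partials
    double sum1 = 30.0, sum2 = 20.0; // sums from the same partials
    long count = count1 + count2;    // merge(): add counts
    double sum = sum1 + sum2;        // merge(): add sums
    System.out.println(sum / count); // terminate(): 10.0
  }
}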
+ ArrayList foi = new ArrayList(); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); @@ -125,9 +125,9 @@ partialResult = new Object[2]; partialResult[0] = new LongWritable(0); partialResult[1] = new DoubleWritable(0); - return ObjectInspectorFactory.getStandardStructObjectInspector( - fname, foi); - + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, + foi); + } else { result = new DoubleWritable(0); return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; @@ -148,29 +148,31 @@ @Override public void reset(AggregationBuffer agg) throws HiveException { - AverageAgg myagg = (AverageAgg)agg; + AverageAgg myagg = (AverageAgg) agg; myagg.count = 0; - myagg.sum = 0; + myagg.sum = 0; } - + boolean warned = false; - + @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert(parameters.length == 1); + public void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException { + assert (parameters.length == 1); Object p = parameters[0]; if (p != null) { - AverageAgg myagg = (AverageAgg)agg; + AverageAgg myagg = (AverageAgg) agg; try { - double v = PrimitiveObjectInspectorUtils.getDouble(p, - (PrimitiveObjectInspector)inputOI); - myagg.count ++; + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + myagg.count++; myagg.sum += v; } catch (NumberFormatException e) { if (!warned) { warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions."); + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG.warn(getClass().getSimpleName() + + " ignoring similar exceptions."); } } } @@ -178,16 +180,17 @@ @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { - AverageAgg myagg = (AverageAgg)agg; - ((LongWritable)partialResult[0]).set(myagg.count); - ((DoubleWritable)partialResult[1]).set(myagg.sum); + AverageAgg myagg = (AverageAgg) agg; + ((LongWritable) partialResult[0]).set(myagg.count); + ((DoubleWritable) partialResult[1]).set(myagg.sum); return partialResult; } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { + public void merge(AggregationBuffer agg, Object partial) + throws HiveException { if (partial != null) { - AverageAgg myagg = (AverageAgg)agg; + AverageAgg myagg = (AverageAgg) agg; Object partialCount = soi.getStructFieldData(partial, countField); Object partialSum = soi.getStructFieldData(partial, sumField); myagg.count += countFieldOI.get(partialCount); @@ -197,7 +200,7 @@ @Override public Object terminate(AggregationBuffer agg) throws HiveException { - AverageAgg myagg = (AverageAgg)agg; + AverageAgg myagg = (AverageAgg) agg; if (myagg.count == 0) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java (working copy) @@ -22,21 +22,25 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; /** - * UDTFCollector collects data from a GenericUDTF and passes the data to a + * UDTFCollector collects data from a GenericUDTF and passes the data to a * UDTFOperator */ public class UDTFCollector implements Collector { - 
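// Hedged sketch of the Collector contract this class implements: collect()
// simply hands each row onward. Buffering into a list here is an illustrative
// assumption; UDTFCollector instead calls op.forwardUDTFOutput(input).
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;

public class BufferingCollectorSketch implements Collector {
  private final List<Object> rows = new ArrayList<Object>();

  public void collect(Object input) throws HiveException {
    rows.add(input); // stand-in for forwarding to a UDTFOperator
  }

  public List<Object> getRows() {
    return rows;
  }
}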
/* (non-Javadoc) - * @see org.apache.hadoop.hive.ql.udf.generic.Collector#collect(java.lang.Object) + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.udf.generic.Collector#collect(java.lang.Object) */ UDTFOperator op = null; - + public UDTFCollector(UDTFOperator op) { this.op = op; } + @Override - public void collect(Object input) throws HiveException{ + public void collect(Object input) throws HiveException { op.forwardUDTFOutput(input); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (working copy) @@ -30,53 +30,51 @@ import org.apache.hadoop.io.Text; /** - * Generic UDF for string function CONCAT_WS(sep,str1,str2,str3,...). - * This mimics the function from MySQL - * http://dev.mysql.com/doc/refman/5.0/en/string-functions.html#function_concat-ws - * + * Generic UDF for string function + * CONCAT_WS(sep,str1,str2,str3,...). This mimics the function from + * MySQL http://dev.mysql.com/doc/refman/5.0/en/string-functions.html# + * function_concat-ws + * * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF */ -@description( - name = "concat_ws", - value = "_FUNC_(separator, str1, str2, ...) - " + - "returns the concatenation of the strings separated by the separator.", - extended = "Example:\n" + - " > SELECT _FUNC_('ce', 'fa', 'book') FROM src LIMIT 1;\n" + - " 'facebook'") - +@description(name = "concat_ws", value = "_FUNC_(separator, str1, str2, ...) - " + + "returns the concatenation of the strings separated by the separator.", extended = "Example:\n" + + " > SELECT _FUNC_('ce', 'fa', 'book') FROM src LIMIT 1;\n" + + " 'facebook'") public class GenericUDFConcatWS extends GenericUDF { ObjectInspector[] argumentOIs; @Override public ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException { + throws UDFArgumentException { if (arguments.length < 2) { throw new UDFArgumentLengthException( - "The function CONCAT_WS(separator,str1,str2,str3,...) needs at least two arguments."); + "The function CONCAT_WS(separator,str1,str2,str3,...) 
needs at least two arguments."); } for (int i = 0; i < arguments.length; i++) { - if(arguments[i].getTypeName() != Constants.STRING_TYPE_NAME - && arguments[i].getTypeName() != Constants.VOID_TYPE_NAME) { - throw new UDFArgumentTypeException(i, - "Argument " + (i +1 ) + " of function CONCAT_WS must be \"" + Constants.STRING_TYPE_NAME - + "\", but \"" + arguments[i].getTypeName() + "\" was found."); + if (arguments[i].getTypeName() != Constants.STRING_TYPE_NAME + && arguments[i].getTypeName() != Constants.VOID_TYPE_NAME) { + throw new UDFArgumentTypeException(i, "Argument " + (i + 1) + + " of function CONCAT_WS must be \"" + Constants.STRING_TYPE_NAME + + "\", but \"" + arguments[i].getTypeName() + "\" was found."); } } - this.argumentOIs = arguments; + argumentOIs = arguments; return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } - private Text resultText = new Text(); + private final Text resultText = new Text(); + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null) { return null; } - String separator = - ((StringObjectInspector)argumentOIs[0]).getPrimitiveJavaObject(arguments[0].get()); + String separator = ((StringObjectInspector) argumentOIs[0]) + .getPrimitiveJavaObject(arguments[0].get()); StringBuilder sb = new StringBuilder(); boolean first = true; @@ -87,7 +85,8 @@ } else { sb.append(separator); } - sb.append(((StringObjectInspector)argumentOIs[i]).getPrimitiveJavaObject(arguments[i].get())); + sb.append(((StringObjectInspector) argumentOIs[i]) + .getPrimitiveJavaObject(arguments[i].get())); } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java (working copy) @@ -21,49 +21,42 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; /** * GenericUDF Class for computing hash values. */ -@description( - name = "hash", - value = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments" -) +@description(name = "hash", value = "_FUNC_(a1, a2, ...) 
- Returns a hash value of the arguments") public class GenericUDFHash extends GenericUDF { private static Log LOG = LogFactory.getLog(GenericUDFHash.class.getName()); ObjectInspector[] argumentOIs; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { - - this.argumentOIs = arguments; + + argumentOIs = arguments; return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } IntWritable result = new IntWritable(); + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - // See http://java.sun.com/j2se/1.5.0/docs/api/java/util/List.html#hashCode() + // See + // http://java.sun.com/j2se/1.5.0/docs/api/java/util/List.html#hashCode() int r = 0; - for(int i = 0; i < arguments.length; i++) { - r = r * 31 + ObjectInspectorUtils.hashCode(arguments[i].get(), argumentOIs[i]); + for (int i = 0; i < arguments.length; i++) { + r = r * 31 + + ObjectInspectorUtils.hashCode(arguments[i].get(), argumentOIs[i]); } result.set(r); return result; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (working copy) @@ -30,41 +30,38 @@ /** * This class implements the COUNT aggregation function as in SQL. */ -@description( - name = "count", - value = "_FUNC_(x) - Returns the count" -) +@description(name = "count", value = "_FUNC_(x) - Returns the count") public class GenericUDAFCount implements GenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } return new GenericUDAFCountEvaluator(); } - + public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator { ObjectInspector inputOI; - LongWritable result; + LongWritable result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); - assert(parameters.length == 1); + assert (parameters.length == 1); inputOI = parameters[0]; result = new LongWritable(0); return PrimitiveObjectInspectorFactory.writableLongObjectInspector; } - + /** class for storing count value */ static class CountAgg implements AggregationBuffer { long value; } - + @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { CountAgg result = new CountAgg(); @@ -74,15 +71,15 @@ @Override public void reset(AggregationBuffer agg) throws HiveException { - ((CountAgg)agg).value = 0; + ((CountAgg) agg).value = 0; } @Override public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert(parameters.length == 1); + assert (parameters.length == 1); if (parameters[0] != null) { - ((CountAgg)agg).value ++; + ((CountAgg) agg).value++; } } @@ -90,20 +87,19 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { if (partial != null) { - long p = ((LongObjectInspector)inputOI).get(partial); - ((CountAgg)agg).value += p; + long p = ((LongObjectInspector) inputOI).get(partial); + ((CountAgg) agg).value += p; } } @Override public Object terminate(AggregationBuffer agg) throws HiveException { - 
result.set(((CountAgg)agg).value); + result.set(((CountAgg) agg).value); return result; } @Override - public Object terminatePartial(AggregationBuffer agg) - throws HiveException { + public Object terminatePartial(AggregationBuffer agg) throws HiveException { return terminate(agg); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotNull.java (working copy) @@ -26,10 +26,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; -@description( - name = "isnotnull", - value = "_FUNC_ a - Returns true if a is not NULL and false otherwise" -) +@description(name = "isnotnull", value = "_FUNC_ a - Returns true if a is not NULL and false otherwise") public class GenericUDFOPNotNull extends GenericUDF { BooleanWritable result = new BooleanWritable(); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java (working copy) @@ -22,65 +22,62 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.io.Text; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -@description( - name = "split", - value = "_FUNC_(str, regex) - Splits str around occurances that match " + - "regex", - extended = "Example:\n" + - " > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]') FROM src LIMIT 1;\n" + - " [\"one\", \"two\", \"three\"]" - ) +@description(name = "split", value = "_FUNC_(str, regex) - Splits str around occurances that match " + + "regex", extended = "Example:\n" + + " > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]') FROM src LIMIT 1;\n" + + " [\"one\", \"two\", \"three\"]") public class GenericUDFSplit extends GenericUDF { private ObjectInspectorConverters.Converter[] converters; - + + @Override public ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException { + throws UDFArgumentException { if (arguments.length != 2) { throw new UDFArgumentLengthException( "The function SPLIT(s, regexp) takes exactly 2 arguments."); } - + converters = new ObjectInspectorConverters.Converter[arguments.length]; - for(int i = 0; i < arguments.length; i++) { + for (int i = 0; i < arguments.length; i++) { converters[i] = ObjectInspectorConverters.getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector); } - - return ObjectInspectorFactory.getStandardListObjectInspector( - 
PrimitiveObjectInspectorFactory.writableStringObjectInspector); + + return ObjectInspectorFactory + .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); } + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - assert(arguments.length == 2); - + assert (arguments.length == 2); + if (arguments[0].get() == null || arguments[1].get() == null) { return null; } - - Text s = (Text)converters[0].convert(arguments[0].get()); - Text regex = (Text)converters[1].convert(arguments[1].get()); - + + Text s = (Text) converters[0].convert(arguments[0].get()); + Text regex = (Text) converters[1].convert(arguments[1].get()); + ArrayList result = new ArrayList(); - - for(String str: s.toString().split(regex.toString())) { + + for (String str : s.toString().split(regex.toString())) { result.add(new Text(str)); } - + return result; } - + + @Override public String getDisplayString(String[] children) { - assert(children.length == 2); + assert (children.length == 2); return "split(" + children[0] + ", " + children[1] + ")"; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java (working copy) @@ -26,62 +26,60 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * Compute the standard deviation by extending GenericUDAFVariance and + * Compute the standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. - * + * */ -@description( - name = "std,stddev,stddev_pop", - value = "_FUNC_(x) - Returns the standard deviation of a set of numbers" -) +@description(name = "std,stddev,stddev_pop", value = "_FUNC_(x) - Returns the standard deviation of a set of numbers") public class GenericUDAFStd extends GenericUDAFVariance { - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFStdEvaluator(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + return new GenericUDAFStdEvaluator(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + /** - * Compute the standard deviation by extending GenericUDAFVarianceEvaluator + * Compute the standard 
deviation by extending GenericUDAFVarianceEvaluator * and overriding the terminate() method of the evaluator. - * + * */ - public static class GenericUDAFStdEvaluator extends GenericUDAFVarianceEvaluator { + public static class GenericUDAFStdEvaluator extends + GenericUDAFVarianceEvaluator { @Override public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; - + StdAgg myagg = (StdAgg) agg; + if (myagg.count == 0) { // SQL standard - return null for zero elements return null; } else { - if(myagg.count > 1) { - result.set(Math.sqrt(myagg.variance / (myagg.count))); + if (myagg.count > 1) { + result.set(Math.sqrt(myagg.variance / (myagg.count))); } else { // for one element the variance is always 0 result.set(0); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java (working copy) @@ -26,62 +26,59 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * Compute the sample variance by extending GenericUDAFVariance and - * overriding the terminate() method of the evaluator. - * + * Compute the sample variance by extending GenericUDAFVariance and overriding + * the terminate() method of the evaluator. + * */ -@description( - name = "var_samp", - value = "_FUNC_(x) - Returns the sample variance of a set of numbers" -) +@description(name = "var_samp", value = "_FUNC_(x) - Returns the sample variance of a set of numbers") public class GenericUDAFVarianceSample extends GenericUDAFVariance { - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFVarianceSampleEvaluator(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + return new GenericUDAFVarianceSampleEvaluator(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + /** - * Compute the sample variance by extending - * GenericUDAFVarianceEvaluator and overriding the terminate() method of the - * evaluator + * Compute the sample variance by extending GenericUDAFVarianceEvaluator and + * overriding the terminate() method of the evaluator */ - public static class GenericUDAFVarianceSampleEvaluator extends 
GenericUDAFVarianceEvaluator { + public static class GenericUDAFVarianceSampleEvaluator extends + GenericUDAFVarianceEvaluator { @Override public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; - + StdAgg myagg = (StdAgg) agg; + if (myagg.count == 0) { // SQL standard - return null for zero elements return null; } else { - if(myagg.count > 1) { - result.set(myagg.variance / (myagg.count-1)); + if (myagg.count > 1) { + result.set(myagg.variance / (myagg.count - 1)); } else { // for one element the variance is always 0 result.set(0); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java (working copy) @@ -24,7 +24,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.AmbiguousMethodException; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -41,8 +40,8 @@ * GenericUDFBridge encapsulates UDF to provide the same interface as * GenericUDF. * - * Note that GenericUDFBridge implements Serializable because the name of - * the UDF class needs to be serialized with the plan. + * Note that GenericUDFBridge implements Serializable because the name of the + * UDF class needs to be serialized with the plan. * */ public class GenericUDFBridge extends GenericUDF implements Serializable { @@ -53,25 +52,28 @@ * The name of the UDF. */ String udfName; - + /** - * Whether the UDF is an operator or not. - * This controls how the display string is generated. + * Whether the UDF is an operator or not. This controls how the display string + * is generated. */ boolean isOperator; - + /** * The underlying UDF class. */ Class udfClass; - + /** * Greate a new GenericUDFBridge object. - * @param udfName The name of the corresponding udf. + * + * @param udfName + * The name of the corresponding udf. * @param isOperator * @param udfClass */ - public GenericUDFBridge(String udfName, boolean isOperator, Class udfClass) { + public GenericUDFBridge(String udfName, boolean isOperator, + Class udfClass) { this.udfName = udfName; this.isOperator = isOperator; this.udfClass = udfClass; @@ -80,15 +82,15 @@ // For Java serialization only public GenericUDFBridge() { } - + public void setUdfName(String udfName) { this.udfName = udfName; } - + public String getUdfName() { return udfName; } - + public boolean isOperator() { return isOperator; } @@ -100,18 +102,18 @@ public void setUdfClass(Class udfClass) { this.udfClass = udfClass; } - + public Class getUdfClass() { return udfClass; } - + /** * The underlying method of the UDF class. */ transient Method udfMethod; - + /** - * Helper to convert the parameters before passing to udfMethod. + * Helper to convert the parameters before passing to udfMethod. 
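// [Editorial sketch -- not part of the patch] How a plain UDF is exposed through
// GenericUDFBridge. UDFUpper is assumed to be one of Hive's existing UDF classes and
// argumentOIs is a hypothetical ObjectInspector[] describing the call site.
GenericUDFBridge bridge = new GenericUDFBridge("upper", false, UDFUpper.class);
ObjectInspector returnOI = bridge.initialize(argumentOIs);
// initialize() instantiates the UDF, asks its resolver for the evaluate() method that
// matches the argument types, and builds a ConversionHelper; evaluate() then calls
// that method reflectively through FunctionRegistry.invoke().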
*/ transient ConversionHelper conversionHelper; /** @@ -122,46 +124,49 @@ * The non-deferred real arguments for method invocation */ transient Object[] realArguments; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - - udf = (UDF)ReflectionUtils.newInstance(udfClass, null); - + + udf = (UDF) ReflectionUtils.newInstance(udfClass, null); + // Resolve for the method based on argument types - ArrayList argumentTypeInfos = new ArrayList(arguments.length); - for (int i=0; i argumentTypeInfos = new ArrayList( + arguments.length); + for (ObjectInspector argument : arguments) { + argumentTypeInfos.add(TypeInfoUtils + .getTypeInfoFromObjectInspector(argument)); } udfMethod = udf.getResolver().getEvalMethod(argumentTypeInfos); - + // Create parameter converters conversionHelper = new ConversionHelper(udfMethod, arguments); // Create the non-deferred realArgument realArguments = new Object[arguments.length]; - + // Get the return ObjectInspector. - ObjectInspector returnOI = ObjectInspectorFactory.getReflectionObjectInspector( - udfMethod.getGenericReturnType(), ObjectInspectorOptions.JAVA); - + ObjectInspector returnOI = ObjectInspectorFactory + .getReflectionObjectInspector(udfMethod.getGenericReturnType(), + ObjectInspectorOptions.JAVA); + return returnOI; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - assert(arguments.length == realArguments.length); - + assert (arguments.length == realArguments.length); + // Calculate all the arguments for (int i = 0; i < realArguments.length; i++) { realArguments[i] = arguments[i].get(); } // Call the function - Object result = FunctionRegistry.invoke(udfMethod, udf, - conversionHelper.convertIfNecessary(realArguments)); - + Object result = FunctionRegistry.invoke(udfMethod, udf, conversionHelper + .convertIfNecessary(realArguments)); + return result; } @@ -180,7 +185,7 @@ StringBuilder sb = new StringBuilder(); sb.append(udfName); sb.append("("); - for(int i = 0; i < children.length; i++) { + for (int i = 0; i < children.length; i++) { sb.append(children[i]); if (i + 1 < children.length) { sb.append(", "); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java (working copy) @@ -35,87 +35,87 @@ import org.apache.hadoop.util.ReflectionUtils; /** - * This class is a bridge between GenericUDAF and UDAF. - * Old UDAF can be used with the GenericUDAF infrastructure through - * this bridge. + * This class is a bridge between GenericUDAF and UDAF. Old UDAF can be used + * with the GenericUDAF infrastructure through this bridge. 
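// [Editorial sketch -- not part of the patch] Shape of a legacy evaluator this bridge
// can drive; the class is hypothetical (imports of UDAFEvaluator and IntWritable
// assumed). The bridge locates iterate/terminatePartial/merge/terminate purely by name.
public static class MaxIntEvaluator implements UDAFEvaluator {
  private int max;
  private boolean empty;
  public void init() { max = 0; empty = true; }
  public boolean iterate(IntWritable o) {
    if (o != null) {
      max = empty ? o.get() : Math.max(max, o.get());
      empty = false;
    }
    return true;
  }
  public IntWritable terminatePartial() { return empty ? null : new IntWritable(max); }
  public boolean merge(IntWritable o) { return iterate(o); }
  public IntWritable terminate() { return empty ? null : new IntWritable(max); }
}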
*/ public class GenericUDAFBridge implements GenericUDAFResolver { UDAF udaf; - + public GenericUDAFBridge(UDAF udaf) { this.udaf = udaf; } - + public Class getUDAFClass() { return udaf.getClass(); } - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { - Class udafEvaluatorClass = - udaf.getResolver().getEvaluatorClass(Arrays.asList(parameters)); - + Class udafEvaluatorClass = udaf.getResolver() + .getEvaluatorClass(Arrays.asList(parameters)); + return new GenericUDAFBridgeEvaluator(udafEvaluatorClass); } - + public static class GenericUDAFBridgeEvaluator extends GenericUDAFEvaluator - implements Serializable { - + implements Serializable { + private static final long serialVersionUID = 1L; // Used by serialization only public GenericUDAFBridgeEvaluator() { } + public Class getUdafEvaluator() { return udafEvaluator; } + public void setUdafEvaluator(Class udafEvaluator) { this.udafEvaluator = udafEvaluator; } - - public GenericUDAFBridgeEvaluator(Class udafEvaluator) { + public GenericUDAFBridgeEvaluator( + Class udafEvaluator) { this.udafEvaluator = udafEvaluator; } - - + Class udafEvaluator; - + transient ObjectInspector[] parameterOIs; transient Object result; - + transient Method iterateMethod; transient Method mergeMethod; transient Method terminatePartialMethod; transient Method terminateMethod; - transient ConversionHelper conversionHelper; - + transient ConversionHelper conversionHelper; + @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); - this.parameterOIs = parameters; - + parameterOIs = parameters; + // Get the reflection methods from ue - for (Method method : udafEvaluator.getMethods()){ - if (method.getName().equals("iterate")) { + for (Method method : udafEvaluator.getMethods()) { + if (method.getName().equals("iterate")) { iterateMethod = method; } - if (method.getName().equals("merge")) { + if (method.getName().equals("merge")) { mergeMethod = method; } - if (method.getName().equals("terminatePartial")) { + if (method.getName().equals("terminatePartial")) { terminatePartialMethod = method; } - if (method.getName().equals("terminate")) { + if (method.getName().equals("terminate")) { terminateMethod = method; } } - + // Input: do Java/Writable conversion if needed Method aggregateMethod = null; if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { @@ -124,7 +124,7 @@ aggregateMethod = mergeMethod; } conversionHelper = new ConversionHelper(aggregateMethod, parameters); - + // Output: get the evaluate method Method evaluateMethod = null; if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { @@ -135,55 +135,57 @@ // Get the output ObjectInspector from the return type. 
Type returnType = evaluateMethod.getGenericReturnType(); try { - return ObjectInspectorFactory.getReflectionObjectInspector(returnType, + return ObjectInspectorFactory.getReflectionObjectInspector(returnType, ObjectInspectorOptions.JAVA); } catch (RuntimeException e) { - throw new HiveException("Cannot recognize return type " + returnType + - " from " + evaluateMethod, e); + throw new HiveException("Cannot recognize return type " + returnType + + " from " + evaluateMethod, e); } } - + /** class for storing UDAFEvaluator value */ static class UDAFAgg implements AggregationBuffer { UDAFEvaluator ueObject; + UDAFAgg(UDAFEvaluator ueObject) { this.ueObject = ueObject; } } - + @Override public AggregationBuffer getNewAggregationBuffer() { - return new UDAFAgg((UDAFEvaluator)ReflectionUtils.newInstance(udafEvaluator, null)); + return new UDAFAgg((UDAFEvaluator) ReflectionUtils.newInstance( + udafEvaluator, null)); } @Override public void reset(AggregationBuffer agg) throws HiveException { - ((UDAFAgg)agg).ueObject.init(); + ((UDAFAgg) agg).ueObject.init(); } @Override public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - FunctionRegistry.invoke(iterateMethod, ((UDAFAgg)agg).ueObject, + FunctionRegistry.invoke(iterateMethod, ((UDAFAgg) agg).ueObject, conversionHelper.convertIfNecessary(parameters)); } @Override public void merge(AggregationBuffer agg, Object partial) throws HiveException { - FunctionRegistry.invoke(mergeMethod, ((UDAFAgg)agg).ueObject, + FunctionRegistry.invoke(mergeMethod, ((UDAFAgg) agg).ueObject, conversionHelper.convertIfNecessary(partial)); } @Override public Object terminate(AggregationBuffer agg) throws HiveException { - return FunctionRegistry.invoke(terminateMethod, ((UDAFAgg)agg).ueObject); + return FunctionRegistry.invoke(terminateMethod, ((UDAFAgg) agg).ueObject); } @Override - public Object terminatePartial(AggregationBuffer agg) - throws HiveException { - return FunctionRegistry.invoke(terminatePartialMethod, ((UDAFAgg)agg).ueObject); + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return FunctionRegistry.invoke(terminatePartialMethod, + ((UDAFAgg) agg).ueObject); } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java (working copy) @@ -24,13 +24,14 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; /** * IF(expr1,expr2,expr3)
- * If expr1 is TRUE (expr1 <> 0 and expr1 <> NULL) then IF() returns expr2; otherwise it returns expr3. - * IF() returns a numeric or string value, depending on the context in which it is used. + * If expr1 is TRUE (expr1 <> 0 and expr1 <> NULL) then IF() returns expr2; + * otherwise it returns expr3. IF() returns a numeric or string value, depending + * on the context in which it is used. */ public class GenericUDFIf extends GenericUDF { @@ -40,7 +41,7 @@ @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - this.argumentOIs = arguments; + argumentOIs = arguments; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); if (arguments.length != 3) { @@ -50,22 +51,23 @@ boolean conditionTypeIsOk = (arguments[0].getCategory() == ObjectInspector.Category.PRIMITIVE); if (conditionTypeIsOk) { - PrimitiveObjectInspector poi = ((PrimitiveObjectInspector)arguments[0]); - conditionTypeIsOk = (poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN - || poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VOID); + PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) arguments[0]); + conditionTypeIsOk = (poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN || poi + .getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VOID); } if (!conditionTypeIsOk) { throw new UDFArgumentTypeException(0, - "The first argument of function IF should be \"" + Constants.BOOLEAN_TYPE_NAME - + "\", but \"" + arguments[0].getTypeName() + "\" is found"); + "The first argument of function IF should be \"" + + Constants.BOOLEAN_TYPE_NAME + "\", but \"" + + arguments[0].getTypeName() + "\" is found"); } - if( !(returnOIResolver.update(arguments[1]) - && returnOIResolver.update(arguments[2])) ) { + if (!(returnOIResolver.update(arguments[1]) && returnOIResolver + .update(arguments[2]))) { throw new UDFArgumentTypeException(2, - "The second and the third arguments of function IF should have the same type, " + - "but they are different: \"" + arguments[1].getTypeName() - + "\" and \"" + arguments[2].getTypeName() + "\""); + "The second and the third arguments of function IF should have the same type, " + + "but they are different: \"" + arguments[1].getTypeName() + + "\" and \"" + arguments[2].getTypeName() + "\""); } return returnOIResolver.get(); @@ -74,7 +76,8 @@ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { Object condition = arguments[0].get(); - if(condition != null && ((BooleanObjectInspector)argumentOIs[0]).get(condition)) { + if (condition != null + && ((BooleanObjectInspector) argumentOIs[0]).get(condition)) { return returnOIResolver.convertIfNecessary(arguments[1].get(), argumentOIs[1]); } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java (working copy) @@ -19,12 +19,10 @@ package org.apache.hadoop.hive.ql.udf.generic; import java.lang.reflect.Array; -import java.lang.reflect.GenericArrayType; import java.lang.reflect.Method; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.nio.ByteBuffer; -import java.util.Arrays; import java.util.HashMap; import org.apache.commons.logging.Log; @@ -60,30 +58,30 @@ public static 
boolean isUtfStartByte(byte b) { return (b & 0xC0) != 0x80; } - + /** * This class helps to find the return ObjectInspector for a GenericUDF. * - * In many cases like CASE and IF, the GenericUDF is returning a value out - * of several possibilities. However these possibilities may not always - * have the same ObjectInspector. + * In many cases like CASE and IF, the GenericUDF is returning a value out of + * several possibilities. However these possibilities may not always have the + * same ObjectInspector. * - * This class will help detect whether all possibilities have exactly the - * same ObjectInspector. If not, then we need to convert the Objects to - * the same ObjectInspector. + * This class will help detect whether all possibilities have exactly the same + * ObjectInspector. If not, then we need to convert the Objects to the same + * ObjectInspector. * - * A special case is when some values are constant NULL. In this case we - * can use the same ObjectInspector. + * A special case is when some values are constant NULL. In this case we can + * use the same ObjectInspector. */ public static class ReturnObjectInspectorResolver { boolean allowTypeConversion; ObjectInspector returnObjectInspector; - - // We create converters beforehand, so that the converters can reuse the - // same object for returning conversion results. + + // We create converters beforehand, so that the converters can reuse the + // same object for returning conversion results. HashMap converters; - + public ReturnObjectInspectorResolver() { this(false); } @@ -91,61 +89,67 @@ public ReturnObjectInspectorResolver(boolean allowTypeConversion) { this.allowTypeConversion = allowTypeConversion; } + /** * Update returnObjectInspector and valueInspectorsAreTheSame based on the * ObjectInspector seen. + * * @return false if there is a type mismatch */ - public boolean update(ObjectInspector oi) - throws UDFArgumentTypeException { + public boolean update(ObjectInspector oi) throws UDFArgumentTypeException { if (oi instanceof VoidObjectInspector) { return true; } - + if (returnObjectInspector == null) { // The first argument, just set it. returnObjectInspector = oi; return true; } - + if (returnObjectInspector == oi) { - // The new ObjectInspector is the same as the old one, directly return true + // The new ObjectInspector is the same as the old one, directly return + // true return true; } - + TypeInfo oiTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(oi); - TypeInfo rTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(returnObjectInspector); + TypeInfo rTypeInfo = TypeInfoUtils + .getTypeInfoFromObjectInspector(returnObjectInspector); if (oiTypeInfo == rTypeInfo) { // Convert everything to writable, if types of arguments are the same, // but ObjectInspectors are different. - returnObjectInspector = ObjectInspectorUtils.getStandardObjectInspector(returnObjectInspector, - ObjectInspectorCopyOption.WRITABLE); + returnObjectInspector = ObjectInspectorUtils + .getStandardObjectInspector(returnObjectInspector, + ObjectInspectorCopyOption.WRITABLE); return true; } - + if (!allowTypeConversion) { return false; } - - // Types are different, we need to check whether we can convert them to + + // Types are different, we need to check whether we can convert them to // a common base class or not. 
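// [Editorial sketch -- not part of the patch] Typical use of the resolver by IF/CASE:
// feed each branch's ObjectInspector and let it settle on a common return type. With
// type conversion allowed, an int branch and a double branch converge on double.
GenericUDFUtils.ReturnObjectInspectorResolver resolver =
    new GenericUDFUtils.ReturnObjectInspectorResolver(true);
resolver.update(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
resolver.update(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
ObjectInspector commonOI = resolver.get();  // a writable double ObjectInspector
// At evaluate() time, convertIfNecessary(value, branchOI) converts the int branch's
// result to double so both branches share one ObjectInspector.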
- TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo, rTypeInfo); + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo, + rTypeInfo); if (commonTypeInfo == null) { return false; } - returnObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo); - + returnObjectInspector = TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo); + return true; } - + /** * Returns the ObjectInspector of the return value. */ public ObjectInspector get() { return returnObjectInspector; } - + /** * Convert the return Object if necessary (when the ObjectInspectors of * different possibilities are not all the same). @@ -159,160 +163,174 @@ if (o == null) { return null; } - + if (converters == null) { converters = new HashMap(); } - + Converter converter = converters.get(oi); if (converter == null) { - converter = ObjectInspectorConverters.getConverter(oi, returnObjectInspector); + converter = ObjectInspectorConverters.getConverter(oi, + returnObjectInspector); converters.put(oi, converter); } converted = converter.convert(o); } return converted; } - + } - + /** - * Convert parameters for the method if needed. + * Convert parameters for the method if needed. */ public static class ConversionHelper { - private Method m; - private ObjectInspector[] givenParameterOIs; + private final ObjectInspector[] givenParameterOIs; Type[] methodParameterTypes; - private boolean isVariableLengthArgument; + private final boolean isVariableLengthArgument; Type lastParaElementType; - + boolean conversionNeeded; Converter[] converters; Object[] convertedParameters; Object[] convertedParametersInArray; - private static Class getClassFromType(Type t) { if (t instanceof Class) { - return (Class)t; + return (Class) t; } else if (t instanceof ParameterizedType) { - ParameterizedType pt = (ParameterizedType)t; - return (Class)pt.getRawType(); + ParameterizedType pt = (ParameterizedType) t; + return (Class) pt.getRawType(); } return null; } - + /** - * Create a PrimitiveConversionHelper for Method m. The ObjectInspector's + * Create a PrimitiveConversionHelper for Method m. The ObjectInspector's * input parameters are specified in parameters. */ - public ConversionHelper(Method m, ObjectInspector[] parameterOIs) throws UDFArgumentException { - this.m = m; - this.givenParameterOIs = parameterOIs; - + public ConversionHelper(Method m, ObjectInspector[] parameterOIs) + throws UDFArgumentException { + givenParameterOIs = parameterOIs; + methodParameterTypes = m.getGenericParameterTypes(); - // Whether the method takes an array like Object[], + // Whether the method takes an array like Object[], // or String[] etc in the last argument. - lastParaElementType = TypeInfoUtils.getArrayElementType( - methodParameterTypes.length == 0 ? null : - methodParameterTypes[methodParameterTypes.length-1]); + lastParaElementType = TypeInfoUtils + .getArrayElementType(methodParameterTypes.length == 0 ? null + : methodParameterTypes[methodParameterTypes.length - 1]); isVariableLengthArgument = (lastParaElementType != null); - + // Create the output OI array ObjectInspector[] methodParameterOIs = new ObjectInspector[parameterOIs.length]; - + if (isVariableLengthArgument) { - - // ConversionHelper can be called without method parameter length checkings + + // ConversionHelper can be called without method parameter length + // checkings // for terminatePartial() and merge() calls. 
if (parameterOIs.length < methodParameterTypes.length - 1) { - throw new UDFArgumentLengthException(m.toString() + " requires at least " - + (methodParameterTypes.length - 1) + " arguments but only " - + parameterOIs.length + " are passed in."); + throw new UDFArgumentLengthException(m.toString() + + " requires at least " + (methodParameterTypes.length - 1) + + " arguments but only " + parameterOIs.length + + " are passed in."); } // Copy the first methodParameterTypes.length - 1 entries for (int i = 0; i < methodParameterTypes.length - 1; i++) { - // This method takes Object, so it accepts whatever types that are passed in. + // This method takes Object, so it accepts whatever types that are + // passed in. if (methodParameterTypes[i] == Object.class) { methodParameterOIs[i] = ObjectInspectorUtils - .getStandardObjectInspector(parameterOIs[i], ObjectInspectorCopyOption.JAVA); + .getStandardObjectInspector(parameterOIs[i], + ObjectInspectorCopyOption.JAVA); } else { methodParameterOIs[i] = ObjectInspectorFactory - .getReflectionObjectInspector(methodParameterTypes[i], ObjectInspectorOptions.JAVA); + .getReflectionObjectInspector(methodParameterTypes[i], + ObjectInspectorOptions.JAVA); } } // Deal with the last entry if (lastParaElementType == Object.class) { - // This method takes Object[], so it accepts whatever types that are passed in. + // This method takes Object[], so it accepts whatever types that are + // passed in. for (int i = methodParameterTypes.length - 1; i < parameterOIs.length; i++) { methodParameterOIs[i] = ObjectInspectorUtils - .getStandardObjectInspector(parameterOIs[i], ObjectInspectorCopyOption.JAVA); + .getStandardObjectInspector(parameterOIs[i], + ObjectInspectorCopyOption.JAVA); } } else { - // This method takes something like String[], so it only accepts something like String - ObjectInspector oi = ObjectInspectorFactory.getReflectionObjectInspector( - lastParaElementType, ObjectInspectorOptions.JAVA); + // This method takes something like String[], so it only accepts + // something like String + ObjectInspector oi = ObjectInspectorFactory + .getReflectionObjectInspector(lastParaElementType, + ObjectInspectorOptions.JAVA); for (int i = methodParameterTypes.length - 1; i < parameterOIs.length; i++) { methodParameterOIs[i] = oi; } } - + } else { - + // Normal case, the last parameter is a normal parameter. - // ConversionHelper can be called without method parameter length checkings + // ConversionHelper can be called without method parameter length + // checkings // for terminatePartial() and merge() calls. if (methodParameterTypes.length != parameterOIs.length) { - throw new UDFArgumentLengthException(m.toString() + " requires " - + methodParameterTypes.length + " arguments but " + throw new UDFArgumentLengthException(m.toString() + " requires " + + methodParameterTypes.length + " arguments but " + parameterOIs.length + " are passed in."); } for (int i = 0; i < methodParameterTypes.length; i++) { - // This method takes Object, so it accepts whatever types that are passed in. + // This method takes Object, so it accepts whatever types that are + // passed in. 
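// [Editorial sketch -- not part of the patch] What the variable-length path produces.
// For a hypothetical bridged method
//   public Text evaluate(IntWritable sep, Text... parts)
// lastParaElementType is Text, so a call with four arguments is repacked as
//   convertedParameters = { sep, new Text[] { parts0, parts1, parts2 } }
// before FunctionRegistry.invoke() is called.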
if (methodParameterTypes[i] == Object.class) { methodParameterOIs[i] = ObjectInspectorUtils - .getStandardObjectInspector(parameterOIs[i], ObjectInspectorCopyOption.JAVA); + .getStandardObjectInspector(parameterOIs[i], + ObjectInspectorCopyOption.JAVA); } else { methodParameterOIs[i] = ObjectInspectorFactory - .getReflectionObjectInspector(methodParameterTypes[i], ObjectInspectorOptions.JAVA); + .getReflectionObjectInspector(methodParameterTypes[i], + ObjectInspectorOptions.JAVA); } } } - + // Create the converters conversionNeeded = false; converters = new Converter[parameterOIs.length]; for (int i = 0; i < parameterOIs.length; i++) { - Converter pc = ObjectInspectorConverters - .getConverter(parameterOIs[i], methodParameterOIs[i]); + Converter pc = ObjectInspectorConverters.getConverter(parameterOIs[i], + methodParameterOIs[i]); converters[i] = pc; // Conversion is needed? - conversionNeeded = conversionNeeded || (!(pc instanceof IdentityConverter)); + conversionNeeded = conversionNeeded + || (!(pc instanceof IdentityConverter)); } - + if (isVariableLengthArgument) { convertedParameters = new Object[methodParameterTypes.length]; - convertedParametersInArray = (Object[])Array.newInstance( - getClassFromType(lastParaElementType), parameterOIs.length - methodParameterTypes.length + 1); + convertedParametersInArray = (Object[]) Array.newInstance( + getClassFromType(lastParaElementType), parameterOIs.length + - methodParameterTypes.length + 1); convertedParameters[convertedParameters.length - 1] = convertedParametersInArray; } else { convertedParameters = new Object[parameterOIs.length]; } } - + public Object[] convertIfNecessary(Object... parameters) { - - assert(parameters.length == givenParameterOIs.length); - + + assert (parameters.length == givenParameterOIs.length); + if (!conversionNeeded && !isVariableLengthArgument) { // no conversion needed, and not variable-length argument: // just return what is passed in. return parameters; } - + if (isVariableLengthArgument) { // convert the first methodParameterTypes.length - 1 entries for (int i = 0; i < methodParameterTypes.length - 1; i++) { @@ -320,8 +338,8 @@ } // convert the rest and put into the last entry for (int i = methodParameterTypes.length - 1; i < parameters.length; i++) { - convertedParametersInArray[i + 1 - methodParameterTypes.length] = - converters[i].convert(parameters[i]); + convertedParametersInArray[i + 1 - methodParameterTypes.length] = converters[i] + .convert(parameters[i]); } } else { // normal case, convert all parameters @@ -338,25 +356,25 @@ */ public static String getOrdinal(int i) { int unit = i % 10; - return (i <= 0) ? "" - : (i != 11 && unit == 1) ? i + "st" - : (i != 12 && unit == 2) ? i + "nd" - : (i != 13 && unit == 3) ? i + "rd" - : i + "th"; - } + return (i <= 0) ? "" : (i != 11 && unit == 1) ? i + "st" + : (i != 12 && unit == 2) ? i + "nd" : (i != 13 && unit == 3) ? i + "rd" + : i + "th"; + } /** - * Finds any occurence of subtext from text in the backing - * buffer, for avoiding string encoding and decoding. - * Shamelessly copy from {@link org.apache.hadoop.io.Text#find(String, int)}. + * Finds any occurence of subtext from text in the + * backing buffer, for avoiding string encoding and decoding. Shamelessly copy + * from {@link org.apache.hadoop.io.Text#find(String, int)}. */ public static int findText(Text text, Text subtext, int start) { // src.position(start) can't accept negative numbers. 
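// [Editorial sketch -- not part of the patch] Byte-level search as used by LOCATE:
int pos = GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 4);
// pos == 6, a 0-based byte offset. GenericUDFLocate adds 1, so
// LOCATE('bar', 'foobarbar', 5) returns 7, matching the example in its @description.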
- if(start < 0) - return -1; + if (start < 0) { + return -1; + } - ByteBuffer src = ByteBuffer.wrap(text.getBytes(),0,text.getLength()); - ByteBuffer tgt = ByteBuffer.wrap(subtext.getBytes(),0,subtext.getLength()); + ByteBuffer src = ByteBuffer.wrap(text.getBytes(), 0, text.getLength()); + ByteBuffer tgt = ByteBuffer + .wrap(subtext.getBytes(), 0, subtext.getLength()); byte b = tgt.get(); src.position(start); @@ -365,7 +383,7 @@ src.mark(); // save position in loop tgt.mark(); // save position in target boolean found = true; - int pos = src.position()-1; + int pos = src.position() - 1; while (tgt.hasRemaining()) { if (!src.hasRemaining()) { // src expired first tgt.reset(); @@ -380,7 +398,9 @@ break; // no match } } - if (found) return pos; + if (found) { + return pos; + } } } return -1; // not found Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java (working copy) @@ -23,61 +23,81 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** - * A Generic User-defined aggregation function (GenericUDAF) for the use with + * A Generic User-defined aggregation function (GenericUDAF) for the use with * Hive. * * New GenericUDAF classes need to inherit from this GenericUDAF class. * - * The GenericUDAF are superior to normal UDAFs in the following ways: - * 1. It can accept arguments of complex types, and return complex types. - * 2. It can accept variable length of arguments. - * 3. It can accept an infinite number of function signature - for example, - * it's easy to write a GenericUDAF that accepts array, - * array> and so on (arbitrary levels of nesting). + * The GenericUDAF are superior to normal UDAFs in the following ways: 1. It can + * accept arguments of complex types, and return complex types. 2. It can accept + * variable length of arguments. 3. It can accept an infinite number of function + * signature - for example, it's easy to write a GenericUDAF that accepts + * array, array> and so on (arbitrary levels of nesting). */ -@UDFType(deterministic=true) +@UDFType(deterministic = true) public abstract class GenericUDAFEvaluator { - + static public enum Mode { - /** PARTIAL1: from original data to partial aggregation data: iterate() and terminatePartial() will be called */ + /** + * PARTIAL1: from original data to partial aggregation data: iterate() and + * terminatePartial() will be called + */ PARTIAL1, - /** PARTIAL2: from partial aggregation data to partial aggregation data: merge() and terminatePartial() will be called */ + /** + * PARTIAL2: from partial aggregation data to partial aggregation data: + * merge() and terminatePartial() will be called + */ PARTIAL2, - /** FINAL: from partial aggregation to full aggregation: merge() and terminate() will be called */ + /** + * FINAL: from partial aggregation to full aggregation: merge() and + * terminate() will be called + */ FINAL, - /** COMPLETE: from original data directly to full aggregation: iterate() and terminate() will be called */ + /** + * COMPLETE: from original data directly to full aggregation: iterate() and + * terminate() will be called + */ COMPLETE }; Mode mode; + /** * The constructor */ public GenericUDAFEvaluator() { } - /** Initialize the evaluator. - * @param m The mode of aggregation. 
- * @param parameters The ObjectInspector for the parameters: - * In PARTIAL1 and COMPLETE mode, the parameters are original data; - * In PARTIAL2 and FINAL mode, the parameters are just partial aggregations (in that case, the array will always have a single element). - * @return The ObjectInspector for the return value. - * In PARTIAL1 and PARTIAL2 mode, the ObjectInspector for the return value of terminatePartial() call; - * In FINAL and COMPLETE mode, the ObjectInspector for the return value of terminate() call. - * - * NOTE: We need ObjectInspector[] (in addition to the TypeInfo[] in GenericUDAFResolver) for 2 reasons: - * 1. ObjectInspector contains more information than TypeInfo; - * and GenericUDAFEvaluator.init at execution time. - * 2. We call GenericUDAFResolver.getEvaluator at compilation time, + /** + * Initialize the evaluator. + * + * @param m + * The mode of aggregation. + * @param parameters + * The ObjectInspector for the parameters: In PARTIAL1 and COMPLETE + * mode, the parameters are original data; In PARTIAL2 and FINAL + * mode, the parameters are just partial aggregations (in that case, + * the array will always have a single element). + * @return The ObjectInspector for the return value. In PARTIAL1 and PARTIAL2 + * mode, the ObjectInspector for the return value of + * terminatePartial() call; In FINAL and COMPLETE mode, the + * ObjectInspector for the return value of terminate() call. + * + * NOTE: We need ObjectInspector[] (in addition to the TypeInfo[] in + * GenericUDAFResolver) for 2 reasons: 1. ObjectInspector contains + * more information than TypeInfo; and GenericUDAFEvaluator.init at + * execution time. 2. We call GenericUDAFResolver.getEvaluator at + * compilation time, */ - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + public ObjectInspector init(Mode m, ObjectInspector[] parameters) + throws HiveException { // This function should be overriden in every sub class // And the sub class should call super.init(m, parameters) to get mode set. mode = m; return null; } - /** + /** * The interface for a class that is used to store the aggregation result * during the process of aggregation. * @@ -90,33 +110,44 @@ */ public static interface AggregationBuffer { }; - + /** * Get a new aggregation object. */ - public abstract AggregationBuffer getNewAggregationBuffer() throws HiveException; + public abstract AggregationBuffer getNewAggregationBuffer() + throws HiveException; - /** Reset the aggregation. This is useful if we want to reuse the same aggregation. + /** + * Reset the aggregation. This is useful if we want to reuse the same + * aggregation. */ public abstract void reset(AggregationBuffer agg) throws HiveException; /** - * This function will be called by GroupByOperator when it sees a new input row. - * @param agg The object to store the aggregation result. - * @param parameters The row, can be inspected by the OIs passed in init(). + * This function will be called by GroupByOperator when it sees a new input + * row. + * + * @param agg + * The object to store the aggregation result. + * @param parameters + * The row, can be inspected by the OIs passed in init(). 
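// [Editorial sketch -- not part of the patch] How the four modes line up with a typical
// map/reduce aggregation, following the Mode javadoc above:
//   PARTIAL1 (map side)    : init(PARTIAL1, originalOIs) -> iterate() -> terminatePartial()
//   PARTIAL2 (combiner)    : init(PARTIAL2, {partialOI}) -> merge()   -> terminatePartial()
//   FINAL    (reduce side) : init(FINAL,    {partialOI}) -> merge()   -> terminate()
//   COMPLETE (map-only)    : init(COMPLETE, originalOIs) -> iterate() -> terminate()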
*/ - public void aggregate(AggregationBuffer agg, Object[] parameters) throws HiveException { + public void aggregate(AggregationBuffer agg, Object[] parameters) + throws HiveException { if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { iterate(agg, parameters); } else { - assert(parameters.length == 1); + assert (parameters.length == 1); merge(agg, parameters[0]); } } /** - * This function will be called by GroupByOperator when it sees a new input row. - * @param agg The object to store the aggregation result. + * This function will be called by GroupByOperator when it sees a new input + * row. + * + * @param agg + * The object to store the aggregation result. */ public Object evaluate(AggregationBuffer agg) throws HiveException { if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { @@ -125,25 +156,38 @@ return terminate(agg); } } - - /** Iterate through original data. - * @param parameters The objects of parameters. + + /** + * Iterate through original data. + * + * @param parameters + * The objects of parameters. */ - public abstract void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException; + public abstract void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException; - /** Get partial aggregation result. - * @return partial aggregation result. + /** + * Get partial aggregation result. + * + * @return partial aggregation result. */ - public abstract Object terminatePartial(AggregationBuffer agg) throws HiveException; + public abstract Object terminatePartial(AggregationBuffer agg) + throws HiveException; - /** Merge with partial aggregation result. - * NOTE: null might be passed in case there is no input data. - * @param partial The partial aggregation result. + /** + * Merge with partial aggregation result. NOTE: null might be passed in case + * there is no input data. + * + * @param partial + * The partial aggregation result. */ - public abstract void merge(AggregationBuffer agg, Object partial) throws HiveException; + public abstract void merge(AggregationBuffer agg, Object partial) + throws HiveException; - /** Get final aggregation result. - * @return final aggregation result. + /** + * Get final aggregation result. + * + * @return final aggregation result. */ public abstract Object terminate(AggregationBuffer agg) throws HiveException; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java (working copy) @@ -31,27 +31,24 @@ import org.apache.hadoop.io.Text; /** - * Generic UDF for string function LOCATE(substr, str), LOCATE(substr, str, start). - * This mimcs the function from MySQL + * Generic UDF for string function LOCATE(substr, str), + * LOCATE(substr, str, start). This mimcs the function from MySQL * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_locate - *
+ * 
+ * <pre>
  * usage:
  * LOCATE(substr, str)
  * LOCATE(substr, str, start)
- * </pre>
+ * </pre>
+ * <p>
*/ -@description( - name = "locate", - value = "_FUNC_(substr, str[, pos]) - Returns the position of the first " + - "occurance of substr in str after position pos", - extended = "Example:\n" + - " > SELECT _FUNC_('bar', 'foobarbar', 5) FROM src LIMIT 1;\n" + - " 7" - ) -public class GenericUDFLocate extends GenericUDF{ +@description(name = "locate", value = "_FUNC_(substr, str[, pos]) - Returns the position of the first " + + "occurance of substr in str after position pos", extended = "Example:\n" + + " > SELECT _FUNC_('bar', 'foobarbar', 5) FROM src LIMIT 1;\n" + " 7") +public class GenericUDFLocate extends GenericUDF { ObjectInspectorConverters.Converter[] converters; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -60,22 +57,23 @@ "The function LOCATE accepts exactly 2 or 3 arguments."); } - for(int i = 0; i < arguments.length; i++) { + for (int i = 0; i < arguments.length; i++) { Category category = arguments[i].getCategory(); - if(category != Category.PRIMITIVE) { - throw new UDFArgumentTypeException(i, - "The " + GenericUDFUtils.getOrdinal(i + 1) + " argument of function LOCATE is expected to a " - + Category.PRIMITIVE.toString().toLowerCase() - + " type, but " + category.toString().toLowerCase() + " is found"); + if (category != Category.PRIMITIVE) { + throw new UDFArgumentTypeException(i, "The " + + GenericUDFUtils.getOrdinal(i + 1) + + " argument of function LOCATE is expected to a " + + Category.PRIMITIVE.toString().toLowerCase() + " type, but " + + category.toString().toLowerCase() + " is found"); } } converters = new ObjectInspectorConverters.Converter[arguments.length]; - for(int i = 0; i < arguments.length; i++) { - if(i == 0 || i == 1) { + for (int i = 0; i < arguments.length; i++) { + if (i == 0 || i == 1) { converters[i] = ObjectInspectorConverters.getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector); - } else if(i == 2) { + } else if (i == 2) { converters[i] = ObjectInspectorConverters.getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableIntObjectInspector); } @@ -83,32 +81,35 @@ return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } - + IntWritable intWritable = new IntWritable(0); + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - if(arguments[0].get() == null || arguments[1].get() == null) + if (arguments[0].get() == null || arguments[1].get() == null) { return null; + } Text subtext = (Text) converters[0].convert(arguments[0].get()); Text text = (Text) converters[1].convert(arguments[1].get()); int start = 1; - if(arguments.length == 3) { - IntWritable startWritable = (IntWritable)converters[2].convert(arguments[2].get()); - if(startWritable == null) { - intWritable.set(0); - return intWritable; - } - start = startWritable.get(); + if (arguments.length == 3) { + IntWritable startWritable = (IntWritable) converters[2] + .convert(arguments[2].get()); + if (startWritable == null) { + intWritable.set(0); + return intWritable; + } + start = startWritable.get(); } intWritable.set(GenericUDFUtils.findText(text, subtext, start - 1) + 1); - return intWritable; + return intWritable; } @Override public String getDisplayString(String[] children) { - assert(children.length == 2 || children.length == 3); - return "locate(" + children[0] + children[1] - + (children.length == 3 ? 
children[2] : "") + ")"; + assert (children.length == 2 || children.length == 3); + return "locate(" + children[0] + children[1] + + (children.length == 3 ? children[2] : "") + ")"; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java (working copy) @@ -20,24 +20,18 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** - * GenericUDF Class for SQL construct "CASE WHEN a THEN b WHEN c THEN d [ELSE f] END". + * GenericUDF Class for SQL construct + * "CASE WHEN a THEN b WHEN c THEN d [ELSE f] END". * - * NOTES: - * 1. a and c should be boolean, or an exception will be thrown. - * 2. b, d and f should have the same TypeInfo, or an exception will be thrown. + * NOTES: 1. a and c should be boolean, or an exception will be thrown. 2. b, d + * and f should have the same TypeInfo, or an exception will be thrown. 
*/ public class GenericUDFCase extends GenericUDF { @@ -46,79 +40,80 @@ ObjectInspector[] argumentOIs; GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { - - this.argumentOIs = arguments; + + argumentOIs = arguments; caseOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(); returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(); - + boolean r = caseOIResolver.update(arguments[0]); - assert(r); - for (int i=1; i+1= 3); + assert (children.length >= 3); StringBuilder sb = new StringBuilder(); sb.append("CASE ("); sb.append(children[0]); sb.append(")"); - for(int i=1; i+1 ret = new HashMap(); - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException { - + throws UDFArgumentException { + if (arguments.length % 2 != 0) { throw new UDFArgumentLengthException( "Arguments must be in key/value pairs"); } - GenericUDFUtils.ReturnObjectInspectorResolver keyOIResolver = - new GenericUDFUtils.ReturnObjectInspectorResolver(true); - GenericUDFUtils.ReturnObjectInspectorResolver valueOIResolver = - new GenericUDFUtils.ReturnObjectInspectorResolver(true); + GenericUDFUtils.ReturnObjectInspectorResolver keyOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver( + true); + GenericUDFUtils.ReturnObjectInspectorResolver valueOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver( + true); - for(int i=0; i SELECT _FUNC_(NULL, 1, NULL) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "coalesce", value = "_FUNC_(a1, a2, ...) - Returns the first non-null argument", extended = "Example:\n" + + " > SELECT _FUNC_(NULL, 1, NULL) FROM src LIMIT 1;\n" + " 1") public class GenericUDFCoalesce extends GenericUDF { - private static Log LOG = LogFactory.getLog(GenericUDFCoalesce.class.getName()); + private static Log LOG = LogFactory + .getLog(GenericUDFCoalesce.class.getName()); ObjectInspector[] argumentOIs; GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { - - this.argumentOIs = arguments; - + + argumentOIs = arguments; + returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(); - for (int i=0; i 0) { sb.append(children[0]); - for(int i=1; i ret = new ArrayList(); + @Override public ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException { + throws UDFArgumentException { GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); - for(int i=0; i, - * array> and so on (arbitrary levels of nesting). - * 4. It can do short-circuit evaluations using DeferedObject. + * The GenericUDF are superior to normal UDFs in the following ways: 1. It can + * accept arguments of complex types, and return complex types. 2. It can accept + * variable length of arguments. 3. It can accept an infinite number of function + * signature - for example, it's easy to write a GenericUDF that accepts + * array, array> and so on (arbitrary levels of nesting). 4. It + * can do short-circuit evaluations using DeferedObject. */ -@UDFType(deterministic=true) +@UDFType(deterministic = true) public abstract class GenericUDF { - + /** - * A Defered Object allows us to do lazy-evaluation - * and short-circuiting. 
+ * A Defered Object allows us to do lazy-evaluation and short-circuiting. * GenericUDF use DeferedObject to pass arguments. */ public static interface DeferredObject { - public Object get() throws HiveException; + public Object get() throws HiveException; }; - + /** * The constructor */ @@ -58,26 +56,30 @@ * Initialize this GenericUDF. This will be called once and only once per * GenericUDF instance. * - * @param arguments The ObjectInspector for the arguments + * @param arguments + * The ObjectInspector for the arguments * @throws UDFArgumentException - * Thrown when arguments have wrong types, wrong length, etc. - * @return The ObjectInspector for the return value + * Thrown when arguments have wrong types, wrong length, etc. + * @return The ObjectInspector for the return value */ - public abstract ObjectInspector initialize(ObjectInspector[] arguments) + public abstract ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException; - + /** * Evaluate the GenericUDF with the arguments. - * @param arguments The arguments as DeferedObject, use DeferedObject.get() to - * get the actual argument Object. The Objects can be inspected - * by the ObjectInspectors passed in the initialize call. - * @return The + * + * @param arguments + * The arguments as DeferedObject, use DeferedObject.get() to get the + * actual argument Object. The Objects can be inspected by the + * ObjectInspectors passed in the initialize call. + * @return The */ - public abstract Object evaluate(DeferredObject[] arguments) throws HiveException; - + public abstract Object evaluate(DeferredObject[] arguments) + throws HiveException; + /** * Get the String to be displayed in explain. */ public abstract String getDisplayString(String[] children); - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFField.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFField.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFField.java (working copy) @@ -28,60 +28,60 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; -@description( - name="field", - value = "_FUNC_(str, str1, str2, ...) - returns the index of str in the str1,str2,... list or 0 if not found", - extended = "All primitive types are supported, arguments are compared using str.equals(x)." + - " If str is NULL, the return value is 0." - ) +@description(name = "field", value = "_FUNC_(str, str1, str2, ...) - returns the index of str in the str1,str2,... list or 0 if not found", extended = "All primitive types are supported, arguments are compared using str.equals(x)." + + " If str is NULL, the return value is 0.") public class GenericUDFField extends GenericUDF { - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { if (arguments.length < 2) { - throw new UDFArgumentException("The function FIELD(str, str1, str2, ...) needs at least two arguments."); + throw new UDFArgumentException( + "The function FIELD(str, str1, str2, ...) 
needs at least two arguments."); } - + for (int i = 0; i < arguments.length; i++) { Category category = arguments[i].getCategory(); if (category != Category.PRIMITIVE) { - throw new UDFArgumentTypeException(i, - "The " + GenericUDFUtils.getOrdinal(i + 1) + " argument of function FIELD is expected to a " - + Category.PRIMITIVE.toString().toLowerCase() - + " type, but " + category.toString().toLowerCase() + " is found"); + throw new UDFArgumentTypeException(i, "The " + + GenericUDFUtils.getOrdinal(i + 1) + + " argument of function FIELD is expected to a " + + Category.PRIMITIVE.toString().toLowerCase() + " type, but " + + category.toString().toLowerCase() + " is found"); } } - + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } - - private IntWritable r = new IntWritable(); + private final IntWritable r = new IntWritable(); + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null) { r.set(0); return r; } - - for (int i=1; i< arguments.length; i++) { + + for (int i = 1; i < arguments.length; i++) { if (arguments[0].get().equals(arguments[i].get())) { r.set(i); return r; } } - + r.set(0); return r; } @Override public String getDisplayString(String[] children) { - assert(children.length >= 2); - + assert (children.length >= 2); + final StringBuilder sb = new StringBuilder(); sb.append("field("); sb.append(StringUtils.join(children, ", ")); sb.append(")"); - + return sb.toString(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFElt.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFElt.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFElt.java (working copy) @@ -28,22 +28,16 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; /** - * Generic UDF for string function ELT(N,str1,str2,str3,...). - * This mimics the function from MySQL + * Generic UDF for string function ELT(N,str1,str2,str3,...). This + * mimics the function from MySQL * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_elt * * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF */ -@description( - name = "elt", - value = "_FUNC_(n, str1, str2, ...) - returns the n-th string", - extended = "Example:\n" + - " > SELECT _FUNC_(1, 'face', 'book') FROM src LIMIT 1;\n" + - " 'face'" - ) +@description(name = "elt", value = "_FUNC_(n, str1, str2, ...) - returns the n-th string", extended = "Example:\n" + + " > SELECT _FUNC_(1, 'face', 'book') FROM src LIMIT 1;\n" + " 'face'") public class GenericUDFElt extends GenericUDF { ObjectInspectorConverters.Converter[] converters; @@ -56,19 +50,20 @@ "The function ELT(N,str1,str2,str3,...) 
needs at least two arguments."); } - for(int i = 0; i < arguments.length; i++) { + for (int i = 0; i < arguments.length; i++) { Category category = arguments[i].getCategory(); - if(category != Category.PRIMITIVE) { - throw new UDFArgumentTypeException(i, - "The " + GenericUDFUtils.getOrdinal(i + 1) + " argument of function ELT is expected to a " - + Category.PRIMITIVE.toString().toLowerCase() - + " type, but " + category.toString().toLowerCase() + " is found"); + if (category != Category.PRIMITIVE) { + throw new UDFArgumentTypeException(i, "The " + + GenericUDFUtils.getOrdinal(i + 1) + + " argument of function ELT is expected to a " + + Category.PRIMITIVE.toString().toLowerCase() + " type, but " + + category.toString().toLowerCase() + " is found"); } } converters = new ObjectInspectorConverters.Converter[arguments.length]; - for(int i = 0; i < arguments.length; i++) { - if(i == 0) { + for (int i = 0; i < arguments.length; i++) { + if (i == 0) { converters[i] = ObjectInspectorConverters.getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableIntObjectInspector); } else { @@ -82,14 +77,16 @@ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - IntWritable intWritable = (IntWritable)converters[0].convert(arguments[0].get()); - if(intWritable == null) { + IntWritable intWritable = (IntWritable) converters[0].convert(arguments[0] + .get()); + if (intWritable == null) { return null; } int index = intWritable.get(); - if (index <= 0 || index >= arguments.length) + if (index <= 0 || index >= arguments.length) { return null; - return (Text) converters[index].convert(arguments[index].get()); + } + return converters[index].convert(arguments[index].get()); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java (working copy) @@ -29,47 +29,45 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -@description( - name = "explode", - value = "_FUNC_(a) - separates the elements of array a into multiple rows " -) +@description(name = "explode", value = "_FUNC_(a) - separates the elements of array a into multiple rows ") public class GenericUDTFExplode extends GenericUDTF { ListObjectInspector listOI = null; - + @Override - public void close() throws HiveException{ + public void close() throws HiveException { } - + @Override - public StructObjectInspector initialize(ObjectInspector [] args) - throws UDFArgumentException { - + public StructObjectInspector initialize(ObjectInspector[] args) + throws UDFArgumentException { + if (args.length != 1) { throw new UDFArgumentException("explode() takes only one argument"); } - + if (args[0].getCategory() != ObjectInspector.Category.LIST) { throw new UDFArgumentException("explode() takes an array as a parameter"); } - listOI = (ListObjectInspector)args[0]; - + listOI = (ListObjectInspector) args[0]; + ArrayList fieldNames = new ArrayList(); ArrayList fieldOIs = new ArrayList(); fieldNames.add("col"); fieldOIs.add(listOI.getListElementObjectInspector()); - return ObjectInspectorFactory.getStandardStructObjectInspector( - fieldNames, fieldOIs); + return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, + fieldOIs); } Object 
forwardObj[] = new Object[1]; + @Override - public void process(Object [] o) throws HiveException { - + public void process(Object[] o) throws HiveException { + List list = listOI.getList(o[0]); for (Object r : list) { forwardObj[0] = r; - this.forward(forwardObj); + forward(forwardObj); } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java (working copy) @@ -41,73 +41,63 @@ import org.apache.hadoop.util.StringUtils; /** - * Compute the variance. This class is extended by: - * GenericUDAFVarianceSample - * GenericUDAFStd - * GenericUDAFStdSample - * + * Compute the variance. This class is extended by: GenericUDAFVarianceSample + * GenericUDAFStd GenericUDAFStdSample + * */ -@description( - name = "variance,var_pop", - value = "_FUNC_(x) - Returns the variance of a set of numbers" -) +@description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers") public class GenericUDAFVariance implements GenericUDAFResolver { - + static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName()); - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFVarianceEvaluator(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + return new GenericUDAFVarianceEvaluator(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + /** * Evaluate the variance using the following modification of the formula from * The Art of Computer Programming, vol. 2, p. 232: - * - * variance = variance1 + variance2 + n*alpha^2 + m*betha^2 - * - * where: - * - variance is sum[x-avg^2] (this is actually n times the variance) and is - * updated at every step. - * - n is the count of elements in chunk1 - * - m is the count of elements in chunk2 - * - alpha = avg-a - * - betha = avg-b - * - avg is the the average of all elements from both chunks - * - a is the average of elements in chunk1 - * - b is the average of elements in chunk2 * + * variance = variance1 + variance2 + n*alpha^2 + m*betha^2 + * + * where: - variance is sum[x-avg^2] (this is actually n times the variance) + * and is updated at every step. 
- n is the count of elements in chunk1 - m is + * the count of elements in chunk2 - alpha = avg-a - betha = avg-b - avg is + * the the average of all elements from both chunks - a is the average of + * elements in chunk1 - b is the average of elements in chunk2 + * */ public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator { - + // For PARTIAL1 and COMPLETE PrimitiveObjectInspector inputOI; - + // For PARTIAL2 and FINAL StructObjectInspector soi; StructField countField; @@ -116,60 +106,60 @@ LongObjectInspector countFieldOI; DoubleObjectInspector sumFieldOI; DoubleObjectInspector varianceFieldOI; - + // For PARTIAL1 and PARTIAL2 Object[] partialResult; - + // For FINAL and COMPLETE DoubleWritable result; - + @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert(parameters.length == 1); + assert (parameters.length == 1); super.init(m, parameters); - + // init input - if (mode == mode.PARTIAL1 || mode == mode.COMPLETE) { - inputOI = (PrimitiveObjectInspector)parameters[0]; + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; } else { - soi = (StructObjectInspector)parameters[0]; - + soi = (StructObjectInspector) parameters[0]; + countField = soi.getStructFieldRef("count"); sumField = soi.getStructFieldRef("sum"); varianceField = soi.getStructFieldRef("variance"); - - countFieldOI = - (LongObjectInspector)countField.getFieldObjectInspector(); - sumFieldOI = (DoubleObjectInspector)sumField.getFieldObjectInspector(); - varianceFieldOI = - (DoubleObjectInspector)varianceField.getFieldObjectInspector(); + + countFieldOI = (LongObjectInspector) countField + .getFieldObjectInspector(); + sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector(); + varianceFieldOI = (DoubleObjectInspector) varianceField + .getFieldObjectInspector(); } - + // init output - if (mode == mode.PARTIAL1 || mode == mode.PARTIAL2) { + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { // The output of a partial aggregation is a struct containing - // a long count and doubles sum and variance. - + // a long count and doubles sum and variance. 
+ ArrayList foi = new ArrayList(); - + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - + ArrayList fname = new ArrayList(); fname.add("count"); fname.add("sum"); fname.add("variance"); - + partialResult = new Object[3]; partialResult[0] = new LongWritable(0); partialResult[1] = new DoubleWritable(0); partialResult[2] = new DoubleWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector( - fname, foi); - + + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, + foi); + } else { result = new DoubleWritable(0); return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; @@ -191,42 +181,44 @@ @Override public void reset(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; + StdAgg myagg = (StdAgg) agg; myagg.count = 0; - myagg.sum = 0; + myagg.sum = 0; myagg.variance = 0; } - + boolean warned = false; - + @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException { - assert(parameters.length == 1); + public void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException { + assert (parameters.length == 1); Object p = parameters[0]; if (p != null) { - StdAgg myagg = (StdAgg)agg; + StdAgg myagg = (StdAgg) agg; try { - double v = PrimitiveObjectInspectorUtils.getDouble(p, - (PrimitiveObjectInspector)inputOI); - - if(myagg.count != 0) { // if count==0 => the variance is going to be 0 - // after 1 iteration - double alpha = (myagg.sum + v) / (myagg.count+1) - - myagg.sum / myagg.count; - double betha = (myagg.sum + v) / (myagg.count+1) - v; - + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + + if (myagg.count != 0) { // if count==0 => the variance is going to be + // 0 + // after 1 iteration + double alpha = (myagg.sum + v) / (myagg.count + 1) - myagg.sum + / myagg.count; + double betha = (myagg.sum + v) / (myagg.count + 1) - v; + // variance = variance1 + variance2 + n*alpha^2 + m*betha^2 // => variance += n*alpha^2 + betha^2 - myagg.variance += myagg.count*alpha*alpha + betha*betha; + myagg.variance += myagg.count * alpha * alpha + betha * betha; } myagg.count++; myagg.sum += v; } catch (NumberFormatException e) { if (!warned) { warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions."); + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG.warn(getClass().getSimpleName() + + " ignoring similar exceptions."); } } } @@ -234,60 +226,61 @@ @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; - ((LongWritable)partialResult[0]).set(myagg.count); - ((DoubleWritable)partialResult[1]).set(myagg.sum); - ((DoubleWritable)partialResult[2]).set(myagg.variance); + StdAgg myagg = (StdAgg) agg; + ((LongWritable) partialResult[0]).set(myagg.count); + ((DoubleWritable) partialResult[1]).set(myagg.sum); + ((DoubleWritable) partialResult[2]).set(myagg.variance); return partialResult; } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { + public void merge(AggregationBuffer agg, Object partial) + throws HiveException { if (partial != null) { - StdAgg myagg = (StdAgg)agg; - + StdAgg myagg = (StdAgg) agg; + Object partialCount = soi.getStructFieldData(partial, 
countField); Object partialSum = soi.getStructFieldData(partial, sumField); Object partialVariance = soi.getStructFieldData(partial, varianceField); - + long n = myagg.count; long m = countFieldOI.get(partialCount); - - if(n == 0) { + + if (n == 0) { // Just copy the information since there is nothing so far myagg.variance = sumFieldOI.get(partialVariance); myagg.count = countFieldOI.get(partialCount); myagg.sum = sumFieldOI.get(partialSum); } - - if(m != 0 && n != 0) { + + if (m != 0 && n != 0) { // Merge the two partials - + double a = myagg.sum; double b = sumFieldOI.get(partialSum); - - double alpha = (a+b)/(n+m) - a/n; - double betha = (a+b)/(n+m) - b/m; - + + double alpha = (a + b) / (n + m) - a / n; + double betha = (a + b) / (n + m) - b / m; + // variance = variance1 + variance2 + n*alpha^2 + m*betha^2 myagg.variance += sumFieldOI.get(partialVariance) - + (n*alpha*alpha + m*betha*betha); + + (n * alpha * alpha + m * betha * betha); myagg.count += m; myagg.sum += b; } - + } } @Override public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; - + StdAgg myagg = (StdAgg) agg; + if (myagg.count == 0) { // SQL standard - return null for zero elements return null; } else { - if(myagg.count > 1) { - result.set(myagg.variance / (myagg.count)); + if (myagg.count > 1) { + result.set(myagg.variance / (myagg.count)); } else { // for one element the variance is always 0 result.set(0); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/Collector.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/Collector.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/Collector.java (working copy) @@ -26,6 +26,7 @@ public interface Collector { /** * Other classes will call collect() with the data that it has. + * * @param input */ void collect(Object input) throws HiveException; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java (working copy) @@ -26,63 +26,61 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** - * Compute the sample standard deviation by extending GenericUDAFVariance and + * Compute the sample standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. 
- * + * */ -@description( - name = "stddev_samp", - value = "_FUNC_(x) - Returns the sample standard deviation of a set of " + - "numbers" -) +@description(name = "stddev_samp", value = "_FUNC_(x) - Returns the sample standard deviation of a set of " + + "numbers") public class GenericUDAFStdSample extends GenericUDAFVariance { - + @Override - public GenericUDAFEvaluator getEvaluator( - TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); } - + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } - switch (((PrimitiveTypeInfo)parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFStdSampleEvaluator(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, - "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + return new GenericUDAFStdSampleEvaluator(); + case BOOLEAN: + default: + throw new UDFArgumentTypeException(0, + "Only numeric or string type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); } } - + /** - * Compute the sample standard deviation by extending + * Compute the sample standard deviation by extending * GenericUDAFVarianceEvaluator and overriding the terminate() method of the - * evaluator + * evaluator */ - public static class GenericUDAFStdSampleEvaluator extends GenericUDAFVarianceEvaluator { + public static class GenericUDAFStdSampleEvaluator extends + GenericUDAFVarianceEvaluator { @Override public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg)agg; - + StdAgg myagg = (StdAgg) agg; + if (myagg.count == 0) { // SQL standard - return null for zero elements return null; } else { - if(myagg.count > 1) { - result.set(Math.sqrt(myagg.variance / (myagg.count-1))); + if (myagg.count > 1) { + result.set(Math.sqrt(myagg.variance / (myagg.count - 1))); } else { // for one element the variance is always 0 result.set(0); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java (working copy) @@ -31,26 +31,23 @@ import org.apache.hadoop.io.Text; /** - * Generic UDF for string function INSTR(str,substr). - * This mimcs the function from MySQL + * Generic UDF for string function INSTR(str,substr). This mimcs + * the function from MySQL * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_instr - *
 
 * 
 * usage:
 * INSTR(str, substr)
 * 
*/ -@description( - name = "instr", - value = "_FUNC_(str, substr) - Returns the index of the first occurance " + - "of substr in str", - extended = "Example:\n" + - " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + - " 5" - ) -public class GenericUDFInstr extends GenericUDF{ +@description(name = "instr", value = "_FUNC_(str, substr) - Returns the index of the first occurance " + + "of substr in str", extended = "Example:\n" + + " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + " 5") +public class GenericUDFInstr extends GenericUDF { ObjectInspectorConverters.Converter[] converters; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -59,40 +56,43 @@ "The function INSTR accepts exactly 2 arguments."); } - for(int i = 0; i < arguments.length; i++) { + for (int i = 0; i < arguments.length; i++) { Category category = arguments[i].getCategory(); - if(category != Category.PRIMITIVE) { - throw new UDFArgumentTypeException(i, - "The " + GenericUDFUtils.getOrdinal(i + 1) + " argument of function INSTR is expected to a " - + Category.PRIMITIVE.toString().toLowerCase() - + " type, but " + category.toString().toLowerCase() + " is found"); + if (category != Category.PRIMITIVE) { + throw new UDFArgumentTypeException(i, "The " + + GenericUDFUtils.getOrdinal(i + 1) + + " argument of function INSTR is expected to a " + + Category.PRIMITIVE.toString().toLowerCase() + " type, but " + + category.toString().toLowerCase() + " is found"); } } converters = new ObjectInspectorConverters.Converter[arguments.length]; - for(int i = 0; i < arguments.length; i++) { - converters[i] = ObjectInspectorConverters.getConverter(arguments[i], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); + for (int i = 0; i < arguments.length; i++) { + converters[i] = ObjectInspectorConverters.getConverter(arguments[i], + PrimitiveObjectInspectorFactory.writableStringObjectInspector); } return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } - + IntWritable intWritable = new IntWritable(0); + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - if(arguments[0].get() == null || arguments[1].get() == null) + if (arguments[0].get() == null || arguments[1].get() == null) { return null; + } Text text = (Text) converters[0].convert(arguments[0].get()); Text subtext = (Text) converters[1].convert(arguments[1].get()); intWritable.set(GenericUDFUtils.findText(text, subtext, 0) + 1); - return intWritable; + return intWritable; } @Override public String getDisplayString(String[] children) { - assert(children.length == 2); + assert (children.length == 2); return "instr(" + children[0] + children[1] + ")"; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java (working copy) @@ -20,26 +20,18 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END". * - * NOTES: - * 1. a and b should have the same TypeInfo, or an exception will be thrown. - * 2. c and f should have the same TypeInfo, or an exception will be thrown. + * NOTES: 1. a and b should have the same TypeInfo, or an exception will be + * thrown. 2. c and f should have the same TypeInfo, or an exception will be + * thrown. */ public class GenericUDFWhen extends GenericUDF { @@ -48,70 +40,72 @@ ObjectInspector[] argumentOIs; GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver; - + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { - - this.argumentOIs = arguments; + + argumentOIs = arguments; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(); - - for (int i=0; i+1= 2); + assert (children.length >= 2); StringBuilder sb = new StringBuilder(); sb.append("CASE"); - for(int i=0; i+1 SELECT _FUNC_('facebook ') FROM src LIMIT 1;\n" + - " 'facebook'" - ) +@description(name = "rtrim", value = "_FUNC_(str) - Removes the trailing space characters from str ", extended = "Example:\n" + + " > SELECT _FUNC_('facebook ') FROM src LIMIT 1;\n" + " 'facebook'") public class UDFRTrim extends UDF { Text result = new Text(); + public UDFRTrim() { } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java (working copy) @@ -24,7 +24,6 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.lazy.LazyByte; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; @@ -33,128 +32,136 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToShort extends UDF { private static Log LOG = LogFactory.getLog(UDFToByte.class.getName()); ShortWritable shortWritable = new ShortWritable(); - + public UDFToShort() { } /** * Convert from void to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The void value to convert + * + * @param i + * The void value to convert * @return ShortWritable */ - public ShortWritable evaluate(NullWritable i) { + public ShortWritable evaluate(NullWritable i) { return null; } /** * Convert from boolean to a short. This is called for CAST(... 
AS SMALLINT) - * - * @param i The boolean value to convert + * + * @param i + * The boolean value to convert * @return ShortWritable */ - public ShortWritable evaluate(BooleanWritable i) { + public ShortWritable evaluate(BooleanWritable i) { if (i == null) { return null; } else { - shortWritable.set(i.get() ? (short)1 : (short)0); + shortWritable.set(i.get() ? (short) 1 : (short) 0); return shortWritable; } } /** * Convert from byte to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return ShortWritable */ - public ShortWritable evaluate(ByteWritable i) { + public ShortWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { - shortWritable.set((short)i.get()); + shortWritable.set(i.get()); return shortWritable; } } /** * Convert from integer to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The integer value to convert + * + * @param i + * The integer value to convert * @return ShortWritable */ - public ShortWritable evaluate(IntWritable i) { + public ShortWritable evaluate(IntWritable i) { if (i == null) { return null; } else { - shortWritable.set((short)i.get()); + shortWritable.set((short) i.get()); return shortWritable; } } /** * Convert from long to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return ShortWritable */ - public ShortWritable evaluate(LongWritable i) { + public ShortWritable evaluate(LongWritable i) { if (i == null) { return null; } else { - shortWritable.set((short)i.get()); + shortWritable.set((short) i.get()); return shortWritable; } } - + /** * Convert from float to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The float value to convert + * + * @param i + * The float value to convert * @return ShortWritable */ - public ShortWritable evaluate(FloatWritable i) { + public ShortWritable evaluate(FloatWritable i) { if (i == null) { return null; } else { - shortWritable.set((short)i.get()); + shortWritable.set((short) i.get()); return shortWritable; } } - + /** * Convert from double to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The double value to convert + * + * @param i + * The double value to convert * @return ShortWritable */ - public ShortWritable evaluate(DoubleWritable i) { + public ShortWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - shortWritable.set((short)i.get()); + shortWritable.set((short) i.get()); return shortWritable; } } - + /** * Convert from string to a short. This is called for CAST(... AS SMALLINT) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return ShortWritable */ - public ShortWritable evaluate(Text i) { + public ShortWritable evaluate(Text i) { if (i == null) { return null; } else { try { - shortWritable.set(LazyShort.parseShort(i.getBytes(), 0 , i.getLength(), 10)); + shortWritable.set(LazyShort.parseShort(i.getBytes(), 0, i.getLength(), + 10)); return shortWritable; } catch (NumberFormatException e) { // MySQL returns 0 if the string is not a well-formed numeric value. 
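A rough illustration (not part of the patch): the UDFToShort overloads above implement CAST(... AS SMALLINT) as a narrowing cast for numeric inputs and as a base-10 parse for strings. The hunk is cut off before the parse's fallback, so the sketch below is a minimal plain-Java stand-in; Short.parseShort replaces Hive's LazyShort.parseShort, and the null fallback is an assumption (MySQL itself would return 0, per the comment above).

// Hypothetical stand-in for the CAST(... AS SMALLINT) paths above; it is not
// part of UDFToShort and does not use the Hadoop Writable wrappers.
public final class ToShortSketch {

  // Narrowing cast, as in evaluate(IntWritable), evaluate(LongWritable), etc.
  public static Short fromLong(Long i) {
    if (i == null) {
      return null;
    }
    return (short) i.longValue(); // overflow wraps around, e.g. 70000 -> 4464
  }

  // String parse, as in evaluate(Text); Short.parseShort stands in for
  // LazyShort.parseShort. Returning null on malformed input is an assumption;
  // MySQL would return 0 instead.
  public static Short fromString(String s) {
    if (s == null) {
      return null;
    }
    try {
      return Short.parseShort(s.trim(), 10);
    } catch (NumberFormatException e) {
      return null;
    }
  }

  public static void main(String[] args) {
    System.out.println(fromLong(70000L));    // 4464
    System.out.println(fromString("123"));   // 123
    System.out.println(fromString("12x"));   // null
  }
}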
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnixTimeStamp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnixTimeStamp.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnixTimeStamp.java (working copy) @@ -18,9 +18,8 @@ package org.apache.hadoop.hive.ql.udf; +import java.text.ParseException; import java.text.SimpleDateFormat; -import java.text.ParseException; -import java.util.TimeZone; import java.util.Date; import org.apache.commons.logging.Log; @@ -30,30 +29,28 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; - -@UDFType(deterministic=false) -@description( - name = "unix_timestamp", - value = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp", - extended = "Converts the current or specified time to number of seconds " + - "since 1970-01-01." - ) +@UDFType(deterministic = false) +@description(name = "unix_timestamp", value = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp", extended = "Converts the current or specified time to number of seconds " + + "since 1970-01-01.") public class UDFUnixTimeStamp extends UDF { private static Log LOG = LogFactory.getLog(UDFUnixTimeStamp.class.getName()); - // For now, we just use the default time zone. - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + // For now, we just use the default time zone. + private final SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); LongWritable result = new LongWritable(); + public UDFUnixTimeStamp() { } /** * Return current UnixTime. + * * @return long Number of seconds from 1970-01-01 00:00:00 */ - public LongWritable evaluate() { + public LongWritable evaluate() { Date date = new Date(); result.set(date.getTime() / 1000); return result; @@ -61,16 +58,18 @@ /** * Convert time string to UnixTime. - * @param dateText Time string in format yyyy-MM-dd HH:mm:ss + * + * @param dateText + * Time string in format yyyy-MM-dd HH:mm:ss * @return long Number of seconds from 1970-01-01 00:00:00 */ - public LongWritable evaluate(Text dateText) { + public LongWritable evaluate(Text dateText) { if (dateText == null) { return null; } try { - Date date = (Date)formatter.parse(dateText.toString()); + Date date = formatter.parse(dateText.toString()); result.set(date.getTime() / 1000); return result; } catch (ParseException e) { @@ -79,13 +78,17 @@ } Text lastPatternText = new Text(); + /** * Convert time string to UnixTime with user defined pattern. 
- * @param dateText Time string in format patternstring - * @param patternText Time patterns string supported by SimpleDateFormat + * + * @param dateText + * Time string in format patternstring + * @param patternText + * Time patterns string supported by SimpleDateFormat * @return long Number of seconds from 1970-01-01 00:00:00 */ - public LongWritable evaluate(Text dateText, Text patternText) { + public LongWritable evaluate(Text dateText, Text patternText) { if (dateText == null || patternText == null) { return null; } @@ -93,7 +96,7 @@ if (!patternText.equals(lastPatternText)) { formatter.applyPattern(patternText.toString()); lastPatternText.set(patternText); - } + } } catch (Exception e) { return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java (working copy) @@ -31,123 +31,130 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToFloat extends UDF { private static Log LOG = LogFactory.getLog(UDFToFloat.class.getName()); FloatWritable floatWritable = new FloatWritable(); - + public UDFToFloat() { } /** * Convert from void to a float. This is called for CAST(... AS FLOAT) - * - * @param i The void value to convert + * + * @param i + * The void value to convert * @return FloatWritable */ - public FloatWritable evaluate(NullWritable i) { + public FloatWritable evaluate(NullWritable i) { return null; } /** * Convert from boolean to a float. This is called for CAST(... AS FLOAT) - * - * @param i The boolean value to convert + * + * @param i + * The boolean value to convert * @return FloatWritable */ - public FloatWritable evaluate(BooleanWritable i) { + public FloatWritable evaluate(BooleanWritable i) { if (i == null) { return null; } else { - floatWritable.set(i.get() ? (float)1.0 : (float)0.0); + floatWritable.set(i.get() ? (float) 1.0 : (float) 0.0); return floatWritable; } } - + /** * Convert from byte to a float. This is called for CAST(... AS FLOAT) - * - * @param i The byte value to convert + * + * @param i + * The byte value to convert * @return FloatWritable */ - public FloatWritable evaluate(ByteWritable i) { + public FloatWritable evaluate(ByteWritable i) { if (i == null) { return null; } else { - floatWritable.set((float)i.get()); + floatWritable.set(i.get()); return floatWritable; } } - + /** * Convert from short to a float. This is called for CAST(... AS FLOAT) - * - * @param i The short value to convert + * + * @param i + * The short value to convert * @return FloatWritable */ - public FloatWritable evaluate(ShortWritable i) { + public FloatWritable evaluate(ShortWritable i) { if (i == null) { return null; } else { - floatWritable.set((float)i.get()); + floatWritable.set(i.get()); return floatWritable; } } - + /** * Convert from integer to a float. This is called for CAST(... AS FLOAT) - * - * @param i The integer value to convert + * + * @param i + * The integer value to convert * @return FloatWritable */ - public FloatWritable evaluate(IntWritable i) { + public FloatWritable evaluate(IntWritable i) { if (i == null) { return null; } else { - floatWritable.set((float)i.get()); + floatWritable.set(i.get()); return floatWritable; } } - + /** * Convert from long to a float. This is called for CAST(... 
AS FLOAT) - * - * @param i The long value to convert + * + * @param i + * The long value to convert * @return FloatWritable */ - public FloatWritable evaluate(LongWritable i) { + public FloatWritable evaluate(LongWritable i) { if (i == null) { return null; } else { - floatWritable.set((float)i.get()); + floatWritable.set(i.get()); return floatWritable; } } /** * Convert from double to a float. This is called for CAST(... AS FLOAT) - * - * @param i The double value to convert + * + * @param i + * The double value to convert * @return FloatWritable - */ - public FloatWritable evaluate(DoubleWritable i) { + */ + public FloatWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - floatWritable.set((float)i.get()); + floatWritable.set((float) i.get()); return floatWritable; } } - + /** * Convert from string to a float. This is called for CAST(... AS FLOAT) - * - * @param i The string value to convert + * + * @param i + * The string value to convert * @return FloatWritable */ - public FloatWritable evaluate(Text i) { + public FloatWritable evaluate(Text i) { if (i == null) { return null; } else { @@ -161,5 +168,5 @@ } } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java (working copy) @@ -30,38 +30,37 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "day,dayofmonth", - value = "_FUNC_(date) - Returns the date of the month of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + - "'yyyy-MM-dd'.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + - " 30" - ) +@description(name = "day,dayofmonth", value = "_FUNC_(date) - Returns the date of the month of date", extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'yyyy-MM-dd'.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 30") public class UDFDayOfMonth extends UDF { private static Log LOG = LogFactory.getLog(UDFDayOfMonth.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFDayOfMonth() { } /** * Get the day of month from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 1 to 31. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 1 to 31. null if the dateString is not a valid date + * string. 
*/ - public IntWritable evaluate(Text dateString) { - + public IntWritable evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = formatter.parse(dateString.toString()); calendar.setTime(date); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseBitOP.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseBitOP.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseBitOP.java (working copy) @@ -25,8 +25,8 @@ import org.apache.hadoop.io.LongWritable; /** - * Base class for numeric operators like +, -, / etc. All these operators - * share a common method resolver (NumericOpMethodResolver). + * Base class for numeric operators like +, -, / etc. All these operators share + * a common method resolver (NumericOpMethodResolver). */ public abstract class UDFBaseBitOP extends UDF { @@ -41,5 +41,5 @@ protected ShortWritable shortWritable = new ShortWritable(); protected IntWritable intWritable = new IntWritable(); protected LongWritable longWritable = new LongWritable(); - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLn.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLn.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLn.java (working copy) @@ -24,26 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "ln", - value = "_FUNC_(x) - Returns the natural logarithm of x", - extended = "Example:\n" + - " > SELECT _FUNC_(1) FROM src LIMIT 1;\n" + - " 0" - ) +@description(name = "ln", value = "_FUNC_(x) - Returns the natural logarithm of x", extended = "Example:\n" + + " > SELECT _FUNC_(1) FROM src LIMIT 1;\n" + " 0") public class UDFLn extends UDF { private static Log LOG = LogFactory.getLog(UDFLn.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFLn() { } /** * Returns the natural logarithm of "a". */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null || a.get() <= 0.0) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (working copy) @@ -19,113 +19,99 @@ package org.apache.hadoop.hive.ql.udf; import java.util.ArrayList; -import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONObject; -import org.json.JSONException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; -@description( - name = "get_json_object", - value = "_FUNC_(json_txt, path) - Extract a json object from path ", - extended = "Extract json object from a json string based on json path " + - "specified, and return json string of the extracted json object. 
It " + - "will return null if the input json string is invalid.\n" + - "A limited version of JSONPath supported:\n" + - " $ : Root object\n" + - " . : Child operator\n" + - " [] : Subscript operator for array\n" + - " * : Wildcard for []\n" + - "Syntax not supported that's worth noticing:\n" + - " '' : Zero length string as key\n" + - " .. : Recursive descent\n" + - " &#064; : Current object/element\n" + - " () : Script expression\n" + - " ?() : Filter (script) expression.\n" + - " [,] : Union operator\n" + - " [start:end:step] : array slice operator\n" - ) +@description(name = "get_json_object", value = "_FUNC_(json_txt, path) - Extract a json object from path ", extended = "Extract json object from a json string based on json path " + + "specified, and return json string of the extracted json object. It " + + "will return null if the input json string is invalid.\n" + + "A limited version of JSONPath supported:\n" + + " $ : Root object\n" + + " . : Child operator\n" + + " [] : Subscript operator for array\n" + + " * : Wildcard for []\n" + + "Syntax not supported that's worth noticing:\n" + + " '' : Zero length string as key\n" + + " .. : Recursive descent\n" + + " &#064; : Current object/element\n" + + " () : Script expression\n" + + " ?() : Filter (script) expression.\n" + + " [,] : Union operator\n" + + " [start:end:step] : array slice operator\n") public class UDFJson extends UDF { private static Log LOG = LogFactory.getLog(UDFJson.class.getName()); - private Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*"); - private Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); + private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*"); + private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); // An LRU cache using a linked hash map - static class HashCache extends LinkedHashMap { - - private static final int CACHE_SIZE = 16; - private static final int INIT_SIZE = 32; + static class HashCache extends LinkedHashMap { + + private static final int CACHE_SIZE = 16; + private static final int INIT_SIZE = 32; private static final float LOAD_FACTOR = 0.6f; - + HashCache() { - super(INIT_SIZE,LOAD_FACTOR); + super(INIT_SIZE, LOAD_FACTOR); } + private static final long serialVersionUID = 1; - @Override - protected boolean removeEldestEntry (Map.Entry eldest) { - return size() > CACHE_SIZE; + + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > CACHE_SIZE; } } - - static Map extractObjectCache = - new HashCache(); - static Map pathExprCache = - new HashCache(); - static Map> indexListCache = - new HashCache>(); - static Map mKeyGroup1Cache = - new HashCache(); - static Map mKeyMatchesCache = - new HashCache(); - + + static Map extractObjectCache = new HashCache(); + static Map pathExprCache = new HashCache(); + static Map> indexListCache = new HashCache>(); + static Map mKeyGroup1Cache = new HashCache(); + static Map mKeyMatchesCache = new HashCache(); + Text result = new Text(); - + public UDFJson() { } /** - * Extract json object from a json string based on json path specified, - * and return json string of the extracted json object. It will return null - * if the input json string is invalid. - * - * A limited version of JSONPath supported: - * $ : Root object - * . : Child operator - * [] : Subscript operator for array - * * : Wildcard for [] - * - * Syntax not supported that's worth noticing: - * '' : Zero length string as key - * .. 
: Recursive descent - * &#064; : Current object/element - * () : Script expression - * ?() : Filter (script) expression. - * [,] : Union operator - * [start:end:step] : array slice operator - * - * @param jsonString the json string. - * @param pathString the json path expression. + * Extract json object from a json string based on json path specified, and + * return json string of the extracted json object. It will return null if the + * input json string is invalid. + * + * A limited version of JSONPath supported: $ : Root object . : Child operator + * [] : Subscript operator for array * : Wildcard for [] + * + * Syntax not supported that's worth noticing: '' : Zero length string as key + * .. : Recursive descent &#064; : Current object/element () : Script + * expression ?() : Filter (script) expression. [,] : Union operator + * [start:end:step] : array slice operator + * + * @param jsonString + * the json string. + * @param pathString + * the json path expression. * @return json string or null when an error happens. */ public Text evaluate(String jsonString, String pathString) { - if(jsonString == null || jsonString == "" || - pathString == null || pathString == "") { + if (jsonString == null || jsonString == "" || pathString == null + || pathString == "") { return null; } - + try { // Cache pathExpr String[] pathExpr = pathExprCache.get(pathString); @@ -133,13 +119,13 @@ pathExpr = pathString.split("\\.", -1); pathExprCache.put(pathString, pathExpr); } - + if (!pathExpr[0].equalsIgnoreCase("$")) { return null; } // Cache extractObject Object extractObject = extractObjectCache.get(jsonString); - if(extractObject == null) { + if (extractObject == null) { extractObject = new JSONObject(jsonString); extractObjectCache.put(jsonString, extractObject); } @@ -166,7 +152,7 @@ if (!mKeyMatches.booleanValue()) { return null; } - + // Cache mkey.group(1) String mKeyGroup1 = mKeyGroup1Cache.get(path); if (mKeyGroup1 == null) { @@ -177,10 +163,10 @@ mKeyGroup1Cache.put(path, mKeyGroup1); } json = extract_json_withkey(json, mKeyGroup1); - + // Cache indexList ArrayList indexList = indexListCache.get(path); - if(indexList == null) { + if (indexList == null) { Matcher mIndex = patternIndex.matcher(path); indexList = new ArrayList(); while (mIndex.find()) { @@ -192,15 +178,15 @@ if (indexList.size() > 0) { json = extract_json_withindex(json, indexList); } - + return json; } ArrayList jsonList = new ArrayList(); - + private Object extract_json_withindex(Object json, ArrayList indexList) throws JSONException { - + jsonList.clear(); jsonList.add(json); Iterator itr = indexList.iterator(); @@ -208,9 +194,9 @@ String index = itr.next(); ArrayList tmp_jsonList = new ArrayList(); if (index.equalsIgnoreCase("*")) { - for (int i = 0; i < ((ArrayList) jsonList).size(); i++) { + for (int i = 0; i < (jsonList).size(); i++) { try { - JSONArray array = (JSONArray) ((ArrayList) jsonList).get(i); + JSONArray array = (JSONArray) (jsonList).get(i); for (int j = 0; j < array.length(); j++) { tmp_jsonList.add(array.get(j)); } @@ -220,11 +206,10 @@ } jsonList = tmp_jsonList; } else { - for (int i = 0; i < ((ArrayList) jsonList).size(); i++) { + for (int i = 0; i < (jsonList).size(); i++) { try { - tmp_jsonList - .add(((JSONArray) ((ArrayList) jsonList).get(i)) - .get(Integer.parseInt(index))); + tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer + .parseInt(index))); } catch (ClassCastException e) { continue; } catch (JSONException e) { @@ -234,8 +219,7 @@ } } } - return (jsonList.size() > 1) ? 
new JSONArray((Collection) jsonList) - : jsonList.get(0); + return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0); } private Object extract_json_withkey(Object json, String path) Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFTrim.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFTrim.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFTrim.java (working copy) @@ -18,25 +18,18 @@ package org.apache.hadoop.hive.ql.udf; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -import org.apache.commons.lang.StringUtils; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "trim", - value = "_FUNC_(str) - Removes the leading and trailing space characters " + - "from str ", - extended = "Example:\n" + - " > SELECT _FUNC_(' facebook ') FROM src LIMIT 1;\n" + - " 'facebook'" - ) +@description(name = "trim", value = "_FUNC_(str) - Removes the leading and trailing space characters " + + "from str ", extended = "Example:\n" + + " > SELECT _FUNC_(' facebook ') FROM src LIMIT 1;\n" + " 'facebook'") public class UDFTrim extends UDF { Text result = new Text(); + public UDFTrim() { } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java (working copy) @@ -30,41 +30,41 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "hour", - value = "_FUNC_(date) - Returns the hour of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + - "'HH:mm:ss'.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + - " 12\n" + - " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + - " 12" - ) +@description(name = "hour", value = "_FUNC_(date) - Returns the hour of date", extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'HH:mm:ss'.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + + " 12\n" + + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 12") public class UDFHour extends UDF { private static Log LOG = LogFactory.getLog(UDFHour.class.getName()); - private SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - private SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter1 = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFHour() { } /** * Get the hour from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 0 to 23. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 0 to 23. null if the dateString is not a valid date + * string. 
*/ - public IntWritable evaluate(Text dateString) { - + public IntWritable evaluate(Text dateString) { + if (dateString == null) { return null; } - + try { Date date = null; try { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLpad.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLpad.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLpad.java (working copy) @@ -23,52 +23,47 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "lpad", - value = "_FUNC_(str, len, pad) - Returns str, left-padded with pad to a " + - "length of len", - extended = "If str is longer than len, the return value is shortened to " + - "len characters.\n" + - "Example:\n" + - " > SELECT _FUNC_('hi', 5, '??') FROM src LIMIT 1;\n" + - " '???hi'" + - " > SELECT _FUNC_('hi', 1, '??') FROM src LIMIT 1;\n" + - " 'h'" - ) -public class UDFLpad extends UDF { - - private Text result = new Text(); - +@description(name = "lpad", value = "_FUNC_(str, len, pad) - Returns str, left-padded with pad to a " + + "length of len", extended = "If str is longer than len, the return value is shortened to " + + "len characters.\n" + + "Example:\n" + + " > SELECT _FUNC_('hi', 5, '??') FROM src LIMIT 1;\n" + + " '???hi'" + + " > SELECT _FUNC_('hi', 1, '??') FROM src LIMIT 1;\n" + " 'h'") +public class UDFLpad extends UDF { + + private final Text result = new Text(); + public Text evaluate(Text s, IntWritable n, Text pad) { if (s == null || n == null || pad == null) { return null; } - + int len = n.get(); - + byte[] data = result.getBytes(); - if(data.length < len) { + if (data.length < len) { data = new byte[len]; } - + byte[] txt = s.getBytes(); byte[] padTxt = pad.getBytes(); - + // The length of the padding needed int pos = Math.max(len - s.getLength(), 0); - + // Copy the padding - for(int i = 0; i < pos; i += pad.getLength()) { - for(int j = 0; j < pad.getLength() && j < pos-i; j++) { - data[i+j] = padTxt[j]; + for (int i = 0; i < pos; i += pad.getLength()) { + for (int j = 0; j < pad.getLength() && j < pos - i; j++) { + data[i + j] = padTxt[j]; } } - + // Copy the text - for(int i = 0; pos+i < len && i < s.getLength(); i++) { - data[pos+i] = txt[i]; + for (int i = 0; pos + i < len && i < s.getLength(); i++) { + data[pos + i] = txt[i]; } - + result.set(data, 0, len); return result; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLessThan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLessThan.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPLessThan.java (working copy) @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.udf; -import java.sql.Date; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.description; @@ -33,21 +31,19 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "<", - value = "a _FUNC_ b - Returns TRUE if a is less than b" -) +@description(name = "<", value = "a _FUNC_ b - Returns TRUE if a is less than b") public class UDFOPLessThan extends UDFBaseCompare { private static Log LOG = LogFactory.getLog(UDFOPLessThan.class.getName()); BooleanWritable resultCache; + public UDFOPLessThan() { resultCache = new BooleanWritable(); } - public BooleanWritable evaluate(Text a, Text b) { - BooleanWritable r = this.resultCache; + public BooleanWritable 
evaluate(Text a, Text b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -57,8 +53,8 @@ return r; } - public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ByteWritable a, ByteWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -68,8 +64,8 @@ return r; } - public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(ShortWritable a, ShortWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -79,8 +75,8 @@ return r; } - public BooleanWritable evaluate(IntWritable a, IntWritable b) { - BooleanWritable r = this.resultCache; + public BooleanWritable evaluate(IntWritable a, IntWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -89,9 +85,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(LongWritable a, LongWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(LongWritable a, LongWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -100,9 +96,9 @@ // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; } - - public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { - BooleanWritable r = this.resultCache; + + public BooleanWritable evaluate(FloatWritable a, FloatWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { @@ -110,10 +106,11 @@ } // LOG.info("evaluate(" + a + "," + b + ")=" + r); return r; - } + } - public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { - BooleanWritable r = this.resultCache; + @Override + public BooleanWritable evaluate(DoubleWritable a, DoubleWritable b) { + BooleanWritable r = resultCache; if ((a == null) || (b == null)) { r = null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseCompare.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseCompare.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseCompare.java (working copy) @@ -26,13 +26,13 @@ public abstract class UDFBaseCompare extends UDF { /** - * This constructor sets the resolver to be used for comparison operators. - * See {@link org.apache.hadoop.hive.ql.exec.UDFMethodResolver} + * This constructor sets the resolver to be used for comparison operators. 
See + * {@link org.apache.hadoop.hive.ql.exec.UDFMethodResolver} */ public UDFBaseCompare() { super(null); setResolver(new ComparisonOpMethodResolver(this.getClass())); } - public abstract BooleanWritable evaluate(DoubleWritable a, DoubleWritable b); + public abstract BooleanWritable evaluate(DoubleWritable a, DoubleWritable b); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java (working copy) @@ -25,29 +25,24 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "floor", - value = "_FUNC_(x) - Find the largest integer not greater than x", - extended = "Example:\n" + - " > SELECT _FUNC_(-0.1) FROM src LIMIT 1;\n" + - " -1\n" + - " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + - " 5" - ) +@description(name = "floor", value = "_FUNC_(x) - Find the largest integer not greater than x", extended = "Example:\n" + + " > SELECT _FUNC_(-0.1) FROM src LIMIT 1;\n" + + " -1\n" + + " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + " 5") public class UDFFloor extends UDF { private static Log LOG = LogFactory.getLog(UDFFloor.class.getName()); LongWritable result = new LongWritable(); - + public UDFFloor() { } - public LongWritable evaluate(DoubleWritable i) { + public LongWritable evaluate(DoubleWritable i) { if (i == null) { return null; } else { - result.set((long)Math.floor(i.get())); + result.set((long) Math.floor(i.get())); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java (working copy) @@ -24,32 +24,27 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "log10", - value = "_FUNC_(x) - Returns the logarithm of x with base 10", - extended = "Example:\n" + - " > SELECT _FUNC_(10) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "log10", value = "_FUNC_(x) - Returns the logarithm of x with base 10", extended = "Example:\n" + + " > SELECT _FUNC_(10) FROM src LIMIT 1;\n" + " 1") public class UDFLog10 extends UDF { private static Log LOG = LogFactory.getLog(UDFLog10.class.getName()); private static double log10 = Math.log(10.0); - + DoubleWritable result = new DoubleWritable(); - + public UDFLog10() { } /** * Returns the logarithm of "a" with base 10. 
*/ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null || a.get() <= 0.0) { return null; } else { - result.set(Math.log(a.get())/log10); + result.set(Math.log(a.get()) / log10); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java (working copy) @@ -20,51 +20,46 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; -@description( - name = "find_in_set", - value = "_FUNC_(str,str_array) - Returns the first occurrence " + - " of str in str_array where str_array is a comma-delimited string." + - " Returns null if either argument is null." + - " Returns 0 if the first argument has any commas.", - extended = "Example:\n" + - " > SELECT _FUNC_('ab','abc,b,ab,c,def') FROM src LIMIT 1;\n" + - " 3\n" + - " > SELECT * FROM src1 WHERE NOT _FUNC_(key,'311,128,345,956')=0;\n" + - " 311 val_311\n" + - " 128" +@description(name = "find_in_set", value = "_FUNC_(str,str_array) - Returns the first occurrence " + + " of str in str_array where str_array is a comma-delimited string." + + " Returns null if either argument is null." + + " Returns 0 if the first argument has any commas.", extended = "Example:\n" + + " > SELECT _FUNC_('ab','abc,b,ab,c,def') FROM src LIMIT 1;\n" + + " 3\n" + + " > SELECT * FROM src1 WHERE NOT _FUNC_(key,'311,128,345,956')=0;\n" + + " 311 val_311\n" + " 128" - ) - +) public class UDFFindInSet extends UDF { - private IntWritable result = new IntWritable(); - + private final IntWritable result = new IntWritable(); + public IntWritable evaluate(Text s, Text txtarray) { if (s == null || txtarray == null) { return null; } - + byte[] search_bytes = s.getBytes(); - - for(int i = 0; i < s.getLength(); i++) { - if(search_bytes[i]==',') { + + for (int i = 0; i < s.getLength(); i++) { + if (search_bytes[i] == ',') { result.set(0); return result; - } - + } + } - + byte[] data = txtarray.getBytes(); int search_length = s.getLength(); - + int cur_pos_in_array = 0; int cur_length = 0; boolean matching = true; - - for(int i = 0; i < txtarray.getLength(); i++) { - if(data[i] == ',') { + + for (int i = 0; i < txtarray.getLength(); i++) { + if (data[i] == ',') { cur_pos_in_array++; if (matching && cur_length == search_length) { result.set(cur_pos_in_array); @@ -74,19 +69,19 @@ cur_length = 0; } } else { - if (cur_length + 1 <= search_length){ - if(!matching || search_bytes[cur_length] != data[i]) { + if (cur_length + 1 <= search_length) { + if (!matching || search_bytes[cur_length] != data[i]) { matching = false; } } else { matching = false; } - cur_length++; + cur_length++; } - + } - - if(matching && cur_length == search_length) { + + if (matching && cur_length == search_length) { cur_pos_in_array++; result.set(cur_pos_in_array); return result; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java (working copy) @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf; +import java.util.regex.Matcher; +import java.util.regex.Pattern; 
+ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; @@ -25,46 +28,42 @@ import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "like", - value = "_FUNC_(str, pattern) - Checks if str matches pattern", - extended = "Example:\n" + - " > SELECT a.* FROM srcpart a WHERE a.hr _FUNC_ '%2' LIMIT 1;\n" + - " 27 val_27 2008-04-08 12" - ) +@description(name = "like", value = "_FUNC_(str, pattern) - Checks if str matches pattern", extended = "Example:\n" + + " > SELECT a.* FROM srcpart a WHERE a.hr _FUNC_ '%2' LIMIT 1;\n" + + " 27 val_27 2008-04-08 12") public class UDFLike extends UDF { private static Log LOG = LogFactory.getLog(UDFLike.class.getName()); - private Text lastLikePattern = new Text(); + private final Text lastLikePattern = new Text(); private Pattern p = null; - - // Doing characters comparison directly instead of regular expression + + // Doing characters comparison directly instead of regular expression // matching for simple patterns like "%abc%". - private enum PatternType { - NONE, // "abc" - BEGIN, // "abc%" - END, // "%abc" - MIDDLE, // "%abc%" - COMPLEX, // all other cases, such as "ab%c_de" + private enum PatternType { + NONE, // "abc" + BEGIN, // "abc%" + END, // "%abc" + MIDDLE, // "%abc%" + COMPLEX, // all other cases, such as "ab%c_de" } + private PatternType type = PatternType.COMPLEX; - private Text simplePattern = new Text(); - - private BooleanWritable result = new BooleanWritable(); + private final Text simplePattern = new Text(); + + private final BooleanWritable result = new BooleanWritable(); + public UDFLike() { } public static String likePatternToRegExp(String likePattern) { StringBuilder sb = new StringBuilder(); - for(int i=0; i - * Examples: - *
+   * Examples:
+ *
+ *
    * parseSimplePattern("%abc%") changes {@link #type} to PatternType.MIDDLE
    * and changes {@link #simplePattern} to "abc"
    * parseSimplePattern("%ab_c%") changes {@link #type} to PatternType.COMPLEX
    * and does not change {@link #simplePattern}
-   *
+ *
 *
- * @param likePattern the input LIKE query pattern
+ *
+ * + * @param likePattern + * the input LIKE query pattern */ private void parseSimplePattern(String likePattern) { - int length = likePattern.length(); + int length = likePattern.length(); int beginIndex = 0; - int endIndex = length; - char lastChar = 'a'; + int endIndex = length; + char lastChar = 'a'; String strPattern = new String(); type = PatternType.NONE; - - for (int i=0; i SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + - " 7" - ) +@description(name = "month", value = "_FUNC_(date) - Returns the month of date", extended = "Example:\n" + + " > SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + " 7") public class UDFMonth extends UDF { private static Log LOG = LogFactory.getLog(UDFMonth.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(); IntWritable result = new IntWritable(); + public UDFMonth() { } /** * Get the month from a date string. * - * @param dateString the dateString in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @return an int from 1 to 12. null if the dateString is not a valid date string. + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an int from 1 to 12. null if the dateString is not a valid date + * string. */ - public IntWritable evaluate(Text dateString) { + public IntWritable evaluate(Text dateString) { if (dateString == null) { return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAcos.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAcos.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAcos.java (working copy) @@ -24,29 +24,23 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "acos", - value = "_FUNC_(x) - returns the arc cosine of x if -1<=x<=1 or " + - "NULL otherwise", - extended = "Example:\n" + - " > SELECT _FUNC_(1) FROM src LIMIT 1;\n" + - " 0\n" + - " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + - " NULL" - ) +@description(name = "acos", value = "_FUNC_(x) - returns the arc cosine of x if -1<=x<=1 or " + + "NULL otherwise", extended = "Example:\n" + + " > SELECT _FUNC_(1) FROM src LIMIT 1;\n" + " 0\n" + + " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + " NULL") public class UDFAcos extends UDF { private static Log LOG = LogFactory.getLog(UDFAcos.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFAcos() { } /** * Take Arc Cosine of a in radians. */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java (working copy) @@ -31,46 +31,47 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "date_add", - value = "_FUNC_(start_date, num_days) - Returns the date that is num_days" + - " after start_date.", - extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" + - " 'yyyy-MM-dd'. num_days is a number. 
The time part of start_date is " + - "ignored.\n" + - "Example:\n " + - " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + - " '2009-31-07'" - ) +@description(name = "date_add", value = "_FUNC_(start_date, num_days) - Returns the date that is num_days" + + " after start_date.", extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" + + " 'yyyy-MM-dd'. num_days is a number. The time part of start_date is " + + "ignored.\n" + + "Example:\n " + + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + + " '2009-31-07'") public class UDFDateAdd extends UDF { private static Log LOG = LogFactory.getLog(UDFDateAdd.class.getName()); - private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(TimeZone + .getTimeZone("UTC")); Text result = new Text(); - + public UDFDateAdd() { } /** - * Add a number of days to the date. - * The time part of the string will be ignored. + * Add a number of days to the date. The time part of the string will be + * ignored. * * NOTE: This is a subset of what MySQL offers as: - * http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_date-add + * http://dev.mysql.com/doc/refman + * /5.1/en/date-and-time-functions.html#function_date-add * - * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd". - * @param days The number of days to add. + * @param dateString1 + * the date string in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @param days + * The number of days to add. * @return the date in the format of "yyyy-MM-dd". 
*/ - public Text evaluate(Text dateString1, IntWritable days) { - + public Text evaluate(Text dateString1, IntWritable days) { + if (dateString1 == null || days == null) { return null; } - + try { calendar.setTime(formatter.parse(dateString1.toString())); calendar.add(Calendar.DAY_OF_MONTH, days.get()); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpReplace.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpReplace.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpReplace.java (working copy) @@ -18,30 +18,27 @@ package org.apache.hadoop.hive.ql.udf; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "regexp_replace", - value = "_FUNC_(str, regexp, rep) - replace all substrings of str that " + - "match regexp with rep", - extended = "Example:\n" + - " > SELECT _FUNC_('100-200', '(\\d+)', 'num') FROM src LIMIT 1;\n" + - " 'num-num'" - ) +@description(name = "regexp_replace", value = "_FUNC_(str, regexp, rep) - replace all substrings of str that " + + "match regexp with rep", extended = "Example:\n" + + " > SELECT _FUNC_('100-200', '(\\d+)', 'num') FROM src LIMIT 1;\n" + + " 'num-num'") public class UDFRegExpReplace extends UDF { - private Text lastRegex = new Text(); + private final Text lastRegex = new Text(); private Pattern p = null; - - private Text lastReplacement = new Text(); - private String replacementString = ""; + private final Text lastReplacement = new Text(); + private String replacementString = ""; + Text result = new Text(); + public UDFRegExpReplace() { } @@ -60,15 +57,15 @@ lastReplacement.set(replacement); replacementString = replacement.toString(); } - + StringBuffer sb = new StringBuffer(); while (m.find()) { m.appendReplacement(sb, replacementString); } m.appendTail(sb); - + result.set(sb.toString()); - return result; + return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRpad.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRpad.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRpad.java (working copy) @@ -23,52 +23,46 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +@description(name = "rpad", value = "_FUNC_(str, len, pad) - Returns str, right-padded with pad to a " + + "length of len", extended = "If str is longer than len, the return value is shortened to " + + "len characters.\n" + + "Example:\n" + + " > SELECT _FUNC_('hi', 5, '??') FROM src LIMIT 1;\n" + + " 'hi???'" + + " > SELECT _FUNC_('hi', 1, '??') FROM src LIMIT 1;\n" + " 'h'") +public class UDFRpad extends UDF { -@description( - name = "rpad", - value = "_FUNC_(str, len, pad) - Returns str, right-padded with pad to a " + - "length of len", - extended = "If str is longer than len, the return value is shortened to " + - "len characters.\n" + - "Example:\n" + - " > SELECT _FUNC_('hi', 5, '??') FROM src LIMIT 1;\n" + - " 'hi???'" + - " > SELECT _FUNC_('hi', 1, '??') FROM src LIMIT 1;\n" + - " 'h'" - ) -public class UDFRpad extends UDF { - - private Text result = new Text(); - + private final Text result = new Text(); + public Text evaluate(Text s, IntWritable n, Text pad) { if (s == null || n == null || pad == null) { 
return null; } - + int len = n.get(); - + byte[] data = result.getBytes(); - if(data.length < len) { + if (data.length < len) { data = new byte[len]; } - + byte[] txt = s.getBytes(); byte[] padTxt = pad.getBytes(); - + int pos; // Copy the text - for(pos = 0; pos < s.getLength() && pos < len; pos++) { + for (pos = 0; pos < s.getLength() && pos < len; pos++) { data[pos] = txt[pos]; } - + // Copy the padding - while(pos < len) { - for(int i = 0; i < pad.getLength() && i < len-pos; i++) { - data[pos+i] = padTxt[i]; + while (pos < len) { + for (int i = 0; i < pad.getLength() && i < len - pos; i++) { + data[pos + i] = padTxt[i]; } pos += pad.getLength(); } - + result.set(data, 0, len); return result; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java (working copy) @@ -20,52 +20,48 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -@description( - name = "unhex", - value = "_FUNC_(str) - Converts hexadecimal argument to string", - extended = "Performs the inverse operation of HEX(str). That is, it interprets\n" + - "each pair of hexadecimal digits in the argument as a number and\n" + - "converts it to the character represented by the number. The\n" + - "resulting characters are returned as a binary string.\n\n" + - "Example:\n" + - "> SELECT UNHEX('4D7953514C') from src limit 1;\n" + - "'MySQL'\n" + - "> SELECT UNHEX(HEX('string')) from src limit 1;\n" + - "'string'\n" + - "> SELECT HEX(UNHEX('1267')) from src limit 1;\n" + - "'1267'\n\n" + - "The characters in the argument string must be legal hexadecimal\n" + - "digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters\n" + - "any nonhexadecimal digits in the argument, it returns NULL. Also,\n" + - "if there are an odd number of characters a leading 0 is appended." - ) -public class UDFUnhex extends UDF { - + +@description(name = "unhex", value = "_FUNC_(str) - Converts hexadecimal argument to string", extended = "Performs the inverse operation of HEX(str). That is, it interprets\n" + + "each pair of hexadecimal digits in the argument as a number and\n" + + "converts it to the character represented by the number. The\n" + + "resulting characters are returned as a binary string.\n\n" + + "Example:\n" + + "> SELECT UNHEX('4D7953514C') from src limit 1;\n" + + "'MySQL'\n" + + "> SELECT UNHEX(HEX('string')) from src limit 1;\n" + + "'string'\n" + + "> SELECT HEX(UNHEX('1267')) from src limit 1;\n" + + "'1267'\n\n" + + "The characters in the argument string must be legal hexadecimal\n" + + "digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters\n" + + "any nonhexadecimal digits in the argument, it returns NULL. 
Also,\n" + + "if there are an odd number of characters a leading 0 is appended.") +public class UDFUnhex extends UDF { + /** - * Convert every two hex digits in s into + * Convert every two hex digits in s into * */ public Text evaluate(Text s) { if (s == null) { return null; } - - //append a leading 0 if needed + + // append a leading 0 if needed String str; - if (s.getLength() % 2 == 1) + if (s.getLength() % 2 == 1) { str = "0" + s.toString(); - else + } else { str = s.toString(); - - byte [] result = new byte[str.length()/2]; - for(int i = 0; i < str.length(); i += 2) { + } + + byte[] result = new byte[str.length() / 2]; + for (int i = 0; i < str.length(); i += 2) { try { - result[i/2] = ((byte) Integer.parseInt(str.substring(i, i+2), 16)); + result[i / 2] = ((byte) Integer.parseInt(str.substring(i, i + 2), 16)); } catch (NumberFormatException e) { - //invalid character present, return null + // invalid character present, return null return null; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java (working copy) @@ -27,10 +27,6 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; - - - - /** * UDF to extract specfic parts from URL For example, * parse_url('http://facebook.com/path/p1.php?query=1', 'HOST') will return @@ -44,22 +40,17 @@ * HOST,PATH,QUERY,REF,PROTOCOL,AUTHORITY,FILE,USERINFO Also you can get a value * of particular key in QUERY, using syntax QUERY: eg: QUERY:k1. */ -@description( - name = "parse_url", - value = "_FUNC_(url, partToExtract[, key]) - extracts a part from a URL", - extended = "Parts: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, " + - "USERINFO\nkey specifies which query to extract\n" + - "Example:\n" + - " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + - "'HOST') FROM src LIMIT 1;\n" + - " 'facebook.com'\n" + - " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + - "'QUERY') FROM src LIMIT 1;\n" + - " 'query=1'\n" + - " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + - "'QUERY', 'query') FROM src LIMIT 1;\n" + - " '1'" - ) +@description(name = "parse_url", value = "_FUNC_(url, partToExtract[, key]) - extracts a part from a URL", extended = "Parts: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, " + + "USERINFO\nkey specifies which query to extract\n" + + "Example:\n" + + " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + + "'HOST') FROM src LIMIT 1;\n" + + " 'facebook.com'\n" + + " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + + "'QUERY') FROM src LIMIT 1;\n" + + " 'query=1'\n" + + " > SELECT _FUNC_('http://facebook.com/path/p1.php?query=1', " + + "'QUERY', 'query') FROM src LIMIT 1;\n" + " '1'") public class UDFParseUrl extends UDF { private static Log LOG = LogFactory.getLog(UDFParseUrl.class.getName()); @@ -67,7 +58,6 @@ private URL url = null; private Pattern p = null; private String lastKey = null; - public UDFParseUrl() { } @@ -86,42 +76,51 @@ } lastUrlStr = urlStr; - if (partToExtract.equals("HOST")) + if (partToExtract.equals("HOST")) { return url.getHost(); - if (partToExtract.equals("PATH")) + } + if (partToExtract.equals("PATH")) { return url.getPath(); - if (partToExtract.equals("QUERY")) + } + if (partToExtract.equals("QUERY")) { return url.getQuery(); - if (partToExtract.equals("REF")) + } + if 
(partToExtract.equals("REF")) { return url.getRef(); - if (partToExtract.equals("PROTOCOL")) + } + if (partToExtract.equals("PROTOCOL")) { return url.getProtocol(); - if (partToExtract.equals("FILE")) + } + if (partToExtract.equals("FILE")) { return url.getFile(); - if (partToExtract.equals("AUTHORITY")) + } + if (partToExtract.equals("AUTHORITY")) { return url.getAuthority(); - if (partToExtract.equals("USERINFO")) + } + if (partToExtract.equals("USERINFO")) { return url.getUserInfo(); + } return null; } public String evaluate(String urlStr, String partToExtract, String key) { - if (!partToExtract.equals("QUERY")) + if (!partToExtract.equals("QUERY")) { return null; + } String query = this.evaluate(urlStr, partToExtract); - if (query == null) + if (query == null) { return null; + } - - if (!key.equals(lastKey)){ - p = Pattern.compile("(&|^)"+key+"=([^&]*)"); + if (!key.equals(lastKey)) { + p = Pattern.compile("(&|^)" + key + "=([^&]*)"); } - + lastKey = key; Matcher m = p.matcher(query); - if (m.find()){ + if (m.find()) { return m.group(2); } return null; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitAnd.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitAnd.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPBitAnd.java (working copy) @@ -20,20 +20,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "&", - value = "a _FUNC_ b - Bitwise and", - extended = "Example:\n" + - " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + - " 1" -) +@description(name = "&", value = "a _FUNC_ b - Bitwise and", extended = "Example:\n" + + " > SELECT 3 _FUNC_ 5 FROM src LIMIT 1;\n" + " 1") public class UDFOPBitAnd extends UDFBaseBitOP { private static Log LOG = LogFactory.getLog(UDFOPBitAnd.class.getName()); @@ -45,7 +39,7 @@ if (a == null || b == null) { return null; } - byteWritable.set((byte)(a.get() & b.get())); + byteWritable.set((byte) (a.get() & b.get())); return byteWritable; } @@ -53,10 +47,10 @@ if (a == null || b == null) { return null; } - shortWritable.set((short)(a.get() & b.get())); + shortWritable.set((short) (a.get() & b.get())); return shortWritable; } - + public IntWritable evaluate(IntWritable a, IntWritable b) { if (a == null || b == null) { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPAnd.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPAnd.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPAnd.java (working copy) @@ -24,25 +24,22 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.BooleanWritable; -@description( - name = "and", - value = "a _FUNC_ b - Logical and", - extended = "Example:\n" + - " > SELECT * FROM srcpart WHERE src.hr=12 _FUNC_ " + - "src.hr='2008-04-08' LIMIT 1;\n" + - " 27 val_27 2008-04-08 12" -) +@description(name = "and", value = "a _FUNC_ b - Logical and", extended = "Example:\n" + + " > SELECT * FROM srcpart WHERE src.hr=12 _FUNC_ " + + "src.hr='2008-04-08' LIMIT 1;\n" + " 27 val_27 2008-04-08 12") public class UDFOPAnd extends UDF { - private static Log LOG = 
LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPAnd"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPAnd"); BooleanWritable result = new BooleanWritable(); + public UDFOPAnd() { } // Three-value Boolean: NULL stands for unknown - public BooleanWritable evaluate(BooleanWritable a, BooleanWritable b) { - + public BooleanWritable evaluate(BooleanWritable a, BooleanWritable b) { + if ((a != null && a.get() == false) || (b != null && b.get() == false)) { result.set(false); return result; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java (working copy) @@ -21,7 +21,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; - import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -35,24 +34,24 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; - public class UDFToString extends UDF { private static Log LOG = LogFactory.getLog(UDFToString.class.getName()); Text t = new Text(); ByteStream.Output out = new ByteStream.Output(); - + public UDFToString() { } - public Text evaluate(NullWritable i) { + public Text evaluate(NullWritable i) { return null; } - byte[] trueBytes = {'T', 'R', 'U', 'E'}; - byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'}; - public Text evaluate(BooleanWritable i) { + byte[] trueBytes = { 'T', 'R', 'U', 'E' }; + byte[] falseBytes = { 'F', 'A', 'L', 'S', 'E' }; + + public Text evaluate(BooleanWritable i) { if (i == null) { return null; } else { @@ -61,8 +60,8 @@ return t; } } - - public Text evaluate(ByteWritable i) { + + public Text evaluate(ByteWritable i) { if (i == null) { return null; } else { @@ -72,8 +71,8 @@ return t; } } - - public Text evaluate(ShortWritable i) { + + public Text evaluate(ShortWritable i) { if (i == null) { return null; } else { @@ -83,8 +82,8 @@ return t; } } - - public Text evaluate(IntWritable i) { + + public Text evaluate(IntWritable i) { if (i == null) { return null; } else { @@ -95,7 +94,7 @@ } } - public Text evaluate(LongWritable i) { + public Text evaluate(LongWritable i) { if (i == null) { return null; } else { @@ -105,8 +104,8 @@ return t; } } - - public Text evaluate(FloatWritable i) { + + public Text evaluate(FloatWritable i) { if (i == null) { return null; } else { @@ -114,8 +113,8 @@ return t; } } - - public Text evaluate(DoubleWritable i) { + + public Text evaluate(DoubleWritable i) { if (i == null) { return null; } else { @@ -123,5 +122,5 @@ return t; } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReverse.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReverse.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReverse.java (working copy) @@ -23,53 +23,51 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; import org.apache.hadoop.io.Text; -@description( - name = "reverse", - value = "_FUNC_(str) - reverse str", - extended = "Example:\n" + - " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + - " 'koobecaF'" - ) -public class UDFReverse extends UDF { - private Text result = new Text(); - +@description(name = "reverse", value = "_FUNC_(str) - 
reverse str", extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'koobecaF'") +public class UDFReverse extends UDF { + private final Text result = new Text(); + /** * Reverse a portion of an array in-place. * - * @param arr The array where the data will be reversed. - * @param first The beginning of the portion (inclusive). - * @param last The end of the portion (inclusive). + * @param arr + * The array where the data will be reversed. + * @param first + * The beginning of the portion (inclusive). + * @param last + * The end of the portion (inclusive). */ private void reverse(byte[] arr, int first, int last) { - for(int i = 0; i < (last-first+1)/2; i++) { + for (int i = 0; i < (last - first + 1) / 2; i++) { byte temp = arr[last - i]; arr[last - i] = arr[first + i]; arr[first + i] = temp; } } - + public Text evaluate(Text s) { if (s == null) { return null; } - + // set() will only allocate memory if the buffer of result is smaller than // s.getLength() and will never resize the buffer down. result.set(s); - + // Now do an in-place reversal in result.getBytes(). First, reverse every // character, then reverse the whole string. byte[] data = result.getBytes(); int prev = 0; // The index where the current char starts - for(int i = 1; i < result.getLength(); i++) { - if( GenericUDFUtils.isUtfStartByte(data[i]) ) { - reverse(data, prev, i-1); + for (int i = 1; i < result.getLength(); i++) { + if (GenericUDFUtils.isUtfStartByte(data[i])) { + reverse(data, prev, i - 1); prev = i; } } reverse(data, prev, result.getLength() - 1); reverse(data, 0, result.getLength() - 1); - + return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPOr.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPOr.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPOr.java (working copy) @@ -24,20 +24,18 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.BooleanWritable; -@description( - name = "or", - value = "a _FUNC_ b - Logical or" -) +@description(name = "or", value = "a _FUNC_ b - Logical or") public class UDFOPOr extends UDF { private static Log LOG = LogFactory.getLog(UDFOPOr.class.getName()); BooleanWritable result = new BooleanWritable(); + public UDFOPOr() { } // Three-value Boolean: NULL stands for unknown - public BooleanWritable evaluate(BooleanWritable a, BooleanWritable b) { + public BooleanWritable evaluate(BooleanWritable a, BooleanWritable b) { if ((a != null && a.get() == true) || (b != null && b.get() == true)) { result.set(true); return result; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRepeat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRepeat.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRepeat.java (working copy) @@ -18,45 +18,38 @@ package org.apache.hadoop.hive.ql.udf; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "repeat", - value = "_FUNC_(str, n) - repeat str n times ", - extended = "Example:\n" + - " > SELECT _FUNC_('123', 2) FROM src LIMIT 1;\n" + - " '123123'" - ) -public class UDFRepeat extends UDF { - private Text result = new Text(); - +@description(name = "repeat", value = "_FUNC_(str, n) - repeat str n times ", extended = "Example:\n" + 
+ " > SELECT _FUNC_('123', 2) FROM src LIMIT 1;\n" + " '123123'") +public class UDFRepeat extends UDF { + private final Text result = new Text(); + public Text evaluate(Text s, IntWritable n) { if (n == null || s == null) { return null; } - - int len = n.get()*s.getLength(); - if(len < 0) { + + int len = n.get() * s.getLength(); + if (len < 0) { len = 0; } - + byte[] data = result.getBytes(); - - if(data.length < len) { + + if (data.length < len) { data = new byte[len]; } - - for(int i = 0; i < len; i += s.getLength()) { - for(int j = 0; j < s.getLength(); j++) { - data[i + j] = s.getBytes()[j]; + + for (int i = 0; i < len; i += s.getLength()) { + for (int j = 0; j < s.getLength(); j++) { + data[i + j] = s.getBytes()[j]; } } - + result.set(data, 0, len); return result; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDate.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDate.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDate.java (working copy) @@ -22,7 +22,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; - public class UDFToDate extends UDF { private static Log LOG = LogFactory.getLog(UDFToDate.class.getName()); @@ -30,7 +29,7 @@ public UDFToDate() { } - public java.sql.Date evaluate(String i) { + public java.sql.Date evaluate(String i) { if (i == null) { return null; } else { @@ -38,68 +37,63 @@ // Supported format: "YYYY-MM-DD" return java.sql.Date.valueOf(i); } catch (IllegalArgumentException e) { - // We return NULL when the string is in a wrong format, which is conservative. + // We return NULL when the string is in a wrong format, which is + // conservative. return null; } } } - + public java.sql.Date evaluate(Void i) { return null; } - + public java.sql.Date evaluate(Byte i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + public java.sql.Date evaluate(Short i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + public java.sql.Date evaluate(Integer i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + public java.sql.Date evaluate(Long i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + public java.sql.Date evaluate(Float i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + public java.sql.Date evaluate(Double i) { if (i == null) { return null; - } - else { + } else { return new java.sql.Date(i.longValue()); } } - + } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java (working copy) @@ -24,25 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "log", - value = "_FUNC_([b], x) - Returns the logarithm of x with base b", - extended = "Example:\n" + - " > SELECT _FUNC_(13, 13) FROM src LIMIT 1;\n" + - " 1" - ) +@description(name = "log", value = "_FUNC_([b], x) - Returns the logarithm of x with base b", extended = "Example:\n" + + " > SELECT _FUNC_(13, 13) FROM src LIMIT 1;\n" + " 1") public class UDFLog extends UDF { private static Log LOG = 
LogFactory.getLog(UDFLog.class.getName()); DoubleWritable result = new DoubleWritable(); + public UDFLog() { } /** * Returns the natural logarithm of "a". */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null || a.get() <= 0.0) { return null; } else { @@ -54,11 +50,11 @@ /** * Returns the logarithm of "a" with base "base". */ - public DoubleWritable evaluate(DoubleWritable base, DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable base, DoubleWritable a) { if (a == null || a.get() <= 0.0 || base == null || base.get() <= 1.0) { return null; } else { - result.set(Math.log(a.get())/Math.log(base.get())); + result.set(Math.log(a.get()) / Math.log(base.get())); return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java (working copy) @@ -23,15 +23,10 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "length", - value = "_FUNC_(str) - Returns the length of str ", - extended = "Example:\n" + - " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + - " 8" - ) +@description(name = "length", value = "_FUNC_(str) - Returns the length of str ", extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 8") public class UDFLength extends UDF { - private IntWritable result = new IntWritable(); + private final IntWritable result = new IntWritable(); public IntWritable evaluate(Text s) { if (s == null) { @@ -40,12 +35,12 @@ byte[] data = s.getBytes(); int len = 0; - for(int i = 0; i < s.getLength(); i++) { - if( GenericUDFUtils.isUtfStartByte(data[i]) ) { + for (int i = 0; i < s.getLength(); i++) { + if (GenericUDFUtils.isUtfStartByte(data[i])) { len++; } } - + result.set(len); return result; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java (working copy) @@ -28,82 +28,91 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; - /** - * The reason that we list evaluate methods with all numeric types is for - * both better performance and type checking (so we know int + int is still an int - * instead of a double); otherwise a single method that takes (Number a, Number b) - * and use a.doubleValue() == b.doubleValue() is enough. + * The reason that we list evaluate methods with all numeric types is for both + * better performance and type checking (so we know int + int is still an int + * instead of a double); otherwise a single method that takes (Number a, Number + * b) and use a.doubleValue() == b.doubleValue() is enough. * - * The case of int + double will be handled by implicit type casting using - * UDFRegistry.implicitConvertable method. + * The case of int + double will be handled by implicit type casting using + * UDFRegistry.implicitConvertable method. 
*/ -@description( - name = "+", - value = "a _FUNC_ b - Returns a+b" -) +@description(name = "+", value = "a _FUNC_ b - Returns a+b") public class UDFOPPlus extends UDFBaseNumericOp { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPPlus"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPPlus"); public UDFOPPlus() { } @Override - public ByteWritable evaluate(ByteWritable a, ByteWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ByteWritable evaluate(ByteWritable a, ByteWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - byteWritable.set((byte)(a.get() + b.get())); + byteWritable.set((byte) (a.get() + b.get())); return byteWritable; } @Override - public ShortWritable evaluate(ShortWritable a, ShortWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ShortWritable evaluate(ShortWritable a, ShortWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - shortWritable.set((short)(a.get() + b.get())); + shortWritable.set((short) (a.get() + b.get())); return shortWritable; } @Override - public IntWritable evaluate(IntWritable a, IntWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public IntWritable evaluate(IntWritable a, IntWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - intWritable.set((int)(a.get() + b.get())); + intWritable.set((a.get() + b.get())); return intWritable; } @Override - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } longWritable.set(a.get() + b.get()); return longWritable; } @Override - public FloatWritable evaluate(FloatWritable a, FloatWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public FloatWritable evaluate(FloatWritable a, FloatWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } floatWritable.set(a.get() + b.get()); return floatWritable; } - + @Override - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } doubleWritable.set(a.get() + b.get()); return doubleWritable; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRand.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRand.java (revision 
901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRand.java (working copy) @@ -27,22 +27,20 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "rand", - value = "_FUNC_([seed]) - Returns a pseudorandom number between 0 and 1" -) -@UDFType(deterministic=false) +@description(name = "rand", value = "_FUNC_([seed]) - Returns a pseudorandom number between 0 and 1") +@UDFType(deterministic = false) public class UDFRand extends UDF { private static Log LOG = LogFactory.getLog(UDFRand.class.getName()); private Random random; - + DoubleWritable result = new DoubleWritable(); + public UDFRand() { } - public DoubleWritable evaluate() { + public DoubleWritable evaluate() { if (random == null) { random = new Random(); } @@ -50,7 +48,7 @@ return result; } - public DoubleWritable evaluate(LongWritable seed) { + public DoubleWritable evaluate(LongWritable seed) { if (random == null) { random = new Random(seed.get()); } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMax.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMax.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMax.java (working copy) @@ -29,11 +29,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; - -@description( - name = "max", - value = "_FUNC_(expr) - Returns the maximum value of expr" - ) +@description(name = "max", value = "_FUNC_(expr) - Returns the maximum value of expr") public class UDAFMax extends UDAF { static public class MaxShortEvaluator implements UDAFEvaluator { @@ -270,5 +266,4 @@ } } - } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLTrim.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLTrim.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLTrim.java (working copy) @@ -18,24 +18,17 @@ package org.apache.hadoop.hive.ql.udf; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.Text; -import org.apache.commons.lang.StringUtils; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -@description( - name = "ltrim", - value = "_FUNC_(str) - Removes the leading space characters from str ", - extended = "Example:\n" + - " > SELECT _FUNC_(' facebook') FROM src LIMIT 1;\n" + - " 'facebook'" - ) +@description(name = "ltrim", value = "_FUNC_(str) - Removes the leading space characters from str ", extended = "Example:\n" + + " > SELECT _FUNC_(' facebook') FROM src LIMIT 1;\n" + " 'facebook'") public class UDFLTrim extends UDF { Text result = new Text(); + public UDFLTrim() { } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAscii.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAscii.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAscii.java (working copy) @@ -23,32 +23,27 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@description( - name = "ascii", - value = "_FUNC_(str) - returns the numeric value of the first character" + - " of str", - extended = "Returns 0 if str is empty or NULL if str is NULL\n" + - "Example:\n" + - " > SELECT _FUNC_('222') FROM src LIMIT 1;" + - " 50\n" + - " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + - " 50" - ) -public class UDFAscii extends UDF 
{ - - private IntWritable result = new IntWritable(); - +@description(name = "ascii", value = "_FUNC_(str) - returns the numeric value of the first character" + + " of str", extended = "Returns 0 if str is empty or NULL if str is NULL\n" + + "Example:\n" + + " > SELECT _FUNC_('222') FROM src LIMIT 1;" + + " 50\n" + + " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + " 50") +public class UDFAscii extends UDF { + + private final IntWritable result = new IntWritable(); + public IntWritable evaluate(Text s) { if (s == null) { return null; } - - if(s.getLength() > 0) { - result.set(s.getBytes()[0]); + + if (s.getLength() > 0) { + result.set(s.getBytes()[0]); } else { result.set(0); } - + return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSin.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSin.java (working copy) @@ -24,26 +24,21 @@ import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@description( - name = "sin", - value = "_FUNC_(x) - returns the sine of x (x is in radians)", - extended = "Example:\n " + - " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + - " 0" - ) +@description(name = "sin", value = "_FUNC_(x) - returns the sine of x (x is in radians)", extended = "Example:\n " + + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + " 0") public class UDFSin extends UDF { private static Log LOG = LogFactory.getLog(UDFSin.class.getName()); DoubleWritable result = new DoubleWritable(); - + public UDFSin() { } /** - * Take Sine of a. + * Take Sine of a. */ - public DoubleWritable evaluate(DoubleWritable a) { + public DoubleWritable evaluate(DoubleWritable a) { if (a == null) { return null; } else { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java (working copy) @@ -28,72 +28,82 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -@description( - name = "-", - value = "a _FUNC_ b - Returns the difference a-b" -) +@description(name = "-", value = "a _FUNC_ b - Returns the difference a-b") public class UDFOPMinus extends UDFBaseNumericOp { - private static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.udf.UDFOPMinus"); + private static Log LOG = LogFactory + .getLog("org.apache.hadoop.hive.ql.udf.UDFOPMinus"); public UDFOPMinus() { } @Override - public ByteWritable evaluate(ByteWritable a, ByteWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ByteWritable evaluate(ByteWritable a, ByteWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - byteWritable.set((byte)(a.get() - b.get())); + byteWritable.set((byte) (a.get() - b.get())); return byteWritable; } @Override - public ShortWritable evaluate(ShortWritable a, ShortWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public ShortWritable evaluate(ShortWritable a, ShortWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { 
return null; + } - shortWritable.set((short)(a.get() - b.get())); + shortWritable.set((short) (a.get() - b.get())); return shortWritable; } @Override - public IntWritable evaluate(IntWritable a, IntWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public IntWritable evaluate(IntWritable a, IntWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } - intWritable.set((int)(a.get() - b.get())); + intWritable.set((a.get() - b.get())); return intWritable; } @Override - public LongWritable evaluate(LongWritable a, LongWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public LongWritable evaluate(LongWritable a, LongWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } longWritable.set(a.get() - b.get()); return longWritable; } @Override - public FloatWritable evaluate(FloatWritable a, FloatWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public FloatWritable evaluate(FloatWritable a, FloatWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } floatWritable.set(a.get() - b.get()); return floatWritable; } - + @Override - public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { - // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b); - if ((a == null) || (b == null)) + public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { + // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + // + b); + if ((a == null) || (b == null)) { return null; + } doubleWritable.set(a.get() - b.get()); return doubleWritable; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSpace.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSpace.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSpace.java (working copy) @@ -25,36 +25,29 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +@description(name = "space", value = "_FUNC_(n) - returns n spaces", extended = "Example:\n " + + " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + " ' '") +public class UDFSpace extends UDF { + private final Text result = new Text(); -@description( - name = "space", - value = "_FUNC_(n) - returns n spaces", - extended = "Example:\n " + - " > SELECT _FUNC_(2) FROM src LIMIT 1;\n" + - " ' '" - ) -public class UDFSpace extends UDF { - private Text result = new Text(); - public Text evaluate(IntWritable n) { if (n == null) { return null; } - + int len = n.get(); - if(len < 0) { + if (len < 0) { len = 0; } - - if(result.getBytes().length >= len) { + + if (result.getBytes().length >= len) { result.set(result.getBytes(), 0, len); - } - else { + } else { byte[] spaces = new byte[len]; - Arrays.fill(spaces, (byte)' '); + Arrays.fill(spaces, (byte) ' '); result.set(spaces); } - + return result; } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFWrongArgLengthForTestCase.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFWrongArgLengthForTestCase.java (revision 901511) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFWrongArgLengthForTestCase.java (working copy) @@ -23,42 +23,42 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; - public class UDAFWrongArgLengthForTestCase extends UDAF { - static public class UDAFWrongArgLengthForTestCaseEvaluator implements UDAFEvaluator { - + static public class UDAFWrongArgLengthForTestCaseEvaluator implements + UDAFEvaluator { + private long mCount; - + public UDAFWrongArgLengthForTestCaseEvaluator() { super(); init(); } - + public void init() { mCount = 0; } - + Text emptyText = new Text(); - + public boolean iterate(Object o) { if (o != null && !emptyText.equals(o)) { - mCount ++; + mCount++; } return true; } - + public LongWritable terminatePartial() { return new LongWritable(mCount); } - + public boolean merge() { return true; } - + public LongWritable terminate() { return new LongWritable(mCount); } } - + }
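
The comments reformatted in the UDFLike.java hunk earlier in this patch describe an existing optimization rather than introduce one: simple LIKE patterns ("abc", "abc%", "%abc", "%abc%") are matched by direct character comparison, and only patterns containing other wildcards fall back to the regular expression built by likePatternToRegExp. The sketch below is not taken from the patch; it is a minimal, self-contained illustration of that classification under the simplifying assumption that escape sequences such as \% are ignored, and the names SimpleLike, classify and matches are hypothetical.

// Illustrative only -- not part of the patch. Mirrors the optimization the
// UDFLike comments describe: simple patterns avoid the regex engine entirely.
public class SimpleLike {

  enum PatternType { NONE, BEGIN, END, MIDDLE, COMPLEX }

  static PatternType classify(String likePattern) {
    boolean leading = likePattern.startsWith("%");
    boolean trailing = likePattern.length() > 1 && likePattern.endsWith("%");
    String inner = strip(likePattern, leading, trailing);
    if (inner.indexOf('%') >= 0 || inner.indexOf('_') >= 0) {
      return PatternType.COMPLEX;                            // e.g. "ab%c_de"
    }
    if (leading && trailing) { return PatternType.MIDDLE; }  // "%abc%"
    if (leading)             { return PatternType.END;    }  // "%abc"
    if (trailing)            { return PatternType.BEGIN;  }  // "abc%"
    return PatternType.NONE;                                 // "abc"
  }

  // Drop the leading/trailing '%' so only the literal part remains.
  private static String strip(String p, boolean leading, boolean trailing) {
    return p.substring(leading ? 1 : 0, trailing ? p.length() - 1 : p.length());
  }

  static boolean matches(String s, String likePattern) {
    boolean leading = likePattern.startsWith("%");
    boolean trailing = likePattern.length() > 1 && likePattern.endsWith("%");
    String inner = strip(likePattern, leading, trailing);
    switch (classify(likePattern)) {
    case NONE:   return s.equals(inner);
    case BEGIN:  return s.startsWith(inner);
    case END:    return s.endsWith(inner);
    case MIDDLE: return s.contains(inner);
    default:
      // In the patched file this case goes through likePatternToRegExp and a
      // cached java.util.regex.Pattern; omitted in this sketch.
      throw new UnsupportedOperationException("complex pattern: " + likePattern);
    }
  }

  public static void main(String[] args) {
    System.out.println(classify("%abc%") + " " + matches("xxabcxx", "%abc%")); // MIDDLE true
    System.out.println(classify("abc%") + " " + matches("abcdef", "abc%"));    // BEGIN true
    System.out.println(classify("%ab_c%"));                                    // COMPLEX
  }
}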
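
UDFReverse.java, also only reformatted here, relies on a two-pass trick that the touched comments spell out: first reverse the bytes of every individual UTF-8 character, then reverse the whole buffer, so that multi-byte sequences come out intact. The class below is an illustrative restatement outside of Hadoop's Text API; Utf8Reverse is a hypothetical name, and isUtfStartByte is assumed to behave like GenericUDFUtils.isUtfStartByte, i.e. true for any byte that is not a 10xxxxxx continuation byte.

import java.nio.charset.StandardCharsets;

// Illustrative sketch of the double-reversal used in UDFReverse; not the patch's code.
public class Utf8Reverse {

  // Assumed equivalent of GenericUDFUtils.isUtfStartByte: true unless the
  // byte is a UTF-8 continuation byte (10xxxxxx).
  static boolean isUtfStartByte(byte b) {
    return (b & 0xC0) != 0x80;
  }

  // Reverse arr[first..last] in place (both indices inclusive).
  static void reverse(byte[] arr, int first, int last) {
    for (int i = 0; i < (last - first + 1) / 2; i++) {
      byte tmp = arr[last - i];
      arr[last - i] = arr[first + i];
      arr[first + i] = tmp;
    }
  }

  static String reverseUtf8(String s) {
    byte[] data = s.getBytes(StandardCharsets.UTF_8);
    // Pass 1: reverse the bytes of each character, so every multi-byte
    // sequence ends up in the right order again after pass 2.
    int prev = 0; // index where the current character starts
    for (int i = 1; i < data.length; i++) {
      if (isUtfStartByte(data[i])) {
        reverse(data, prev, i - 1);
        prev = i;
      }
    }
    reverse(data, prev, data.length - 1);
    // Pass 2: reverse the whole buffer.
    reverse(data, 0, data.length - 1);
    return new String(data, StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    System.out.println(reverseUtf8("Facebook")); // koobecaF
    System.out.println(reverseUtf8("héllo"));    // olléh
  }
}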
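
The byte-level arithmetic in the UDFLpad.java hunk is easier to follow without the reused Text buffer: the pad fills max(len - str.length, 0) leading positions, the input string is copied after it, and anything beyond len is dropped. The String-based version below only restates that rule for illustration; LpadSketch and lpad are hypothetical names, the empty-pad guard is added for the sketch, and it is not the implementation the patch touches.

// Illustrative String-based restatement of the lpad rule; not the patch's code.
public class LpadSketch {

  static String lpad(String s, int len, String pad) {
    if (s == null || pad == null || len < 0) {
      return null;
    }
    if (s.length() >= len) {
      // If str is longer than len, the result is shortened to len characters.
      return s.substring(0, len);
    }
    if (pad.isEmpty()) {
      // Guard added for the sketch so the loop below terminates.
      return null;
    }
    StringBuilder sb = new StringBuilder(len);
    int padLen = len - s.length(); // number of padding characters needed
    while (sb.length() < padLen) {
      // Repeat the pad string, clipping the final repetition.
      sb.append(pad, 0, Math.min(pad.length(), padLen - sb.length()));
    }
    sb.append(s);
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(lpad("hi", 5, "??")); // ???hi
    System.out.println(lpad("hi", 1, "??")); // h
  }
}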