diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/PartitionIterable.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/PartitionIterable.java
new file mode 100644
index 0000000..b956987
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/PartitionIterable.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * PartitionIterable - effectively a lazy Iterable&lt;Partition&gt;
+ *
+ * Sometimes, we have a need for iterating through a list of partitions,
+ * but the list of partitions can be too big to fetch as a single object.
+ * Thus, the goal of PartitionIterable is to act as an Iterable&lt;Partition&gt;
+ * while lazily fetching each relevant partition, one after the other as
+ * independent metadata calls.
+ *
+ * It is very likely that any calls to PartitionIterable are going to result
+ * in a large number of calls, so use sparingly only when the memory cost
+ * of fetching all the partitions in one shot is too prohibitive.
+ *
+ * This is still pretty costly in that it would retain a list of partition
+ * names, but that should be far less expensive than the entire partition
+ * objects.
+ *
+ * Note that remove() is an illegal call on this, and will result in an
+ * IllegalStateException.
+ */
+public class PartitionIterable implements Iterable<Partition> {
+
+  @Override
+  public Iterator<Partition> iterator() {
+    return new Iterator<Partition>(){
+
+      private boolean initialized = false;
+      private Iterator<Partition> ptnsIterator = null;
+
+      private Iterator<String> partitionNamesIter = null;
+      private Iterator<Partition> batchIter = null;
+
+      private void initialize(){
+        if(!initialized){
+          if (currType == Type.LIST_PROVIDED){
+            ptnsIterator = ptnsProvided.iterator();
+          } else {
+            partitionNamesIter = partitionNames.iterator();
+          }
+          initialized = true;
+        }
+      }
+
+      @Override
+      public boolean hasNext() {
+        initialize();
+        if (currType == Type.LIST_PROVIDED){
+          return ptnsIterator.hasNext();
+        } else {
+          return ((batchIter != null) && batchIter.hasNext()) || partitionNamesIter.hasNext();
+        }
+      }
+
+      @Override
+      public Partition next() {
+        initialize();
+        if (currType == Type.LIST_PROVIDED){
+          return ptnsIterator.next();
+        }
+
+        if ((batchIter == null) || !batchIter.hasNext()){
+          getNextBatch();
+        }
+
+        return batchIter.next();
+      }
+
+      private void getNextBatch() {
+        int batch_counter = 0;
+        List<String> nameBatch = new ArrayList<String>();
+        while (batch_counter < batch_size && partitionNamesIter.hasNext()){
+          nameBatch.add(partitionNamesIter.next());
+          batch_counter++;
+        }
+        try {
+          batchIter = db.getPartitionsByNames(table,nameBatch).iterator();
+        } catch (HiveException e) {
+          throw new RuntimeException(e);
+        }
+      }
+
+      @Override
+      public void remove() {
+        throw new IllegalStateException(
+            "PartitionIterable is a read-only iterable and remove() is unsupported");
+      }
+    };
+  }
+
+  enum Type {
+    LIST_PROVIDED,  // Where a List<Partition> is already provided
+    LAZY_FETCH_PARTITIONS // Where we want to fetch Partitions lazily when they're needed
+  }
+
+  private Type currType = null;
+
+  // used for LIST_PROVIDED cases
+  private List<Partition> ptnsProvided = null;
+
+  // used for LAZY_FETCH_PARTITIONS cases
+  private Hive db = null;
+  private Table table = null;
+  private Map<String, String> partialPartitionSpec = null;
+  private List<String> partitionNames = null;
+  private int batch_size;
+
+  /**
+   * Dummy constructor, which simply acts as an iterator on an already-present
+   * list of partitions, allows for easy drop-in replacement for other methods
+   * that already have a List<Partition>
+   */
+  public PartitionIterable(List<Partition> ptnsProvided){
+    this.currType = Type.LIST_PROVIDED;
+    this.ptnsProvided = ptnsProvided;
+  }
+
+  /**
+   * Primary constructor that fetches all partitions in a given table, given
+   * a Hive object and a table object, and a partial partition spec.
+   */
+  public PartitionIterable(Hive db, Table table, Map<String, String> partialPartitionSpec,
+      int batch_size) throws HiveException {
+    this.currType = Type.LAZY_FETCH_PARTITIONS;
+    this.db = db;
+    this.table = table;
+    this.partialPartitionSpec = partialPartitionSpec;
+    this.batch_size = batch_size;
+
+    if (this.partialPartitionSpec == null){
+      partitionNames = db.getPartitionNames(
+          table.getDbName(),table.getTableName(), (short) -1);
+    } else {
+      partitionNames = db.getPartitionNames(
+          table.getDbName(),table.getTableName(),partialPartitionSpec,(short)-1);
+    }
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java
index 56999de..16dcdc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java
@@ -168,37 +168,43 @@ public static String relativeToAbsolutePath(HiveConf conf, String location) thro
   public static final String METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION = null;
 
   public static void createExportDump(FileSystem fs, Path metadataPath,
       org.apache.hadoop.hive.ql.metadata.Table tableHandle,
-      List<org.apache.hadoop.hive.ql.metadata.Partition> partitions) throws SemanticException, IOException {
+      Iterable<org.apache.hadoop.hive.ql.metadata.Partition> partitions) throws SemanticException, IOException {
+    OutputStream out = fs.create(metadataPath);
+    write(out, "{ \"version\":\""+METADATA_FORMAT_VERSION+"\"");
+    if (METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION != null) {
+      write(out, ",\"fcversion\":\""+METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION+"\"");
+    }
+    TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     try {
-      JSONObject jsonContainer = new JSONObject();
-      jsonContainer.put("version", METADATA_FORMAT_VERSION);
-      if (METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION != null) {
-        jsonContainer.put("fcversion", METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION);
-      }
-      TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
-      try {
-        String tableDesc = serializer.toString(tableHandle.getTTable(), "UTF-8");
-        jsonContainer.put("table", tableDesc);
-        JSONArray jsonPartitions = new JSONArray();
-        if (partitions != null) {
-          for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) {
-            String partDesc = serializer.toString(partition.getTPartition(), "UTF-8");
-            jsonPartitions.put(partDesc);
+      String tableDesc = serializer.toString(tableHandle.getTTable(), "UTF-8");
+      write(out, ",\"table\":"+ JSONObject.quote(tableDesc) +"");
+      write(out, ",\"partitions\":[");
+      if (partitions != null) {
+        boolean firstPartition = true;
+        for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) {
+          String partDesc = serializer.toString(partition.getTPartition(), "UTF-8");
+          if (firstPartition){
+            write(out, JSONObject.quote(partDesc));
+            firstPartition = false;
+          } else {
+            write(out, ","+JSONObject.quote(partDesc));
           }
+          out.flush();
         }
-        jsonContainer.put("partitions", jsonPartitions);
-      } catch (TException e) {
-        throw new SemanticException(
-            ErrorMsg.GENERIC_ERROR
-                .getMsg("Exception while serializing the metastore objects"), e);
       }
-      OutputStream out = fs.create(metadataPath);
-      out.write(jsonContainer.toString().getBytes("UTF-8"));
-      out.close();
-
-    } catch (JSONException e) {
-      throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg("Error in serializing metadata"), e);
+      write(out, "]");
+    } catch (TException e) {
+      throw new SemanticException(
+          ErrorMsg.GENERIC_ERROR
+              .getMsg("Exception while serializing the metastore objects"), e);
     }
+    write(out, "}");
+    out.close();
+
+  }
+
+  private static void write(OutputStream out, String s) throws IOException {
+    out.write(s.getBytes("UTF-8"));
   }
 
   public static Map.Entry<org.apache.hadoop.hive.metastore.api.Table,
       List<org.apache.hadoop.hive.metastore.api.Partition>>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java
index 7091fef..f56a1a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
+
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.Serializable;
@@ -80,11 +82,13 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
     }
 
-    List<Partition> partitions = null;
+    PartitionIterable partitions = null;
     try {
-      partitions = null;
       if (ts.tableHandle.isPartitioned()) {
-        partitions = (ts.partitions != null) ? ts.partitions : db.getPartitions(ts.tableHandle);
+        partitions = (ts.partitions != null) ?
+            new PartitionIterable(ts.partitions) :
+            new PartitionIterable(db,ts.tableHandle,null,conf.getIntVar(
+                HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
       }
       Path path = new Path(ctx.getLocalTmpPath(), "_metadata");
       EximUtil.createExportDump(FileSystem.getLocal(conf), path, ts.tableHandle, partitions);