diff --git .gitignore .gitignore
index d0c97d1..ae78098 100644
--- .gitignore
+++ .gitignore
@@ -25,3 +25,4 @@ hcatalog/core/target
hcatalog/webhcat/java-client/target
hcatalog/storage-handlers/hbase/target
hcatalog/webhcat/svr/target
+conf/hive-default.xml.template
diff --git common/pom.xml common/pom.xml
index b7f6642..4b5b6ba 100644
--- common/pom.xml
+++ common/pom.xml
@@ -108,6 +108,35 @@
+
+
+ dist
+
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+
+
+ generate-template
+ package
+
+
+
+
+
+
+
+
+ run
+
+
+
+
+
+
+
@@ -149,21 +178,6 @@
run
-
- generate-template
- package
-
-
-
-
-
-
-
-
- run
-
-
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
deleted file mode 100644
index d6a8e70..0000000
--- conf/hive-default.xml.template
+++ /dev/null
@@ -1,3045 +0,0 @@
-
-
-
-
-
-
-
- hive.exec.script.wrapper
-
-
-
-
- hive.exec.plan
-
-
-
-
- hive.plan.serialization.format
- kryo
-
- Query plan format serialization between client and task nodes.
- Two supported values are : kryo and javaXML. Kryo is default.
-
-
-
- hive.exec.scratchdir
- /tmp/hive-${system:user.name}
- Scratch space for Hive jobs
-
-
- hive.exec.local.scratchdir
- ${system:java.io.tmpdir}/${system:user.name}
- Local scratch space for Hive jobs
-
-
- hive.scratch.dir.permission
- 700
-
-
-
- hive.exec.submitviachild
- false
-
-
-
- hive.exec.submit.local.task.via.child
- true
-
- Determines whether local tasks (typically mapjoin hashtable generation phase) runs in
- separate JVM (true recommended) or not.
- Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues.
-
-
-
- hive.exec.script.maxerrsize
- 100000
-
- Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task).
- This prevents runaway scripts from filling logs partitions to capacity
-
-
-
- hive.exec.script.allow.partial.consumption
- false
-
- When enabled, this option allows a user script to exit successfully without consuming
- all the data from the standard input.
-
-
-
- stream.stderr.reporter.prefix
- reporter:
- Streaming jobs that log to standard error with this prefix can log counter or status information.
-
-
- stream.stderr.reporter.enabled
- true
- Enable consumption of status and counter messages for streaming jobs.
-
-
- hive.exec.compress.output
- false
-
- This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed.
- The compression codec and other options are determined from Hadoop config variables mapred.output.compress*
-
-
-
- hive.exec.compress.intermediate
- false
-
- This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed.
- The compression codec and other options are determined from Hadoop config variables mapred.output.compress*
-
-
-
- hive.intermediate.compression.codec
-
-
-
-
- hive.intermediate.compression.type
-
-
-
-
- hive.exec.reducers.bytes.per.reducer
- 1000000000
- size per reducer.The default is 1G, i.e if the input size is 10G, it will use 10 reducers.
-
-
- hive.exec.reducers.max
- 999
-
- max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is
- negative, Hive will use this one as the max number of reducers when automatically determine number of reducers.
-
-
-
- hive.exec.pre.hooks
-
-
- Comma-separated list of pre-execution hooks to be invoked for each statement.
- A pre-execution hook is specified as the name of a Java class which implements the
- org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
-
-
-
- hive.exec.post.hooks
-
-
- Comma-separated list of post-execution hooks to be invoked for each statement.
- A post-execution hook is specified as the name of a Java class which implements the
- org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
-
-
-
- hive.exec.failure.hooks
-
-
- Comma-separated list of on-failure hooks to be invoked for each statement.
- An on-failure hook is specified as the name of Java class which implements the
- org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
-
-
-
- hive.client.stats.publishers
-
-
- Comma-separated list of statistics publishers to be invoked on counters on each job.
- A client stats publisher is specified as the name of a Java class which implements the
- org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
-
-
-
- hive.exec.parallel
- false
- Whether to execute jobs in parallel
-
-
- hive.exec.parallel.thread.number
- 8
- How many jobs at most can be executed in parallel
-
-
- hive.mapred.reduce.tasks.speculative.execution
- true
- Whether speculative execution for reducers should be turned on.
-
-
- hive.exec.counters.pull.interval
- 1000
-
- The interval with which to poll the JobTracker for the counters the running job.
- The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be.
-
-
-
- hive.exec.dynamic.partition
- true
- Whether or not to allow dynamic partitions in DML/DDL.
-
-
- hive.exec.dynamic.partition.mode
- strict
-
- In strict mode, the user must specify at least one static partition
- in case the user accidentally overwrites all partitions.
-
-
-
- hive.exec.max.dynamic.partitions
- 1000
- Maximum number of dynamic partitions allowed to be created in total.
-
-
- hive.exec.max.dynamic.partitions.pernode
- 100
- Maximum number of dynamic partitions allowed to be created in each mapper/reducer node.
-
-
- hive.exec.max.created.files
- 100000
- Maximum number of HDFS files created by all mappers/reducers in a MapReduce job.
-
-
- hive.downloaded.resources.dir
- ${system:java.io.tmpdir}/${hive.session.id}_resources
- Temporary local directory for added resources in the remote file system.
-
-
- hive.exec.default.partition.name
- __HIVE_DEFAULT_PARTITION__
-
- The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped.
- This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc).
- The user has to be aware that the dynamic partition value should not contain this value to avoid confusions.
-
-
-
- hive.lockmgr.zookeeper.default.partition.name
- __HIVE_DEFAULT_ZOOKEEPER_PARTITION__
-
-
-
- hive.exec.show.job.failure.debug.info
- true
-
- If a job fails, whether to provide a link in the CLI to the task with the
- most failures, along with debugging hints if applicable.
-
-
-
- hive.exec.job.debug.capture.stacktraces
- true
-
- Whether or not stack traces parsed from the task logs of a sampled failed task
- for each failed job should be stored in the SessionState
-
-
-
- hive.exec.job.debug.timeout
- 30000
-
-
-
- hive.exec.tasklog.debug.timeout
- 20000
-
-
-
- hive.output.file.extension
-
-
- String used as a file extension for output files.
- If not set, defaults to the codec extension for text files (e.g. ".gz"), or no extension otherwise.
-
-
-
- hive.exec.mode.local.auto
- false
- Let Hive determine whether to run in local mode automatically
-
-
- hive.exec.mode.local.auto.inputbytes.max
- 134217728
- When hive.exec.mode.local.auto is true, input bytes should less than this for local mode.
-
-
- hive.exec.mode.local.auto.input.files.max
- 4
- When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode.
-
-
- hive.exec.drop.ignorenonexistent
- true
- Do not report an error if DROP TABLE/VIEW specifies a non-existent table/view
-
-
- hive.ignore.mapjoin.hint
- true
- Ignore the mapjoin hint
-
-
- hive.file.max.footer
- 100
- maximum number of lines for footer user can define for a table file
-
-
- hive.resultset.use.unique.column.names
- true
-
- Make column names unique in the result set by qualifying column names with table alias if needed.
- Table alias will be added to column names for queries of type "select *" or
- if query explicitly uses table alias "select r1.x..".
-
-
-
- fs.har.impl
- org.apache.hadoop.hive.shims.HiveHarFileSystem
- The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20
-
-
- hive.metastore.metadb.dir
-
-
-
-
- hive.metastore.warehouse.dir
- /user/hive/warehouse
- location of default database for the warehouse
-
-
- hive.metastore.uris
-
- Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.
-
-
- hive.metastore.connect.retries
- 3
- Number of retries while opening a connection to metastore
-
-
- hive.metastore.failure.retries
- 1
- Number of retries upon failure of Thrift metastore calls
-
-
- hive.metastore.client.connect.retry.delay
- 1
- Number of seconds for the client to wait between consecutive connection attempts
-
-
- hive.metastore.client.socket.timeout
- 600
- MetaStore Client socket timeout in seconds
-
-
- javax.jdo.option.ConnectionPassword
- mine
- password to use against metastore database
-
-
- hive.metastore.ds.connection.url.hook
-
- Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used
-
-
- javax.jdo.option.Multithreaded
- true
- Set this to true if multiple threads access metastore through JDO concurrently.
-
-
- javax.jdo.option.ConnectionURL
- jdbc:derby:;databaseName=metastore_db;create=true
- JDBC connect string for a JDBC metastore
-
-
- hive.metastore.force.reload.conf
- false
-
- Whether to force reloading of the metastore configuration (including
- the connection URL, before the next metastore query that accesses the
- datastore. Once reloaded, this value is reset to false. Used for
- testing only.
-
-
-
- hive.hmshandler.retry.attempts
- 1
- The number of times to retry a HMSHandler call if there were a connection error
-
-
- hive.hmshandler.retry.interval
- 1000
- The number of milliseconds between HMSHandler retry attempts
-
-
- hive.hmshandler.force.reload.conf
- false
-
- Whether to force reloading of the HMSHandler configuration (including
- the connection URL, before the next metastore query that accesses the
- datastore. Once reloaded, this value is reset to false. Used for
- testing only.
-
-
-
- hive.metastore.server.min.threads
- 200
- Minimum number of worker threads in the Thrift server's pool.
-
-
- hive.metastore.server.max.threads
- 100000
- Maximum number of worker threads in the Thrift server's pool.
-
-
- hive.metastore.server.tcp.keepalive
- true
- Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections.
-
-
- hive.metastore.archive.intermediate.original
- _INTERMEDIATE_ORIGINAL
-
- Intermediate dir suffixes used for archiving. Not important what they
- are, as long as collisions are avoided
-
-
-
- hive.metastore.archive.intermediate.archived
- _INTERMEDIATE_ARCHIVED
-
-
-
- hive.metastore.archive.intermediate.extracted
- _INTERMEDIATE_EXTRACTED
-
-
-
- hive.metastore.kerberos.keytab.file
-
- The path to the Kerberos Keytab file containing the metastore Thrift server's service principal.
-
-
- hive.metastore.kerberos.principal
- hive-metastore/_HOST@EXAMPLE.COM
-
- The service principal for the metastore Thrift server.
- The special string _HOST will be replaced automatically with the correct host name.
-
-
-
- hive.metastore.sasl.enabled
- false
- If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos.
-
-
- hive.metastore.thrift.framed.transport.enabled
- false
- If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used.
-
-
- hive.cluster.delegation.token.store.class
- org.apache.hadoop.hive.thrift.MemoryTokenStore
- The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster.
-
-
- hive.cluster.delegation.token.store.zookeeper.connectString
-
- The ZooKeeper token store connect string.
-
-
- hive.cluster.delegation.token.store.zookeeper.znode
- /hive/cluster/delegation
- The root path for token store data.
-
-
- hive.cluster.delegation.token.store.zookeeper.acl
-
- ACL for token store entries. List comma separated all server principals for the cluster.
-
-
- hive.metastore.cache.pinobjtypes
- Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
- List of comma separated metastore object types that should be pinned in the cache
-
-
- datanucleus.connectionPoolingType
- BONECP
- Specify connection pool library for datanucleus
-
-
- datanucleus.validateTables
- false
- validates existing schema against code. turn this on if you want to verify existing schema
-
-
- datanucleus.validateColumns
- false
- validates existing schema against code. turn this on if you want to verify existing schema
-
-
- datanucleus.validateConstraints
- false
- validates existing schema against code. turn this on if you want to verify existing schema
-
-
- datanucleus.storeManagerType
- rdbms
- metadata store type
-
-
- datanucleus.autoCreateSchema
- true
- creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once
-
-
- datanucleus.fixedDatastore
- false
-
-
-
- hive.metastore.schema.verification
- false
-
- Enforce metastore schema version consistency.
- True: Verify that version information stored in metastore matches with one from Hive jars. Also disable automatic
- schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures
- proper metastore schema migration. (Default)
- False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
-
-
-
- datanucleus.autoStartMechanismMode
- checked
- throw exception if metadata tables are incorrect
-
-
- datanucleus.transactionIsolation
- read-committed
- Default transaction isolation level for identity generation.
-
-
- datanucleus.cache.level2
- false
- Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server
-
-
- datanucleus.cache.level2.type
- none
-
-
-
- datanucleus.identifierFactory
- datanucleus1
-
- Name of the identifier factory to use when generating table/column names etc.
- 'datanucleus1' is used for backward compatibility with DataNucleus v1
-
-
-
- datanucleus.rdbms.useLegacyNativeValueStrategy
- true
-
-
-
- datanucleus.plugin.pluginRegistryBundleCheck
- LOG
- Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]
-
-
- hive.metastore.batch.retrieve.max
- 300
-
- Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch.
- The higher the number, the less the number of round trips is needed to the Hive metastore server,
- but it may also cause higher memory requirement at the client side.
-
-
-
- hive.metastore.batch.retrieve.table.partition.max
- 1000
- Maximum number of table partitions that metastore internally retrieves in one batch.
-
-
- hive.metastore.init.hooks
-
-
- A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization.
- An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener.
-
-
-
- hive.metastore.pre.event.listeners
-
- List of comma separated listeners for metastore events.
-
-
- hive.metastore.event.listeners
-
-
-
-
- hive.metastore.authorization.storage.checks
- false
-
- Should the metastore do authorization checks against the underlying storage (usually hdfs)
- for operations like drop-partition (disallow the drop-partition if the user in
- question doesn't have permissions to delete the corresponding directory
- on the storage).
-
-
-
- hive.metastore.event.clean.freq
- 0
- Frequency at which timer task runs to purge expired events in metastore(in seconds).
-
-
- hive.metastore.event.expiry.duration
- 0
- Duration after which events expire from events table (in seconds)
-
-
- hive.metastore.execute.setugi
- true
-
- In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using
- the client's reported user and group permissions. Note that this property must be set on
- both the client and server sides. Further note that its best effort.
- If client sets its to true and server sets it to false, client setting will be ignored.
-
-
-
- hive.metastore.partition.name.whitelist.pattern
-
- Partition names will be checked against this regex pattern and rejected if not matched.
-
-
- hive.metastore.integral.jdo.pushdown
- false
-
- Allow JDO query pushdown for integral partition columns in metastore. Off by default. This
- improves metastore perf for integral columns, especially if there's a large number of partitions.
- However, it doesn't work correctly with integral values that are not normalized (e.g. have
- leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization
- is also irrelevant.
-
-
-
- hive.metastore.try.direct.sql
- true
-
-
-
- hive.metastore.try.direct.sql.ddl
- true
-
-
-
- hive.metastore.disallow.incompatible.col.type.changes
- false
-
- If true (default is false), ALTER TABLE operations which change the type of
- a column (say STRING) to an incompatible type (say MAP<STRING, STRING>) are disallowed.
- RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the
- datatypes can be converted from string to any type. The map is also serialized as
- a string, which can be read as a string as well. However, with any binary
- serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions
- when subsequently trying to access old partitions.
-
- Primitive types like INT, STRING, BIGINT, etc are compatible with each other and are
- not blocked.
-
- See HIVE-4409 for more details.
-
-
-
- hive.table.parameters.default
-
- Default property values for newly created tables
-
-
- hive.ddl.createtablelike.properties.whitelist
-
- Table Properties to copy over when executing a Create Table Like.
-
-
- hive.metastore.rawstore.impl
- org.apache.hadoop.hive.metastore.ObjectStore
-
- Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface.
- This class is used to store and retrieval of raw metadata objects such as table, database
-
-
-
- javax.jdo.option.ConnectionDriverName
- org.apache.derby.jdbc.EmbeddedDriver
- Driver class name for a JDBC metastore
-
-
- javax.jdo.PersistenceManagerFactoryClass
- org.datanucleus.api.jdo.JDOPersistenceManagerFactory
- class implementing the jdo persistence
-
-
- hive.metastore.expression.proxy
- org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore
-
-
-
- javax.jdo.option.DetachAllOnCommit
- true
- Detaches all objects from session so that they can be used after transaction is committed
-
-
- javax.jdo.option.NonTransactionalRead
- true
- Reads outside of transactions
-
-
- javax.jdo.option.ConnectionUserName
- APP
- Username to use against metastore database
-
-
- hive.metastore.end.function.listeners
-
- List of comma separated listeners for the end of metastore functions.
-
-
- hive.metastore.partition.inherit.table.properties
-
-
- List of comma separated keys occurring in table properties which will get inherited to newly created partitions.
- * implies all the keys will get inherited.
-
-
-
- hive.metadata.export.location
-
-
- When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener,
- it is the location to which the metadata will be exported. The default is an empty string, which results in the
- metadata being exported to the current user's home directory on HDFS.
-
-
-
- hive.metadata.move.exported.metadata.to.trash
- true
-
- When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener,
- this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory
- alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data.
-
-
-
- hive.cli.errors.ignore
- false
-
-
-
- hive.cli.print.current.db
- false
- Whether to include the current database in the Hive prompt.
-
-
- hive.cli.prompt
- hive
-
- Command line prompt configuration value. Other hiveconf can be used in this configuration value.
- Variable substitution will only be invoked at the Hive CLI startup.
-
-
-
- hive.cli.pretty.output.num.cols
- -1
-
- The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.
- If the value of this property is -1, then Hive will use the auto-detected terminal width.
-
-
-
- hive.metastore.fs.handler.class
- org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl
-
-
-
- hive.session.id
-
-
-
-
- hive.session.silent
- false
-
-
-
- hive.session.history.enabled
- false
- Whether to log Hive query, query plan, runtime statistics etc.
-
-
- hive.query.string
-
- Query being executed (might be multiple per a session)
-
-
- hive.query.id
-
- ID for query being executed (might be multiple per a session)
-
-
- hive.jobname.length
- 50
- max jobname length
-
-
- hive.jar.path
-
-
-
-
- hive.aux.jars.path
-
-
-
-
- hive.added.files.path
-
-
-
-
- hive.added.jars.path
-
-
-
-
- hive.added.archives.path
-
-
-
-
- hive.auto.progress.timeout
- 0
-
- How long to run autoprogressor for the script/UDTF operators (in seconds).
- Set to 0 for forever.
-
-
-
- hive.table.name
-
-
-
-
- hive.partition.name
-
-
-
-
- hive.script.auto.progress
- false
-
- Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker
- to avoid the task getting killed because of inactivity. Hive sends progress information when the script is
- outputting to stderr. This option removes the need of periodically producing stderr messages,
- but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker.
-
-
-
- hive.script.operator.id.env.var
- HIVE_SCRIPT_OPERATOR_ID
-
- Name of the environment variable that holds the unique script operator ID in the user's
- transform function (the custom mapper/reducer that the user has specified in the query)
-
-
-
- hive.script.operator.truncate.env
- false
- Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)
-
-
- hive.mapred.mode
- nonstrict
-
- The mode in which the Hive operations are being performed.
- In strict mode, some risky queries are not allowed to run. They include:
- Cartesian Product.
- No partition being picked up for a query.
- Comparing bigints and strings.
- Comparing bigints and doubles.
- Orderby without limit.
-
-
-
- hive.alias
-
-
-
-
- hive.map.aggr
- true
- Whether to use map-side aggregation in Hive Group By queries
-
-
- hive.groupby.skewindata
- false
- Whether there is skew in data to optimize group by queries
-
-
- hive.optimize.multigroupby.common.distincts
- true
-
- Whether to optimize a multi-groupby query with the same distinct.
- Consider a query like:
-
- from src
- insert overwrite table dest1 select col1, count(distinct colx) group by col1
- insert overwrite table dest2 select col2, count(distinct colx) group by col2;
-
- With this parameter set to true, first we spray by the distinct value (colx), and then
- perform the 2 groups bys. This makes sense if map-side aggregation is turned off. However,
- with maps-side aggregation, it might be useful in some cases to treat the 2 inserts independently,
- thereby performing the query above in 2MR jobs instead of 3 (due to spraying by distinct key first).
- If this parameter is turned off, we don't consider the fact that the distinct key is the same across
- different MR jobs.
-
-
-
- hive.join.emit.interval
- 1000
- How many rows in the right-most join operand Hive should buffer before emitting the join result.
-
-
- hive.join.cache.size
- 25000
- How many rows in the joining tables (except the streaming table) should be cached in memory.
-
-
- hive.mapjoin.bucket.cache.size
- 100
-
-
-
- hive.mapjoin.optimized.hashtable
- true
-
- Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez,
- because memory-optimized hashtable cannot be serialized.
-
-
-
- hive.mapjoin.optimized.keys
- true
-
- Whether MapJoin hashtable should use optimized (size-wise), keys, allowing the table to take less
- memory. Depending on key, the memory savings for entire table can be 5-15% or so.
-
-
-
- hive.mapjoin.lazy.hashtable
- true
-
- Whether MapJoin hashtable should deserialize values on demand. Depending on how many values in
- the table the join will actually touch, it can save a lot of memory by not creating objects for
- rows that are not needed. If all rows are needed obviously there's no gain.
-
-
-
- hive.mapjoin.optimized.hashtable.wbsize
- 10485760
-
- Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to
- store data. This is one buffer size. HT may be slightly faster if this is larger, but for small
- joins unnecessary memory will be allocated and then trimmed.
-
-
-
- hive.smbjoin.cache.rows
- 10000
- How many rows with the same key value should be cached in memory per smb joined table.
-
-
- hive.groupby.mapaggr.checkinterval
- 100000
- Number of rows after which size of the grouping keys/aggregation classes is performed
-
-
- hive.map.aggr.hash.percentmemory
- 0.5
- Portion of total memory to be used by map-side group aggregation hash table
-
-
- hive.mapjoin.followby.map.aggr.hash.percentmemory
- 0.3
- Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join
-
-
- hive.map.aggr.hash.force.flush.memory.threshold
- 0.9
-
- The max memory to be used by map-side group aggregation hash table.
- If the memory usage is higher than this number, force to flush data
-
-
-
- hive.map.aggr.hash.min.reduction
- 0.5
-
- Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number.
- Set to 1 to make sure hash aggregation is never turned off.
-
-
-
- hive.multigroupby.singlereducer
- true
-
- Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has
- common group by keys, it will be optimized to generate single M/R job.
-
-
-
- hive.map.groupby.sorted
- false
-
- If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform
- the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this
- is that it limits the number of mappers to the number of files.
-
-
-
- hive.map.groupby.sorted.testmode
- false
-
- If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform
- the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan
- is not converted, but a query property is set to denote the same.
-
-
-
- hive.groupby.orderby.position.alias
- false
- Whether to enable using Column Position Alias in Group By or Order By
-
-
- hive.new.job.grouping.set.cardinality
- 30
-
- Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.
- For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;
- 4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).
- This can lead to explosion across map-reduce boundary if the cardinality of T is very high,
- and map-side aggregation does not do a very good job.
-
- This parameter decides if Hive should add an additional map-reduce job. If the grouping set
- cardinality (4 in the example above), is more than this value, a new MR job is added under the
- assumption that the original group by will reduce the data size.
-
-
-
- hive.udtf.auto.progress
- false
-
- Whether Hive should automatically send progress information to TaskTracker
- when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious
- because this may prevent TaskTracker from killing tasks with infinite loops.
-
-
-
- hive.default.fileformat
- TextFile
-
- Default file format for CREATE TABLE statement.
- Options are TextFile, SequenceFile, RCfile and ORC. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]
-
-
-
- hive.query.result.fileformat
- TextFile
- Default file format for storing result of the query. Allows TextFile, SequenceFile and RCfile
-
-
- hive.fileformat.check
- true
- Whether to check file format or not when loading data files
-
-
- hive.default.rcfile.serde
- org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
- The default SerDe Hive will use for the RCFile format
-
-
- hive.default.serde
- org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- The default SerDe Hive will use for storage formats that do not specify a SerDe.
-
-
- hive.serdes.using.metastore.for.schema
- org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe,org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe,org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team
-
-
- hive.querylog.location
- ${system:java.io.tmpdir}/${system:user.name}
- Location of Hive run time structured log file
-
-
- hive.querylog.enable.plan.progress
- true
-
- Whether to log the plan's progress every time a job's progress is checked.
- These logs are written to the location specified by hive.querylog.location
-
-
-
- hive.querylog.plan.progress.interval
- 60000
-
- The interval to wait between logging the plan's progress in milliseconds.
- If there is a whole number percentage change in the progress of the mappers or the reducers,
- the progress is logged regardless of this value.
- The actual interval will be the ceiling of (this value divided by the value of
- hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval
- I.e. if it is not divide evenly by the value of hive.exec.counters.pull.interval it will be
- logged less frequently than specified.
- This only has an effect if hive.querylog.enable.plan.progress is set to true.
-
-
-
- hive.script.serde
- org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- The default SerDe for transmitting input data to and reading output data from the user scripts.
-
-
- hive.script.recordreader
- org.apache.hadoop.hive.ql.exec.TextRecordReader
- The default record reader for reading data from the user scripts.
-
-
- hive.script.recordwriter
- org.apache.hadoop.hive.ql.exec.TextRecordWriter
- The default record writer for writing data to the user scripts.
-
-
- hive.transform.escape.input
- false
-
- This adds an option to escape special chars (newlines, carriage returns and
- tabs) when they are passed to the user script. This is useful if the Hive tables
- can contain data that contains special characters.
-
-
-
- hive.binary.record.max.length
- 1000
-
- Read from a binary stream and treat each hive.binary.record.max.length bytes as a record.
- The last record before the end of stream can have less than hive.binary.record.max.length bytes
-
-
-
- hive.hwi.listen.host
- 0.0.0.0
- This is the host address the Hive Web Interface will listen on
-
-
- hive.hwi.listen.port
- 9999
- This is the port the Hive Web Interface will listen on
-
-
- hive.hwi.war.file
- ${system:HWI_WAR_FILE}
- This sets the path to the HWI war file, relative to ${HIVE_HOME}.
-
-
- hive.mapred.local.mem
- 0
- mapper/reducer memory in local mode
-
-
- hive.mapjoin.smalltable.filesize
- 25000000
-
- The threshold for the input file size of the small tables; if the file size is smaller
- than this threshold, it will try to convert the common join into map join
-
-
-
- hive.sample.seednumber
- 0
- A number used to percentage sampling. By changing this number, user will change the subsets of data sampled.
-
-
- hive.test.mode
- false
- Whether Hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename.
-
-
- hive.test.mode.prefix
- test_
- In test mode, specfies prefixes for the output table
-
-
- hive.test.mode.samplefreq
- 32
-
- In test mode, specfies sampling frequency for table, which is not bucketed,
- For example, the following query:
- INSERT OVERWRITE TABLE dest SELECT col1 from src
- would be converted to
- INSERT OVERWRITE TABLE test_dest
- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))
-
-
-
- hive.test.mode.nosamplelist
-
- In test mode, specifies comma separated table names which would not apply sampling
-
-
- hive.test.dummystats.aggregator
-
- internal variable for test
-
-
- hive.test.dummystats.publisher
-
- internal variable for test
-
-
- hive.merge.mapfiles
- true
- Merge small files at the end of a map-only job
-
-
- hive.merge.mapredfiles
- false
- Merge small files at the end of a map-reduce job
-
-
- hive.merge.tezfiles
- false
- Merge small files at the end of a Tez DAG
-
-
- hive.merge.size.per.task
- 256000000
- Size of merged files at the end of the job
-
-
- hive.merge.smallfiles.avgsize
- 16000000
-
- When the average output file size of a job is less than this number, Hive will start an additional
- map-reduce job to merge the output files into bigger files. This is only done for map-only jobs
- if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true.
-
-
-
- hive.merge.rcfile.block.level
- true
-
-
-
- hive.merge.input.format.block.level
- org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat
-
-
-
- hive.merge.current.job.has.dynamic.partitions
- false
-
-
-
- hive.exec.rcfile.use.explicit.header
- true
-
- If this is set the header for RCFiles will simply be RCF. If this is not
- set the header will be that borrowed from sequence files, e.g. SEQ- followed
- by the input and output RCFile formats.
-
-
-
- hive.exec.rcfile.use.sync.cache
- true
-
-
-
- hive.io.rcfile.record.interval
- 2147483647
-
-
-
- hive.io.rcfile.column.number.conf
- 0
-
-
-
- hive.io.rcfile.tolerate.corruptions
- false
-
-
-
- hive.io.rcfile.record.buffer.size
- 4194304
-
-
-
- hive.exec.orc.memory.pool
- 0.5
- Maximum fraction of heap that can be used by ORC file writers
-
-
- hive.exec.orc.write.format
-
- Define the version of the file to write
-
-
- hive.exec.orc.default.stripe.size
- 67108864
- Define the default ORC stripe size
-
-
- hive.exec.orc.default.block.size
- 268435456
- Define the default file system block size for ORC files.
-
-
- hive.exec.orc.dictionary.key.size.threshold
- 0.8
-
- If the number of keys in a dictionary is greater than this fraction of the total number of
- non-null rows, turn off dictionary encoding. Use 1 to always use dictionary encoding.
-
-
-
- hive.exec.orc.default.row.index.stride
- 10000
- Define the default ORC index stride
-
-
- hive.exec.orc.default.buffer.size
- 262144
- Define the default ORC buffer size
-
-
- hive.exec.orc.default.block.padding
- true
- Define the default block padding
-
-
- hive.exec.orc.block.padding.tolerance
- 0.05
-
- Define the tolerance for block padding as a percentage of stripe size.
- For the defaults of 64Mb ORC stripe and 256Mb HDFS blocks, a maximum of 3.2Mb will be reserved for padding within the 256Mb block.
- In that case, if the available size within the block is more than 3.2Mb, a new smaller stripe will be inserted to fit within that space.
- This will make sure that no stripe written will cross block boundaries and cause remote reads within a node local task.
-
-
-
- hive.exec.orc.default.compress
- ZLIB
- Define the default compression codec for ORC file
-
-
- hive.exec.orc.encoding.strategy
- SPEED
-
- Define the encoding strategy to use while writing data. Changing this will
- only affect the light weight encoding for integers. This flag will not
- change the compression level of higher level compression codec (like ZLIB).
- Possible options are SPEED and COMPRESSION.
-
-
-
- hive.orc.splits.include.file.footer
- false
-
- If turned on splits generated by orc will include metadata about the stripes in the file. This
- data is read remotely (from the client or HS2 machine) and sent to all the tasks.
-
-
-
- hive.orc.cache.stripe.details.size
- 10000
- Cache size for keeping meta info about orc splits cached in the client.
-
-
- hive.orc.compute.splits.num.threads
- 10
- How many threads orc should use to create splits in parallel.
-
-
- hive.exec.orc.skip.corrupt.data
- false
-
- If ORC reader encounters corrupt data, this value will be used to determine
- whether to skip the corrupt data or throw exception. The default behavior is to throw exception.
-
-
-
- hive.exec.orc.zerocopy
- false
- Use zerocopy reads with ORC.
-
-
- hive.lazysimple.extended_boolean_literal
- false
-
- LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',
- '1', and '0' as extened, legal boolean literal, in addition to 'TRUE' and 'FALSE'.
- The default is false, which means only 'TRUE' and 'FALSE' are treated as legal
- boolean literal.
-
-
-
- hive.optimize.skewjoin
- false
-
- Whether to enable skew join optimization.
- The algorithm is as follows: At runtime, detect the keys with a large skew. Instead of
- processing those keys, store them temporarily in an HDFS directory. In a follow-up map-reduce
- job, process those skewed keys. The same key need not be skewed for all the tables, and so,
- the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a
- map-join.
-
-
-
- hive.auto.convert.join
- true
- Whether Hive enables the optimization about converting common join into mapjoin based on the input file size
-
-
- hive.auto.convert.join.noconditionaltask
- true
-
- Whether Hive enables the optimization about converting common join into mapjoin based on the input file size.
- If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the
- specified size, the join is directly converted to a mapjoin (there is no conditional task).
-
-
-
- hive.auto.convert.join.noconditionaltask.size
- 10000000
-
- If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect.
- However, if it is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size,
- the join is directly converted to a mapjoin(there is no conditional task). The default is 10MB
-
-
-
- hive.auto.convert.join.use.nonstaged
- false
-
- For conditional joins, if input stream from a small alias can be directly applied to join operator without
- filtering or projection, the alias need not to be pre-staged in distributed cache via mapred local task.
- Currently, this is not working with vectorization or tez execution engine.
-
-
-
- hive.skewjoin.key
- 100000
-
- Determine if we get a skew key in join. If we see more than the specified number of rows with the same key in join operator,
- we think the key as a skew join key.
-
-
-
- hive.skewjoin.mapjoin.map.tasks
- 10000
-
- Determine the number of map task used in the follow up map join job for a skew join.
- It should be used together with hive.skewjoin.mapjoin.min.split to perform a fine grained control.
-
-
-
- hive.skewjoin.mapjoin.min.split
- 33554432
-
- Determine the number of map task at most used in the follow up map join job for a skew join by specifying
- the minimum split size. It should be used together with hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.
-
-
-
- hive.heartbeat.interval
- 1000
- Send a heartbeat after this interval - used by mapjoin and filter operators
-
-
- hive.limit.row.max.size
- 100000
- When trying a smaller subset of data for simple LIMIT, how much size we need to guarantee each row to have at least.
-
-
- hive.limit.optimize.limit.file
- 10
- When trying a smaller subset of data for simple LIMIT, maximum number of files we can sample.
-
-
- hive.limit.optimize.enable
- false
- Whether to enable to optimization to trying a smaller subset of data for simple LIMIT first.
-
-
- hive.limit.optimize.fetch.max
- 50000
-
- Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query.
- Insert queries are not restricted by this limit.
-
-
-
- hive.limit.pushdown.memory.usage
- -1.0
- The max memory to be used for hash in RS operator for top K selection.
-
-
- hive.limit.query.max.table.partition
- -1
-
- This controls how many partitions can be scanned for each partitioned table.
- The default value "-1" means no limit.
-
-
-
- hive.hashtable.initialCapacity
- 100000
-
-
-
- hive.hashtable.loadfactor
- 0.75
-
-
-
- hive.mapjoin.followby.gby.localtask.max.memory.usage
- 0.55
-
- This number means how much memory the local task can take to hold the key/value into an in-memory hash table
- when this map join is followed by a group by. If the local task's memory usage is more than this number,
- the local task will abort by itself. It means the data of the small table is too large to be held in memory.
-
-
-
- hive.mapjoin.localtask.max.memory.usage
- 0.9
-
- This number means how much memory the local task can take to hold the key/value into an in-memory hash table.
- If the local task's memory usage is more than this number, the local task will abort by itself.
- It means the data of the small table is too large to be held in memory.
-
-
-
- hive.mapjoin.check.memory.rows
- 100000
- The number means after how many rows processed it needs to check the memory usage
-
-
- hive.debug.localtask
- false
-
-
-
- hive.input.format
- org.apache.hadoop.hive.ql.io.CombineHiveInputFormat
- The default input format. Set this to HiveInputFormat if you encounter problems with CombineHiveInputFormat.
-
-
- hive.tez.input.format
- org.apache.hadoop.hive.ql.io.HiveInputFormat
- The default input format for tez. Tez groups splits in the AM.
-
-
- hive.tez.container.size
- -1
- By default Tez will spawn containers of the size of a mapper. This can be used to overwrite.
-
-
- hive.tez.java.opts
-
- By default Tez will use the Java options from map tasks. This can be used to overwrite.
-
-
- hive.tez.log.level
- INFO
-
- The log level to use for tasks executing as part of the DAG.
- Used only if hive.tez.java.opts is used to configure Java options.
-
-
-
- hive.enforce.bucketing
- false
- Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced.
-
-
- hive.enforce.sorting
- false
- Whether sorting is enforced. If true, while inserting into the table, sorting is enforced.
-
-
- hive.optimize.bucketingsorting
- true
-
- If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing
- bucketing/sorting for queries of the form:
- insert overwrite table T2 select * from T1;
- where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets.
-
-
-
- hive.mapred.partitioner
- org.apache.hadoop.hive.ql.io.DefaultHivePartitioner
-
-
-
- hive.enforce.sortmergebucketmapjoin
- false
- If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not ?
-
-
- hive.enforce.bucketmapjoin
- false
-
- If the user asked for bucketed map-side join, and it cannot be performed,
- should the query fail or not ? For example, if the buckets in the tables being joined are
- not a multiple of each other, bucketed map-side join cannot be performed, and the
- query will fail if hive.enforce.bucketmapjoin is set to true.
-
-
-
- hive.auto.convert.sortmerge.join
- false
- Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join.
-
-
- hive.auto.convert.sortmerge.join.bigtable.selection.policy
- org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
-
- The policy to choose the big table for automatic conversion to sort-merge join.
- By default, the table with the largest partitions is assigned the big table. All policies are:
- . based on position of the table - the leftmost table is selected
- org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
- . based on total size (all the partitions selected in the query) of the table
- org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
- . based on average size (all the partitions selected in the query) of the table
- org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
- New policies can be added in future.
-
-
-
- hive.auto.convert.sortmerge.join.to.mapjoin
- false
-
- If hive.auto.convert.sortmerge.join is set to true, and a join was converted to a sort-merge join,
- this parameter decides whether each table should be tried as a big table, and effectively a map-join should be
- tried. That would create a conditional task with n+1 children for a n-way join (1 child for each table as the
- big table), and the backup task will be the sort-merge join. In some cases, a map-join would be faster than a
- sort-merge join, if there is no advantage of having the output bucketed and sorted. For example, if a very big sorted
- and bucketed table with few files (say 10 files) are being joined with a very small sorter and bucketed table
- with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster
- if the complete small table can fit in memory, and a map-join can be performed.
-
-
-
- hive.exec.script.trust
- false
-
-
-
- hive.exec.rowoffset
- false
- Whether to provide the row offset virtual column
-
-
- hive.hadoop.supports.splittable.combineinputformat
- false
-
-
-
- hive.optimize.index.filter
- false
- Whether to enable automatic use of indexes
-
-
- hive.optimize.index.autoupdate
- false
- Whether to update stale indexes automatically
-
-
- hive.optimize.ppd
- true
- Whether to enable predicate pushdown
-
-
- hive.ppd.recognizetransivity
- true
- Whether to transitively replicate predicate filters over equijoin conditions.
-
-
- hive.ppd.remove.duplicatefilters
- true
- Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false.
-
-
- hive.optimize.metadataonly
- true
-
-
-
- hive.optimize.null.scan
- true
- Dont scan relations which are guaranteed to not generate any rows
-
-
- hive.optimize.ppd.storage
- true
- Whether to push predicates down to storage handlers
-
-
- hive.optimize.groupby
- true
- Whether to enable the bucketed group by from bucketed partitions/tables.
-
-
- hive.optimize.bucketmapjoin
- false
- Whether to try bucket mapjoin
-
-
- hive.optimize.bucketmapjoin.sortedmerge
- false
- Whether to try sorted bucket merge map join
-
-
- hive.optimize.reducededuplication
- true
-
- Remove extra map-reduce jobs if the data is already clustered by the same key which needs to be used again.
- This should always be set to true. Since it is a new feature, it has been made configurable.
-
-
-
- hive.optimize.reducededuplication.min.reducer
- 4
-
- Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS.
- That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.
- The optimization will be automatically disabled if number of reducers would be less than specified value.
-
-
-
- hive.optimize.sort.dynamic.partition
- true
-
- When enabled dynamic partitioning column will be globally sorted.
- This way we can keep only one record writer open for each partition value
- in the reducer thereby reducing the memory pressure on reducers.
-
-
-
- hive.optimize.sampling.orderby
- false
-
-
-
- hive.optimize.sampling.orderby.number
- 1000
-
-
-
- hive.optimize.sampling.orderby.percent
- 0.1
-
-
-
- hive.optimize.union.remove
- false
-
- Whether to remove the union and push the operators between union and the filesink above union.
- This avoids an extra scan of the output by union. This is independently useful for union
- queries, and specially useful when hive.optimize.skewjoin.compiletime is set to true, since an
- extra union is inserted.
-
- The merge is triggered if either of hive.merge.mapfiles or hive.merge.mapredfiles is set to true.
- If the user has set hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the idea was the
- number of reducers are few, so the number of files anyway are small. However, with this optimization,
- we are increasing the number of files possibly by a big margin. So, we merge aggressively.
-
-
-
- hive.optimize.correlation
- false
- exploit intra-query correlations.
-
-
- hive.mapred.supports.subdirectories
- false
-
- Whether the version of Hadoop which is running supports sub-directories for tables/partitions.
- Many Hive optimizations can be applied if the Hadoop version supports sub-directories for
- tables/partitions. It was added by MAPREDUCE-1501
-
-
-
- hive.optimize.skewjoin.compiletime
- false
-
- Whether to create a separate plan for skewed keys for the tables in the join.
- This is based on the skewed keys stored in the metadata. At compile time, the plan is broken
- into different joins: one for the skewed keys, and the other for the remaining keys. And then,
- a union is performed for the 2 joins generated above. So unless the same skewed key is present
- in both the joined tables, the join for the skewed key will be performed as a map-side join.
-
- The main difference between this parameter and hive.optimize.skewjoin is that this parameter
- uses the skew information stored in the metastore to optimize the plan at compile time itself.
- If there is no skew information in the metadata, this parameter will not have any affect.
- Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin should be set to true.
- Ideally, hive.optimize.skewjoin should be renamed as hive.optimize.skewjoin.runtime, but not doing
- so for backward compatibility.
-
- If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime
- would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op.
-
-
-
- hive.optimize.index.filter.compact.minsize
- 5368709120
- Minimum size (in bytes) of the inputs on which a compact index is automatically used.
-
-
- hive.optimize.index.filter.compact.maxsize
- -1
- Maximum size (in bytes) of the inputs on which a compact index is automatically used. A negative number is equivalent to infinity.
-
-
- hive.index.compact.query.max.entries
- 10000000
- The maximum number of index entries to read during a query that uses the compact index. Negative value is equivalent to infinity.
-
-
- hive.index.compact.query.max.size
- 10737418240
- The maximum number of bytes that a query using the compact index can read. Negative value is equivalent to infinity.
-
-
- hive.index.compact.binary.search
- true
- Whether or not to use a binary search to find the entries in an index table that match the filter, where possible
-
-
- hive.stats.autogather
- true
- A flag to gather statistics automatically during the INSERT OVERWRITE command.
-
-
- hive.stats.dbclass
- fs
- The storage that stores temporary Hive statistics. Currently, jdbc, hbase, counter and custom type are supported.
-
-
- hive.stats.jdbcdriver
- org.apache.derby.jdbc.EmbeddedDriver
- The JDBC driver for the database that stores temporary Hive statistics.
-
-
- hive.stats.dbconnectionstring
- jdbc:derby:;databaseName=TempStatsStore;create=true
- The default connection string for the database that stores temporary Hive statistics.
-
-
- hive.stats.default.publisher
-
- The Java class (implementing the StatsPublisher interface) that is used by default if hive.stats.dbclass is custom type.
-
-
- hive.stats.default.aggregator
-
- The Java class (implementing the StatsAggregator interface) that is used by default if hive.stats.dbclass is custom type.
-
-
- hive.stats.jdbc.timeout
- 30
- Timeout value (number of seconds) used by JDBC connection and statements.
-
-
- hive.stats.atomic
- false
- whether to update metastore stats only if all stats are available
-
-
- hive.stats.retries.max
- 0
-
- Maximum number of retries when stats publisher/aggregator got an exception updating intermediate database.
- Default is no tries on failures.
-
-
-
- hive.stats.retries.wait
- 3000
- The base waiting window (in milliseconds) before the next retry. The actual wait time is calculated by baseWindow * failures baseWindow * (failure 1) * (random number between [0.0,1.0]).
-
-
- hive.stats.collect.rawdatasize
- true
- should the raw data size be collected when analyzing tables
-
-
- hive.client.stats.counters
-
-
- Subset of counters that should be of interest for hive.client.stats.publishers (when one wants to limit their publishing).
- Non-display names should be used
-
-
-
- hive.stats.reliable
- false
-
- Whether queries will fail because stats cannot be collected completely accurately.
- If this is set to true, reading/writing from/into a partition may fail because the stats
- could not be computed accurately.
-
-
-
- hive.stats.gather.num.threads
- 10
-
- Number of threads used by partialscan/noscan analyze command for partitioned tables.
- This is applicable only for file formats that implement StatsProvidingRecordReader (like ORC).
-
-
-
- hive.stats.collect.tablekeys
- false
-
- Whether join and group by keys on tables are derived and maintained in the QueryPlan.
- This is useful to identify how tables are accessed and to determine if they should be bucketed.
-
-
-
- hive.stats.collect.scancols
- false
-
- Whether column accesses are tracked in the QueryPlan.
- This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed.
-
-
-
- hive.stats.ndv.error
- 20.0
-
- Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost.
- A lower value for error indicates higher accuracy and a higher compute cost.
-
-
-
- hive.stats.key.prefix.max.length
- 150
-
- Determines if when the prefix of the key used for intermediate stats collection
- exceeds a certain length, a hash of the key is used instead. If the value < 0 then hashing
-
-
-
- hive.stats.key.prefix.reserve.length
- 24
-
- Reserved length for postfix of stats key. Currently only meaningful for counter type which should
- keep length of full stats key smaller than max length configured by hive.stats.key.prefix.max.length.
- For counter type, it should be bigger than the length of LB spec if exists.
-
-
-
- hive.stats.max.variable.length
- 100
-
- To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),
- average row size is multiplied with the total number of rows coming out of each operator.
- Average row size is computed from average column size of all columns in the row. In the absence
- of column statistics, for variable length columns (like string, bytes etc.), this value will be
- used. For fixed length columns their corresponding Java equivalent sizes are used
- (float - 4 bytes, double - 8 bytes etc.).
-
-
-
- hive.stats.list.num.entries
- 10
-
- To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),
- average row size is multiplied with the total number of rows coming out of each operator.
- Average row size is computed from average column size of all columns in the row. In the absence
- of column statistics and for variable length complex columns like list, the average number of
- entries/values can be specified using this config.
-
-
-
- hive.stats.map.num.entries
- 10
-
- To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),
- average row size is multiplied with the total number of rows coming out of each operator.
- Average row size is computed from average column size of all columns in the row. In the absence
- of column statistics and for variable length complex columns like map, the average number of
- entries/values can be specified using this config.
-
-
-
- hive.stats.map.parallelism
- 1
-
- Hive/Tez optimizer estimates the data size flowing through each of the operators.
- For GROUPBY operator, to accurately compute the data size map-side parallelism needs to
- be known. By default, this value is set to 1 since optimizer is not aware of the number of
- mappers during compile-time. This Hive config can be used to specify the number of mappers
- to be used for data size computation of GROUPBY operator.
-
-
-
- hive.stats.fetch.partition.stats
- true
-
- Annotation of operator tree with statistics information requires partition level basic
- statistics like number of rows, data size and file size. Partition statistics are fetched from
- metastore. Fetching partition statistics for each needed partition can be expensive when the
- number of partitions is high. This flag can be used to disable fetching of partition statistics
- from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes
- and will estimate the number of rows from row schema.
-
-
-
- hive.stats.fetch.column.stats
- false
-
- Annotation of operator tree with statistics information requires column statistics.
- Column statistics are fetched from metastore. Fetching column statistics for each needed column
- can be expensive when the number of columns is high. This flag can be used to disable fetching
- of column statistics from metastore.
-
-
-
- hive.stats.join.factor
- 1.1
-
- Hive/Tez optimizer estimates the data size flowing through each of the operators. JOIN operator
- uses column statistics to estimate the number of rows flowing out of it and hence the data size.
- In the absence of column statistics, this factor determines the amount of rows that flows out
- of JOIN operator.
-
-
-
- hive.stats.deserialization.factor
- 1.0
-
- Hive/Tez optimizer estimates the data size flowing through each of the operators. In the absence
- of basic statistics like number of rows and data size, file size is used to estimate the number
- of rows and data size. Since files in tables/partitions are serialized (and optionally
- compressed) the estimates of number of rows and data size cannot be reliably determined.
- This factor is multiplied with the file size to account for serialization and compression.
-
-
-
- hive.support.concurrency
- false
-
- Whether Hive supports concurrency control or not.
- A ZooKeeper instance must be up and running when using the ZooKeeper Hive lock manager.
-
-
-
- hive.lock.manager
- org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager
-
-
-
- hive.lock.numretries
- 100
- The number of times you want to try to get all the locks
-
-
- hive.unlock.numretries
- 10
- The number of times you want to retry a single unlock operation
-
-
- hive.lock.sleep.between.retries
- 60
- The sleep time (in seconds) between various retries
-
-
- hive.lock.mapred.only.operation
- false
-
- This param controls whether locks are acquired only for queries
- that need to execute at least one mapred job.
-
-
-
- hive.zookeeper.quorum
-
- The list of ZooKeeper servers to talk to. This is only needed for read/write locks.
-
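- A minimal sketch of turning on ZooKeeper-based locking with the properties in this section
- (the quorum host names are assumptions):
-
-   SET hive.support.concurrency=true;
-   SET hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
-   SET hive.zookeeper.quorum=zk1.example.com,zk2.example.com,zk3.example.com;
-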
-
- hive.zookeeper.client.port
- 2181
- The port of ZooKeeper servers to talk to. This is only needed for read/write locks.
-
-
- hive.zookeeper.session.timeout
- 600000
-
- ZooKeeper client's session timeout. If a heartbeat is not sent within the timeout,
- the client is disconnected and, as a result, all locks are released.
-
-
-
- hive.zookeeper.namespace
- hive_zookeeper_namespace
- The parent node under which all ZooKeeper nodes are created.
-
-
- hive.zookeeper.clean.extra.nodes
- false
- Clean extra nodes at the end of the session.
-
-
- hive.txn.manager
- org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager
-
-
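- The default DummyTxnManager provides no transactions. A minimal sketch of switching to the
- ACID transaction manager (DbTxnManager ships with Hive; availability depends on the version):
-
-   SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-   SET hive.support.concurrency=true;
-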
-
- hive.txn.timeout
- 300
- Time in seconds after which transactions are declared aborted if the client has not sent a heartbeat.
-
-
- hive.txn.max.open.batch
- 1000
-
- Maximum number of transactions that can be fetched in one call to open_txns().
- Increasing this will decrease the number of delta files created when
- streaming data into Hive. But it will also increase the number of
- open transactions at any given time, possibly impacting read performance.
-
-
-
- hive.compactor.initiator.on
- false
- Whether to run the compactor's initiator thread in this metastore instance or not.
-
-
- hive.compactor.worker.threads
- 0
- Number of compactor worker threads to run on this metastore instance.
-
-
- hive.compactor.worker.timeout
- 86400
-
- Time in seconds before a given compaction in the working state is declared a failure
- and returned to the initiated state.
-
-
-
- hive.compactor.check.interval
- 300
-
- Time in seconds between checks to see if any partitions need to be compacted.
- This should be kept high because each check for compaction requires many calls against the NameNode.
-
-
-
- hive.compactor.delta.num.threshold
- 10
-
- Number of delta files that must exist in a directory before the compactor will attempt
- a minor compaction.
-
-
-
- hive.compactor.delta.pct.threshold
- 0.1
- Percentage (by size) of base that deltas can be before major compaction is initiated.
-
-
- hive.compactor.abortedtxn.threshold
- 1000
-
- Number of aborted transactions involving a particular table or partition before major
- compaction is initiated.
-
-
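- Compaction is normally triggered automatically by the initiator/worker threads configured
- above; it can also be requested manually (the table and partition are assumptions):
-
-   ALTER TABLE acid_tbl PARTITION (ds='2014-04-01') COMPACT 'major';
-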
-
- hive.hbase.wal.enabled
- true
-
- Whether writes to HBase should be forced to the write-ahead log.
- Disabling this improves HBase write performance at the risk of lost writes in case of a crash.
-
-
-
- hive.hbase.generatehfiles
- false
- True when HBaseStorageHandler should generate hfiles instead of operating against the online table.
-
-
- hive.archive.enabled
- false
- Whether archiving operations are permitted
-
-
- hive.optimize.index.groupby
- false
- Whether to enable optimization of group-by queries using Aggregate indexes.
-
-
- hive.outerjoin.supports.filters
- true
-
-
-
- hive.fetch.task.conversion
- more
-
- Some select queries can be converted to single FETCH task minimizing latency.
- Currently the query should be single-sourced, not have any subquery, and should not have
- any aggregations or distincts (which incur RS), lateral views or joins.
- 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
- 2. more : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)
-
-
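- Illustrative queries (assuming a table src(key, value) exists):
-
-   -- eligible for a single FETCH task under "more" (projection, filter, limit):
-   SELECT key, value FROM src WHERE key > 100 LIMIT 10;
-   -- not eligible: the aggregation incurs a reduce stage, so a regular job is launched
-   SELECT count(*) FROM src;
-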
-
- hive.fetch.task.conversion.threshold
- 1073741824
-
- Input threshold for applying hive.fetch.task.conversion. If target table is native, input length
- is calculated by summation of file lengths. If it's not native, storage handler for the table
- can optionally implement org.apache.hadoop.hive.ql.metadata.InputEstimator interface.
-
-
-
- hive.fetch.task.aggr
- false
-
- Aggregation queries with no group-by clause (for example, select count(*) from src) execute
- final aggregations in a single reduce task. If this is set to true, Hive delegates the final
- aggregation stage to the fetch task, possibly decreasing the query time.
-
-
-
- hive.compute.query.using.stats
- false
-
- When set to true, Hive will answer a few queries like count(1) purely using stats
- stored in the metastore. For basic stats collection, set the config hive.stats.autogather to true.
- For more advanced stats collection, you need to run 'analyze table' queries.
-
-
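- A minimal sketch (the table src is an assumption):
-
-   ANALYZE TABLE src COMPUTE STATISTICS;    -- populate basic stats in the metastore
-   SET hive.compute.query.using.stats=true;
-   SELECT count(1) FROM src;                -- can now be answered from stats, without a job
-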
-
- hive.fetch.output.serde
- org.apache.hadoop.hive.serde2.DelimitedJSONSerDe
- The SerDe used by FetchTask to serialize the fetch output.
-
-
- hive.cache.expr.evaluation
- true
- If true, evaluation result of deterministic expression referenced twice or more will be cached.
-
-
- hive.variable.substitute
- true
- This enables substitution using syntax like ${var}, ${system:var} and ${env:var}.
-
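- For example (the variable name and table are assumptions):
-
-   SET hivevar:cutoff=100;
-   SELECT * FROM src WHERE key > ${hivevar:cutoff};
-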
-
- hive.variable.substitute.depth
- 40
- The maximum replacements the substitution engine will do.
-
-
- hive.conf.validation
- true
- Enables type checking for registered Hive configurations
-
-
- hive.semantic.analyzer.hook
-
-
-
-
- hive.security.authorization.enabled
- false
- Enable or disable the Hive client authorization.
-
-
- hive.security.authorization.manager
- org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider
-
- The Hive client authorization manager class name. The user defined authorization class should implement
- interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider.
-
-
-
- hive.security.authenticator.manager
- org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator
-
- Hive client authenticator manager class name. The user defined authenticator should implement
- interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider.
-
-
-
- hive.security.metastore.authorization.manager
- org.apache.hadoop.hive.ql.security.authorization.DefaultHiveMetastoreAuthorizationProvider
-
- Authorization manager class name to be used in the metastore for authorization.
- The user defined authorization class should implement interface
- org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider.
-
-
-
- hive.security.metastore.authenticator.manager
- org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator
-
- Authenticator manager class name to be used in the metastore for authentication.
- The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider.
-
-
-
- hive.security.authorization.createtable.user.grants
-
-
- The privileges automatically granted to some users whenever a table gets created.
- An example like "userX,userY:select;userZ:create" will grant select privilege to userX and userY,
- and grant create privilege to userZ whenever a new table is created.
-
-
-
- hive.security.authorization.createtable.group.grants
-
-
- The privileges automatically granted to some groups whenever a table gets created.
- An example like "groupX,groupY:select;groupZ:create" will grant select privilege to groupX and groupY,
- and grant create privilege to groupZ whenever a new table is created.
-
-
-
- hive.security.authorization.createtable.role.grants
-
-
- The privileges automatically granted to some roles whenever a table gets created.
- An example like "roleX,roleY:select;roleZ:create" will grant select privilege to roleX and roleY,
- and grant create privilege to roleZ whenever a new table is created.
-
-
-
- hive.security.authorization.createtable.owner.grants
-
-
- The privileges automatically granted to the owner whenever a table gets created.
- An example like "select,drop" will grant select and drop privileges to the owner of the table.
-
-
-
- hive.security.authorization.sqlstd.confwhitelist
-
- Internal variable. List of configurations that a user is allowed to modify.
-
-
- hive.cli.print.header
- false
- Whether to print the names of the columns in query output.
-
-
- hive.error.on.empty.partition
- false
- Whether to throw an exception if dynamic partition insert generates empty results.
-
-
- hive.index.compact.file
-
- internal variable
-
-
- hive.index.blockfilter.file
-
- internal variable
-
-
- hive.index.compact.file.ignore.hdfs
- false
-
- When true, the HDFS location stored in the index file will be ignored at runtime.
- If the data was moved or the name of the cluster changed, the index data should still be usable.
-
-
-
- hive.exim.uri.scheme.whitelist
- hdfs,pfile
- A comma separated list of acceptable URI schemes for import and export.
-
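- For reference, the statements gated by this whitelist look like the following (the export
- path and table names are assumptions):
-
-   EXPORT TABLE src TO 'hdfs:///tmp/src_export';
-   IMPORT TABLE src_copy FROM 'hdfs:///tmp/src_export';
-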
-
- hive.mapper.cannot.span.multiple.partitions
- false
-
-
-
- hive.rework.mapredwork
- false
-
- Whether to rework the mapred work or not.
- This is first introduced by SymlinkTextInputFormat to replace symlink files with real paths at compile time.
-
-
-
- hive.exec.concatenate.check.index
- true
-
- If this is set to true, Hive will throw an error when doing
- 'alter table tbl_name [partSpec] concatenate' on a table/partition
- that has indexes on it. The reason a user would want to set this to true
- is that it helps avoid handling all the index drop, recreation and
- rebuild work. This is very helpful for tables with thousands of partitions.
-
-
-
- hive.io.exception.handlers
-
-
- A list of I/O exception handler class names. This is used
- to construct a list of exception handlers to handle exceptions thrown
- by record readers.
-
-
-
- hive.log4j.file
-
-
- Hive log4j configuration file.
- If the property is not set, then logging will be initialized using hive-log4j.properties found on the classpath.
- If the property is set, the value must be a valid URI (java.net.URI, e.g. "file:///tmp/my-logging.properties"),
- which you can then extract a URL from and pass to PropertyConfigurator.configure(URL).
-
-
-
- hive.exec.log4j.file
-
-
- Hive log4j configuration file for execution mode (sub command).
- If the property is not set, then logging will be initialized using hive-exec-log4j.properties found on the classpath.
- If the property is set, the value must be a valid URI (java.net.URI, e.g. "file:///tmp/my-logging.properties"),
- which you can then extract a URL from and pass to PropertyConfigurator.configure(URL).
-
-
-
- hive.global.init.file.location
-
-
- The location of HS2 global init file (.hiverc).
- If the property is not set, then HS2 will search for the file in $HIVE_CONF_DIR/.
- If the property is set, the value must be a valid path where the init file is located.
-
-
-
- hive.autogen.columnalias.prefix.label
- _c
-
- String used as a prefix when auto generating column aliases.
- By default the prefix label will be appended with a column position number to form the column alias.
- Auto generation would happen if an aggregate function is used in a select clause without an explicit alias.
-
-
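- For example, with the default prefix (the table src is an assumption):
-
-   SELECT count(*), max(key) FROM src;   -- result columns are auto-named _c0 and _c1
-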
-
- hive.autogen.columnalias.prefix.includefuncname
- false
- Whether to include function name in the column alias auto generated by Hive.
-
-
- hive.exec.perf.logger
- org.apache.hadoop.hive.ql.log.PerfLogger
-
- The class responsible for logging client side performance metrics.
- Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger
-
-
-
- hive.start.cleanup.scratchdir
- false
- To clean up the Hive scratchdir when starting the Hive Server.
-
-
- hive.insert.into.multilevel.dirs
- false
-
- Whether to insert into multilevel directories like
- "insert directory '/HIVEFT25686/chinna/' from table"
-
-
-
- hive.warehouse.subdir.inherit.perms
- false
-
- Set this to true if the table directories should inherit the
- permission of the warehouse or database directory instead of being created
- with the permissions derived from the dfs umask.
-
-
-
- hive.insert.into.external.tables
- true
- whether insert into external tables is allowed
-
-
- hive.exec.driver.run.hooks
-
- A comma separated list of hooks which implement HiveDriverRunHook. These will be run at the beginning and end of Driver.run, in the order specified.
-
-
- hive.ddl.output.format
-
-
- The data format to use for DDL output. One of "text" (for human
- readable text) or "json" (for a json object).
-
-
-
- hive.entity.separator
- @
- Separator used to construct names of tables and partitions. For example, dbname@tablename@partitionname
-
-
- hive.display.partition.cols.separately
- true
-
- In older Hive versions (0.10 and earlier) no distinction was made between
- partition columns and non-partition columns while displaying columns in describe
- table. From 0.12 onwards, they are displayed separately. This flag will let you
- get the old behavior, if desired. See the test case in the patch for HIVE-6689.
-
-
-
- hive.server2.max.start.attempts
- 30
-
- The number of times HiveServer2 will attempt to start before exiting, sleeping 60 seconds between retries.
- The default of 30 will keep trying for 30 minutes.
-
-
-
- hive.server2.transport.mode
- binary
- Server transport mode. "binary" or "http"
-
-
- hive.server2.thrift.http.port
- 10001
- Port number when in HTTP mode.
-
-
- hive.server2.thrift.http.path
- cliservice
- Path component of URL endpoint when in HTTP mode.
-
-
- hive.server2.thrift.http.min.worker.threads
- 5
- Minimum number of worker threads when in HTTP mode.
-
-
- hive.server2.thrift.http.max.worker.threads
- 500
- Maximum number of worker threads when in HTTP mode.
-
-
- hive.server2.thrift.port
- 10000
-
- Port number of HiveServer2 Thrift interface.
- Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT
-
-
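- For reference, a JDBC client would reach the default binary-mode endpoint above with a URL
- such as the following (the host name is an assumption; HTTP-mode URLs take extra parameters
- that vary by version):
-
-   -- jdbc:hive2://hs2host.example.com:10000/default
-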
-
- hive.server2.thrift.bind.host
-
-
- Bind host on which to run the HiveServer2 Thrift interface.
- Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
-
-
-
- hive.server2.thrift.sasl.qop
- auth
-
- SASL QOP value; set it to one of the following values to enable higher levels of
- protection for HiveServer2 communication with clients.
- "auth" - authentication only (default)
- "auth-int" - authentication plus integrity protection
- "auth-conf" - authentication plus integrity and confidentiality protection
- This is applicable only if HiveServer2 is configured to use Kerberos authentication.
-
-
-
- hive.server2.thrift.min.worker.threads
- 5
- Minimum number of Thrift worker threads
-
-
- hive.server2.thrift.max.worker.threads
- 500
- Maximum number of Thrift worker threads
-
-
- hive.server2.async.exec.threads
- 100
- Number of threads in the async thread pool for HiveServer2
-
-
- hive.server2.async.exec.shutdown.timeout
- 10
- Time (in seconds) for which HiveServer2 shutdown will wait for async threads to terminate.
-
-
- hive.server2.async.exec.wait.queue.size
- 100
-
- Size of the wait queue for async thread pool in HiveServer2.
- After hitting this limit, the async thread pool will reject new requests.
-
-
-
- hive.server2.async.exec.keepalive.time
- 10
-
- Time (in seconds) that an idle HiveServer2 async thread (from the thread pool) will wait
- for a new task to arrive before terminating
-
-
-
- hive.server2.long.polling.timeout
- 5000
-
- Time in milliseconds that HiveServer2 will wait
- before responding to asynchronous calls that use long polling
-
-
-
- hive.server2.authentication
- NONE
-
- Client authentication types.
- NONE: no authentication check
- LDAP: LDAP/AD based authentication
- KERBEROS: Kerberos/GSSAPI authentication
- CUSTOM: Custom authentication provider
- (Use with property hive.server2.custom.authentication.class)
-
-
-
- hive.server2.allow.user.substitution
- true
- Allow alternate user to be specified as part of HiveServer2 open connection request.
-
-
- hive.server2.authentication.kerberos.keytab
-
- Kerberos keytab file for server principal
-
-
- hive.server2.authentication.kerberos.principal
-
- Kerberos server principal
-
-
- hive.server2.authentication.spnego.keytab
-
-
- Keytab file for the SPNego principal, optional. A
- typical value would look like /etc/security/keytabs/spnego.service.keytab.
- This keytab would be used by HiveServer2 when Kerberos security is enabled and
- HTTP transport mode is used.
- This needs to be set only if SPNEGO is to be used in authentication.
- SPNego authentication would be honored only if valid
- hive.server2.authentication.spnego.principal
- and
- hive.server2.authentication.spnego.keytab
- are specified.
-
-
-
- hive.server2.authentication.spnego.principal
-
-
- SPNego service principal, optional. A
- typical value would look like HTTP/_HOST@EXAMPLE.COM.
- SPNego service principal would be used by HiveServer2 when Kerberos security is enabled
- and HTTP transport mode is used.
- This needs to be set only if SPNEGO is to be used in authentication.
-
-
-
- hive.server2.authentication.ldap.url
-
- LDAP connection URL
-
-
- hive.server2.authentication.ldap.baseDN
-
- LDAP base DN
-
-
- hive.server2.authentication.ldap.Domain
-
-
-
-
- hive.server2.custom.authentication.class
-
-
- Custom authentication class. Used when property
- 'hive.server2.authentication' is set to 'CUSTOM'. Provided class
- must be a proper implementation of the interface
- org.apache.hive.service.auth.PasswdAuthenticationProvider. HiveServer2
- will call its Authenticate(user, password) method to authenticate requests.
- The implementation may optionally extend Hadoop's
- org.apache.hadoop.conf.Configured class to grab Hive's Configuration object.
-
-
-
- hive.server2.authentication.pam.services
-
-
- List of the underlying PAM services that should be used when auth type is PAM.
- A file with the same name must exist in /etc/pam.d.
-
-
-
- hive.server2.enable.doAs
- true
-
- Setting this property to true will have HiveServer2 execute
- Hive operations as the user making the calls to it.
-
-
-
- hive.server2.table.type.mapping
- CLASSIC
-
- This setting reflects how HiveServer2 will report the table types for JDBC and other
- client implementations that retrieve the available tables and supported table types:
- HIVE : Exposes Hive's native table types like MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW
- CLASSIC : More generic types like TABLE and VIEW
-
-
-
- hive.server2.session.hook
-
-
-
-
- hive.server2.use.SSL
- false
-
-
-
- hive.server2.keystore.path
-
-
-
-
- hive.server2.keystore.password
-
-
-
-
- hive.security.command.whitelist
- set,reset,dfs,add,delete,compile
- Comma separated list of non-SQL Hive commands users are authorized to execute
-
-
- hive.conf.restricted.list
- hive.security.authenticator.manager,hive.security.authorization.manager
- Comma separated list of configuration options which are immutable at runtime
-
-
- hive.multi.insert.move.tasks.share.dependencies
- false
-
- If this is set all move tasks for tables/partitions (not directories) at the end of a
- multi-insert query will only begin once the dependencies for all these move tasks have been
- met.
- Advantages: If concurrency is enabled, the locks will only be released once the query has
- finished, so with this config enabled, the time when the table/partition is
- generated will be much closer to when the lock on it is released.
- Disadvantages: If concurrency is not enabled, with this disabled, the tables/partitions which
- are produced by this query and finish earlier will be available for querying
- much earlier. Since the locks are only released once the query finishes, this
- does not apply if concurrency is enabled.
-
-
-
- hive.exec.infer.bucket.sort
- false
-
- If this is set, when writing partitions, the metadata will include the bucketing/sorting
- properties with which the data was written if any (this will not overwrite the metadata
- inherited from the table if the table is bucketed/sorted)
-
-
-
- hive.exec.infer.bucket.sort.num.buckets.power.two
- false
-
- If this is set, when setting the number of reducers for the map reduce task which writes the
- final output files, it will choose a number which is a power of two, unless the user specifies
- the number of reducers to use using mapred.reduce.tasks. The number of reducers
- may be set to a power of two, only to be followed by a merge task, which would prevent
- anything from being inferred.
- With hive.exec.infer.bucket.sort set to true:
- Advantages: If this is not set, the number of buckets for partitions will seem arbitrary,
- which means that the number of mappers used for optimized joins, for example, will
- be very low. With this set, since the number of buckets used for any partition is
- a power of two, the number of mappers used for optimized joins will be the least
- number of buckets used by any partition being joined.
- Disadvantages: This may mean a much larger or much smaller number of reducers being used in the
- final map reduce job, e.g. if a job was originally going to take 257 reducers,
- it will now take 512 reducers, similarly if the max number of reducers is 511,
- and a job was going to use this many, it will now use 256 reducers.
-
-
-
- hive.merge.current.job.concatenate.list.bucketing
- true
-
-
-
- hive.merge.current.job.concatenate.list.bucketing.depth
- 0
-
-
-
- hive.optimize.listbucketing
- false
- Enable the list bucketing optimizer. The default value is false, so it is disabled by default.
-
-
- hive.server.read.socket.timeout
- 10
- Timeout in seconds for the HiveServer to close the connection if there is no response from the client; defaults to 10 seconds.
-
-
- hive.server.tcp.keepalive
- true
- Whether to enable TCP keepalive for the Hive Server. Keepalive will prevent accumulation of half-open connections.
-
-
- hive.decode.partition.name
- false
- Whether to show the unquoted partition names in query results.
-
-
- hive.execution.engine
- mr
- Chooses execution engine. Options are: mr (Map reduce, default) or tez (hadoop 2 only)
-
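- The engine can be switched per session, e.g. (the table src is an assumption):
-
-   SET hive.execution.engine=tez;
-   SELECT count(*) FROM src;   -- runs as a Tez DAG instead of a MapReduce job
-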
-
- hive.jar.directory
-
-
- This is the location Hive in Tez mode will look in to find a site-wide
- installed Hive instance.
-
-
-
- hive.user.install.directory
- hdfs:///user/
-
- If Hive (in Tez mode only) cannot find a usable Hive jar in "hive.jar.directory",
- it will upload the Hive jar to "hive.user.install.directory/user.name"
- and use it to run queries.
-
-
-
- hive.vectorized.execution.enabled
- false
-
- This flag should be set to true to enable vectorized mode of query execution.
- The default value is false.
-
-
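- A minimal sketch (vectorization applies to ORC-backed tables in this release; the table
- orc_tbl and its column are assumptions):
-
-   SET hive.vectorized.execution.enabled=true;
-   SELECT count(*) FROM orc_tbl WHERE amount > 0;
-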
-
- hive.vectorized.groupby.checkinterval
- 100000
- Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed.
-
-
- hive.vectorized.groupby.maxentries
- 1000000
-
- Max number of entries in the vector group by aggregation hashtables.
- Exceeding this will trigger a flush regardless of the memory pressure condition.
-
-
-
- hive.vectorized.groupby.flush.percent
- 0.1
- Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded.
-
-
- hive.typecheck.on.insert
- true
-
-
-
- hive.rpc.query.plan
- false
- Whether to send the query plan via local resource or RPC
-
-
- hive.compute.splits.in.am
- true
- Whether to generate the splits locally or in the AM (tez only)
-
-
- hive.prewarm.enabled
- false
- Enables container prewarm for Tez (Hadoop 2 only)
-
-
- hive.prewarm.numcontainers
- 10
- Controls the number of containers to prewarm for Tez (Hadoop 2 only)
-
-
- hive.stageid.rearrange
- none
-
-
-
- hive.explain.dependency.append.tasktype
- false
-
-
-
- hive.counters.group.name
- HIVE
- The name of counter group for internal Hive variables (CREATED_FILE, FATAL_ERROR, etc.)
-
-
- hive.server2.tez.default.queues
-
-
- A list of comma separated values corresponding to YARN queues of the same name.
- When HiveServer2 is launched in Tez mode, this configuration needs to be set
- for multiple Tez sessions to run in parallel on the cluster.
-
-
-
- hive.server2.tez.sessions.per.default.queue
- 1
-
- A positive integer that determines the number of Tez sessions that should be
- launched on each of the queues specified by "hive.server2.tez.default.queues".
- Determines the parallelism on each queue.
-
-
-
- hive.server2.tez.initialize.default.sessions
- false
-
- This flag is used in HiveServer2 to enable a user to use HiveServer2 without
- turning on Tez for HiveServer2. The user could potentially want to run queries
- over Tez without the pool of sessions.
-
-
-
- hive.support.quoted.identifiers
- column
-
- Whether to use quoted identifiers. 'none' or 'column' can be used.
- none: default (past) behavior. Implies only alphanumeric and underscore are valid characters in identifiers.
- column: implies column names can contain any character.
-
-
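- With 'column', otherwise-invalid characters become legal inside backquoted column names, e.g.:
-
-   CREATE TABLE t1 (`x+y` INT, `a?b` STRING);
-   SELECT `x+y` FROM t1;
-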
-
- hive.users.in.admin.role
-
-
- Comma separated list of users who are in admin role for bootstrapping.
- More users can be added in ADMIN role later.
-
-
-
- hive.compat
- 0.12
-
- Enable (configurable) deprecated behaviors by setting desired level of backward compatibility.
- Setting to 0.12:
- Maintains division behavior: int / int = double
-
-
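- For example, under the 0.12-compatible behavior integer division produces a double:
-
-   SELECT 3 / 2;   -- 1.5 (double), not 1
-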
-
- hive.convert.join.bucket.mapjoin.tez
- false
-
- Whether joins can be automatically converted to bucket map joins in hive
- when tez is used as the execution engine.
-
-
-
- hive.exec.check.crossproducts
- true
- Check if a plan contains a Cross Product. If there is one, output a warning to the Session's console.
-
-
- hive.localize.resource.wait.interval
- 5000
- Time in milliseconds to wait for another thread to localize the same resource for hive-tez.
-
-
- hive.localize.resource.num.wait.attempts
- 5
- The number of attempts waiting for localizing a resource in hive-tez.
-
-
- hive.tez.auto.reducer.parallelism
- false
-
- Turn on Tez's auto reducer parallelism feature. When enabled, Hive will still estimate data sizes
- and set parallelism estimates. Tez will sample source vertices' output sizes and adjust the estimates at runtime as
- necessary.
-
-
-
- hive.tez.max.partition.factor
- 2.0
- When auto reducer parallelism is enabled this factor will be used to over-partition data in shuffle edges.
-
-
- hive.tez.min.partition.factor
- 0.25
-
- When auto reducer parallelism is enabled this factor will be used to put a lower limit to the number
- of reducers that tez specifies.
-
-
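- Read together with hive.tez.auto.reducer.parallelism, one illustrative reading of the two
- factors (the exact runtime adjustment is internal to Tez): if Hive estimates 100 reducers,
- shuffle edges start over-partitioned at 100 * 2.0 = 200 and may be shrunk at runtime to as
- few as 100 * 0.25 = 25.
-
-   SET hive.tez.auto.reducer.parallelism=true;
-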
-