package org.apache.hadoop.fs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;

/**
 * The Files class provides an interface to the application writer for
 * using the Hadoop file system.
 * It provides a set of methods for the usual operations: create, open,
 * list, etc.
 *
 * <p>TBD: The methods have not been filled out yet.
 * Also, the exceptions thrown by the methods need to be better specified.
 *
 * <h3>Path Names</h3>
 *
 * The Hadoop file system supports a URI name space and URI names.
 * TBD - explain more here.
 * Two common Hadoop file system implementations are
 * <ul>
 *   <li>the local file system: {@code file:///path}</li>
 *   <li>the hdfs file system: {@code hdfs://nnAddress:nnPort/path}</li>
 * </ul>
 *
 * In addition, the user can set a default file system (i.e. scheme and
 * authority) so that slash-relative names (such as {@code /foo/bar}) are
 * resolved relative to that default FS.
 * Similarly a user can also have working-directory-relative names (i.e. names
 * not starting with a slash). While the working directory is generally in the
 * same default FS, the wd can be in a different FS; in particular, changing
 * the default file system DOES NOT change the working directory.
 *
 * <p>Hence Hadoop path names can be one of:
 * <ul>
 *   <li>fully qualified URI: {@code scheme://authority/path}</li>
 *   <li>slash-relative names: {@code /path} - relative to the default
 *       file system</li>
 *   <li>wd-relative names: {@code path} - relative to the working dir</li>
 * </ul>
 *
 * <h3>Configuration and defaults</h3>
 *
 * The default configuration is obtained from the application config
 * (TBD: reference the config documentation here).
 * The config contains defaults for
 * <ul>
 *   <li>the default file system</li>
 *   <li>the home directory prefix (default is "/user/"); the initial working
 *       directory is this prefix followed by the value of the
 *       {@code user.name} system property</li>
 *   <li>replication factor</li>
 *   <li>block size</li>
 *   <li>buffer size</li>
 * </ul>
 *
 * These defaults can be overridden by specifying the parameters as part of
 * the specific methods (such as create) or by setting them via the
 * appropriate setDefaultXX methods below.
 *
 * <h3>Usage Model for the Files class</h3>
 *
 * Example 1: use the default config read from {@code $HADOOP_CONFIG/core.xml};
 * unspecified values come from core-defaults.xml in the release jar.
 * <pre>
 *   myFiles = new Files();                     // uses the default config
 *   myFiles.create(path, ...);
 *   myFiles.setWorkingDir(path);
 *   myFiles.open(path, ...);
 *   myFiles.setDefaultReplicationFactor(..);   // Can change some defaults
 * </pre>
 *
 * Example 2: use a specific config, ignoring {@code $HADOOP_CONFIG}.
 * <pre>
 *   configX = someConfigSomeOnePassedToYou;
 *   myFiles = new Files(configX);              // Copies configX (not a pointer)
 *   myFiles.create(path, ...);
 *   myFiles.setWorkingDir(path);
 *   myFiles.setDefaultReplicationFactor(..);   // Can change some defaults
 *                                              // But configX is unchanged
 * </pre>
 */
public class Files {

  /**
   * Mutable holder for the per-{@link Files} defaults: default file system,
   * working directory, block size and replication factor.
   *
   * The values are read out of the supplied {@link Configuration} once, at
   * construction time; the Configuration object itself is not retained.
   *
   * NOTE(review): this is a non-static inner class although it never touches
   * the enclosing instance; it could become a static nested class if no
   * same-package code instantiates it through an outer instance.
   */
  class FsConfig {
    // TODO Why URI in one case and Path in other??
    private URI defaultFileSystem;
    private Path workingDir;
    private long defaultBlockSize;
    private short defaultReplicationFactor;

    /**
     * Reads the defaults from {@code conf}.
     *
     * The initial working directory is the configured home-directory prefix
     * followed by the {@code user.name} system property, qualified with the
     * default file system.
     *
     * @param conf the configuration to read the defaults from
     */
    public FsConfig(final Configuration conf) {
      defaultFileSystem = URI.create(FsConfigUtil.getDefaultFS(conf));
      final String homeDir =
          FsConfigUtil.getHomeDir(conf) + System.getProperty("user.name");
      // Qualify the wd with the default FS, exactly as setWorkingDir() does.
      // Using only defaultFileSystem.getPath() as the parent would (a) hand
      // Path an empty parent string for authority-only URIs such as
      // hdfs://host:port and (b) drop scheme/authority, so a later change of
      // the default FS would silently move the working directory.
      workingDir = new Path(new Path(defaultFileSystem), new Path(homeDir));
      defaultBlockSize = FsConfigUtil.getDefaultBlockSize(conf);
      defaultReplicationFactor =
          FsConfigUtil.getDefaultReplicationFactor(conf);
    }

    /**
     * Writes the default file system, block size and replication factor
     * into {@code exportIntoThis}. The working directory is not exported.
     *
     * @param exportIntoThis the configuration that receives the values
     */
    public void exportConfig(Configuration exportIntoThis) {
      FsConfigUtil.setDefaultFS(exportIntoThis, defaultFileSystem.toString());
      FsConfigUtil.setDefaultBlockSize(exportIntoThis, defaultBlockSize);
      FsConfigUtil.setDefaultReplicationFactor(exportIntoThis,
          defaultReplicationFactor);
    }

    /** @return the URI of the default file system */
    public URI getDefaultFS() {
      return defaultFileSystem;
    }

    /** @param uri the new default file system */
    public void setDefaultFS(URI uri) {
      defaultFileSystem = uri;
    }

    /**
     * @param uri the new default file system, as a string accepted by
     *            {@link URI#create(String)}
     * @throws IllegalArgumentException if {@code uri} is not a valid URI
     */
    public void setDefaultFS(String uri) {
      defaultFileSystem = URI.create(uri);
    }

    /** @return the current working directory */
    public Path getWorkingDir() {
      return workingDir;
    }

    /**
     * Sets the working directory.
     *
     * {@code path} can be one of
     * <ul>
     *   <li>relative path: "foo/bar" - resolved against the current
     *       working directory</li>
     *   <li>absolute without scheme: "/foo/bar" - resolved against the
     *       default file system</li>
     *   <li>fully qualified with scheme: "xx://auth/foo/bar"</li>
     * </ul>
     * Illegal WDs:
     * <ul>
     *   <li>relative with scheme: "xx:foo/bar"</li>
     * </ul>
     *
     * The wd is stored as an absolute path. This ensures that the default FS
     * can be changed while leaving the wd unchanged.
     *
     * @param path the new working directory
     * @throws IllegalArgumentException if {@code path} has a scheme but a
     *         relative path part
     */
    public void setWorkingDir(Path path) {
      final boolean hasScheme = path.toUri().isAbsolute();
      final boolean pathPartAbsolute = path.isPathComponentAbsolute();
      if (hasScheme && !pathPartAbsolute) {
        // path of type scheme:foo/bar are not supported
        throw new IllegalArgumentException(
            "Unsupported name: has scheme but relative path-part");
      }
      // A wd-relative name must be resolved against the current working
      // directory (which may live on a different FS than the default one),
      // not against the root of the default FS. Slash-relative and fully
      // qualified names keep resolving against the default FS.
      final Path base =
          pathPartAbsolute ? new Path(defaultFileSystem) : workingDir;
      workingDir = new Path(base, path);
    }

    /** @return the default replication factor */
    public short getDefaultReplicationFactor() {
      return defaultReplicationFactor;
    }

    /** @param rf the new default replication factor */
    public void setDefaultReplicationFactor(short rf) {
      defaultReplicationFactor = rf;
    }

    /** @return the default block size */
    public long getDefaultBlockSize() {
      return defaultBlockSize;
    }

    /** @param bs the new default block size */
    public void setDefaultBlockSize(long bs) {
      defaultBlockSize = bs;
    }
  }

  /** The defaults (default FS, wd, block size, replication) of this object. */
  private final FsConfig myConfig;

  public static final String LOCAL_FS_URI = "file:///";

  /**
   * To use the server side defaults for Replication factor and block size
   * specify SERVER_DEFAULT.
   */
  public static final short SERVER_DEFAULT = -1;

  /**
   * To use the config defaults for buff size, Replication factor & block size
   * specify CONFIG_DEFAULT.
   */
  public static final short CONFIG_DEFAULT = -2;

  /**
   * Creates a Files object whose defaults are read from a freshly
   * constructed {@link Configuration}.
   */
  public Files() {
    this(new Configuration());
  }

  /**
   * Creates a Files object whose defaults are read from {@code conf}.
   * The values are copied out; later changes made through this object do not
   * modify {@code conf}.
   *
   * @param conf the configuration to read the defaults from
   */
  public Files(final Configuration conf) {
    myConfig = new FsConfig(conf);
  }

  /**
   * Slash-relative pathnames are opened relative to the default file system.
   *
   * This method sets the default file system, overriding what was derived
   * from the config file.
   *
   * @param uri - the default file system
   *
   * TBD
   * Q: Should the type be string or path instead?
   * Q: Shall we call this "setRootFileSystem" instead?
   */
  public void setDefaultFileSystem(URI uri) {
    myConfig.setDefaultFS(uri);
  }

  /**
   * Set the working directory for wd-relative names (such as "foo/bar").
   *
   * @param p the new working directory; a wd-relative name is resolved
   *          against the current working directory, a slash-relative name
   *          against the default file system
   * @throws IOException declared for future implementations; not thrown by
   *         the current code
   * @throws IllegalArgumentException if {@code p} has a scheme but a
   *         relative path part (e.g. "xx:foo/bar")
   */
  public void setWorkingDir(Path p) throws IOException {
    myConfig.setWorkingDir(p);
  }

  /**
   * Sets the default block size for newly created files, overriding what
   * was derived from the config file.
   *
   * @param blksize
   *          value of SERVER_DEFAULT implies use the default block size of the
   *          target file server at which the file is created.
   */
  public void setDefaultBlockSize(final int blksize) {
    myConfig.setDefaultBlockSize(blksize);
  }

  /**
   * Sets the default block size for newly created files, overriding what
   * was derived from the config file.
   *
   * Same as {@link #setDefaultBlockSize(int)} but accepts the full
   * {@code long} range that block sizes use everywhere else in this class.
   *
   * @param blksize
   *          value of SERVER_DEFAULT implies use the default block size of the
   *          target file server at which the file is created.
   */
  public void setDefaultBlockSize(final long blksize) {
    myConfig.setDefaultBlockSize(blksize);
  }

  /**
   * Sets the default replication factor for newly created files, overriding
   * what was derived from the config file.
   *
   * @param repFac
   *          value of SERVER_DEFAULT means use the default replication factor
   *          of the target file server at which the file is created.
   */
  public void setDefaultReplicationFactor(final short repFac) {
    myConfig.setDefaultReplicationFactor(repFac);
  }

  /**
   * This method exports the default config.
   *
   * The following values are exported: the default file system, the default
   * block size and the default replication factor.
   *
   * @param exportIntoThis - the keys and values of the default config are
   *   inserted into exportIntoThis. If it contained the same keys their values
   *   would be replaced, otherwise new keys and values would be inserted.
   */
  public void exportConfig(Configuration exportIntoThis) {
    myConfig.exportConfig(exportIntoThis);
  }

  /**
   * This default config is derived from the supplied config.
   * Not implemented yet; currently a no-op.
   *
   * @param config - from where to import.
   *   If config does not have the required keys then defaults will
   *   be added.
   */
  public void importConfig(final Configuration config) {
    //TBD
  }

  /**
   * Create or overwrite file on indicated path and returns an output stream
   * for writing into the file.
   * Not implemented yet; currently returns null.
   *
   * @param f the file name to open
   * @param permission the permission of the new file
   * @param overwrite if true and file exists, the file is overwritten,
   *   if false and file exists, error is thrown.
   * @throws IOException
   *
   * @see #setPermission(Path, FsPermission)
   */
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite) throws IOException {
    return null; //TBD
  }

  /**
   * Create or overwrite file on indicated path and returns an output stream
   * for writing into the file.
   * Not implemented yet; currently returns null.
   *
   * @param f the file name to open
   * @param permission the permission of the new file
   * @param overwrite if true and file exists, the file is overwritten,
   *   if false and file exists, error is thrown.
   * @param bufferSize the size of the buffer;
   *   value of CONFIG_DEFAULT implies use default in config
   * @param replication block replication for file
   *   value of SERVER_DEFAULT implies use S-Side default
   *   value of CONFIG_DEFAULT implies use default in config
   * @param blockSize
   *   value of SERVER_DEFAULT implies use S-Side default
   *   value of CONFIG_DEFAULT implies use default in config
   * @param progress if non null, used to report progress
   * @throws IOException
   *
   * @see #setPermission(Path, FsPermission)
   */
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    return null; //TBD
  }

  /**
   * Append to an existing file (optional operation? XXX).
   * Not implemented yet; currently returns null.
   *
   * NOTE(review): this method is static, unlike the other file operations,
   * so it has no access to this object's default FS / working directory -
   * confirm whether that is intended before filling it out.
   *
   * @param f the existing file to be appended.
   * @param bufferSize the size of the buffer
   *   value of CONFIG_DEFAULT implies use default in config
   * @param progress if non null, used to report progress
   * @throws IOException
   * @throws FileNotFoundException if file does not exist
   */
  public static FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException, FileNotFoundException {
    return null; //TBD
  }

  /**
   * This append operation creates the file if it does not exist.
   * This functionality is not there today - shall we add it? XXXX
   * Not implemented yet; currently returns null.
   *
   * Append to a file (create if it does not exist)
   * @param f the existing file to be appended.
   * @param bufferSize the size of the buffer;
   *   value of CONFIG_DEFAULT implies use default in config
   * @param progress if non null, used to report progress
   * @param replication block replication for file
   *   value of SERVER_DEFAULT implies use S-Side default
   *   value of CONFIG_DEFAULT implies use default in config
   * @param blockSize
   *   value of SERVER_DEFAULT implies use S-Side default
   *   value of CONFIG_DEFAULT implies use default in config
   * @throws IOException
   */
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress, short replication, long blockSize)
      throws IOException {
    return null; //TBD
  }

  /**
   * Make the given file and all non-existent parents into
   * directories. Has the semantics of Unix 'mkdir -p'.
   * Existence of the directory hierarchy is not an error.
   * Not implemented yet; currently returns false.
   *
   * NOTE(review): this method is static, unlike the other file operations,
   * so it has no access to this object's default FS / working directory -
   * confirm whether that is intended before filling it out.
   *
   * @param f the directory to create
   * @param permission the permission of the created directories
   * @throws IOException
   */
  public static boolean mkdirs(Path f, FsPermission permission)
      throws IOException {
    return false; //TBD
  }

  /**
   * Delete a file.
   * Not implemented yet; currently returns false.
   *
   * @param f the path to delete.
   * @param recursive if path is a directory and set to
   *   true, the directory is deleted else throws an exception. In
   *   case of a file the recursive can be set to either true or false.
   * @return true if delete is successful else false.
   * @throws IOException
   */
  public boolean delete(Path f, boolean recursive) throws IOException {
    return false; //TBD
  }

  /**
   * Set replication for an existing file.
   * Not implemented yet; currently always returns true.
   *
   * @param src file name
   * @param replication new replication
   * @throws IOException
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  public boolean setReplication(Path src, short replication)
      throws IOException {
    return true; //TBD
  }

  /**
   * Renames Path src to Path dst. Can take place on local fs
   * or remote DFS.
   * Not implemented yet; currently returns false.
   *
   * @param src the path to rename
   * @param dst the new name
   * @throws IOException
   */
  public boolean rename(Path src, Path dst) throws IOException {
    return false; //TBD
  }

  /**
   * Set permission of a path.
   * Not implemented yet; currently a no-op.
   *
   * @param p the path
   * @param permission the new permission
   * @throws IOException
   */
  public void setPermission(Path p, FsPermission permission)
      throws IOException {
    // TBD
  }

  /**
   * Set owner of a path (i.e. a file or a directory).
   * The parameters username and groupname cannot both be null.
   * Not implemented yet; currently a no-op.
   *
   * @param p The path
   * @param username If it is null, the original username remains unchanged.
   * @param groupname If it is null, the original groupname remains unchanged.
   * @throws IOException
   */
  public void setOwner(Path p, String username, String groupname)
      throws IOException {
    // TBD
  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * Not implemented yet; currently returns null.
   *
   * @param f the file name to open
   * @param bufferSize the size of the buffer to be used.
   *   value of CONFIG_DEFAULT implies use default in config
   * @throws IOException
   */
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    return null; //TBD
  }

  /**
   * Return a file status object that represents the path.
   * Not implemented yet; currently returns null.
   *
   * @param f The path we want information from
   * @return a FileStatus object
   * @throws FileNotFoundException when the path does not exist;
   *         IOException see specific implementation
   */
  public FileStatus getFileStatus(Path f) throws IOException {
    return null; //TBD
  }

  /**
   * Return an array containing hostnames, offset and size of
   * portions of the given file. For a nonexistent
   * file or regions, null will be returned.
   *
   * This call is most helpful with DFS, where it returns
   * hostnames of machines that contain the given file.
   *
   * The FileSystem will simply return an elt containing 'localhost'.
   * Not implemented yet; currently returns null.
   *
   * @param file the status of the file to look up
   * @param start the offset of the first byte of interest
   * @param len the length of the region of interest
   * @throws IOException
   */
  public BlockLocation[] getFileBlockLocations(FileStatus file,
      long start, long len) throws IOException {
    return null; //TBD
  }

  /**
   * Copy file from src to target.
   * Not implemented yet; currently a no-op.
   *
   * @param src the file to copy
   * @param target the destination
   * @throws IOException
   */
  public void copy(final Path src, final Path target) throws IOException {
    // TBD
  }
}