Index: src/contrib/fuse-dfs/test/TestFuseDFS.java
===================================================================
--- src/contrib/fuse-dfs/test/TestFuseDFS.java (revision 0)
+++ src/contrib/fuse-dfs/test/TestFuseDFS.java (revision 0)
@@ -0,0 +1,284 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.dfs.*;
+import junit.framework.TestCase;
+import java.io.*;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import java.net.*;
+
+/**
+ * Tests that the FUSE module for DFS can be mounted and that a few simple
+ * shell commands work against the mount point:
+ *   mkdir
+ *   rmdir
+ *   ls
+ *   cat
+ *
+ * cp and touch are purposely not tested because they won't work with the
+ * current module.
+ *
+ * Every test gets its own mini DFS cluster and FUSE mount from setUp() and
+ * creates the DFS entries it needs, so the tests do not rely on running in
+ * any particular order or on sharing one instance.
+ */
+public class TestFuseDFS extends TestCase {
+
+  /** Local directory the FUSE file system is mounted on. */
+  private String mpoint = "/tmp/testfuse";
+
+  /** DFS directory exercised by the mkdir, ls and rmdir tests. */
+  private Path myPath = new Path("/test/mkdirs");
+
+  private MiniDFSCluster cluster;
+  private FileSystem fileSys;
+
+  /** True only while this instance owns a live mount on mpoint. */
+  private boolean mounted;
+
+  /**
+   * Creates the test case without touching the cluster or the mount point.
+   * The throws clause is kept so existing callers still compile; the real
+   * set up lives in setUp() because JUnit builds one instance per test
+   * method before running any of them.
+   */
+  public TestFuseDFS() throws IOException, InterruptedException {
+  }
+
+  /**
+   * Set things up - create mini dfs cluster and mount the fuse filesystem.
+   */
+  protected void setUp() throws IOException, InterruptedException {
+    Configuration conf = new Configuration();
+    this.cluster = new MiniDFSCluster(conf, 1, true, null);
+    try {
+      this.fileSys = this.cluster.getFileSystem();
+      this.mount(mpoint, fileSys.getUri());
+      this.mounted = true;
+    } finally {
+      // JUnit does not call tearDown() when setUp() fails, so release the
+      // cluster here instead of leaking it.
+      if (!this.mounted) {
+        this.close();
+      }
+    }
+  }
+
+  /**
+   * Unmount and shut the cluster down after every test.
+   */
+  protected void tearDown() throws IOException, InterruptedException {
+    this.close();
+  }
+
+  /**
+   * Runs a command and waits for it to finish.
+   *
+   * @param argv the program and its arguments, one per element, so that no
+   *             shell-style splitting of paths takes place
+   * @return the exit status of the process
+   */
+  private static int run(String[] argv) throws IOException, InterruptedException {
+    return Runtime.getRuntime().exec(argv).waitFor();
+  }
+
+  /**
+   * Reads a stream until end of file and returns its content decoded with
+   * the platform default charset.
+   */
+  private static String readFully(InputStream in) throws IOException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    byte[] buf = new byte[1024];
+    int n;
+    while ((n = in.read(buf)) != -1) {
+      out.write(buf, 0, n);
+    }
+    return out.toString();
+  }
+
+  /**
+   * mount the fuse file system using assumed fuse module library installed in
+   * /usr/local/lib or somewhere else on your pre-existing LD_LIBRARY_PATH
+   *
+   * @param mountpoint local directory to mount on; created if missing
+   * @param dfs        URI of the name node; only its host and port are used
+   */
+  private void mount(String mountpoint, URI dfs) throws IOException, InterruptedException {
+    // Fall back to the class path of this JVM so the child never sees the
+    // literal string "null" when CLASSPATH is not exported.
+    String cp = System.getenv("CLASSPATH");
+    if (cp == null) {
+      cp = System.getProperty("java.class.path");
+    }
+
+    String libhdfs = "../../c++/libhdfs/";
+    String inherited = System.getenv("LD_LIBRARY_PATH");
+    String lp = (inherited == null ? "" : inherited + ":") + "/usr/local/lib:" + libhdfs;
+
+    final String[] cmd = {
+      "../src/fuse_dfs",
+      "dfs://" + dfs.getHost() + ":" + dfs.getPort(),
+      mountpoint
+    };
+    final String[] envp = {
+      "CLASSPATH=" + cp,
+      "LD_LIBRARY_PATH=" + lp
+    };
+
+    // ensure the mount point is not currently mounted; the exit status is
+    // ignored because umount fails when nothing is mounted there
+    this.umount(mountpoint);
+
+    // make the mount point if needed
+    assertEquals("mkdir -p " + mountpoint, 0, run(new String[] { "mkdir", "-p", mountpoint }));
+
+    System.err.println("cmd=" + cmd[0] + " " + cmd[1] + " " + cmd[2]);
+    // mount fuse to the mount point
+    Process p = Runtime.getRuntime().exec(cmd, envp);
+    assertEquals("fuse_dfs exit status", 0, p.waitFor());
+  }
+
+  /**
+   * unmounts fuse; best effort, the exit status is not checked.
+   */
+  private void umount(String mpoint) throws IOException, InterruptedException {
+    run(new String[] { "sudo", "umount", "-l", mpoint });
+  }
+
+  /**
+   * use shell to create a dir and then use filesys to see it exists.
+   */
+  public void testMkdir() throws IOException, InterruptedException {
+    assertEquals(0, run(new String[] { "mkdir", "-p", mpoint + "/test/mkdirs" }));
+    assertTrue(this.fileSys.exists(myPath));
+  }
+
+  /**
+   * Test ls for an existing dir; also tests that ls of a missing dir fails.
+   */
+  public void testLs() throws IOException, InterruptedException {
+    // create the directory through DFS so this test does not need testMkdir
+    fileSys.mkdirs(myPath);
+    assertTrue(fileSys.exists(myPath));
+
+    assertEquals(0, run(new String[] { "ls", mpoint + "/test/mkdirs" }));
+    assertFalse(run(new String[] { "ls", mpoint + "/test/mkdirsNotThere" }) == 0);
+  }
+
+  /**
+   * Remove a dir using the shell and use filesys to see it no longer exists.
+   */
+  public void testRmdir() throws IOException, InterruptedException {
+    // create the directory through DFS so this test does not need testMkdir
+    fileSys.mkdirs(myPath);
+    assertTrue(fileSys.exists(myPath));
+
+    assertEquals(0, run(new String[] { "rmdir", mpoint + "/test/mkdirs" }));
+
+    assertFalse(fileSys.exists(myPath));
+  }
+
+  /**
+   * Use filesys to create the hello world! file and then cat it and see its
+   * contents are correct.
+   */
+  public void testCat() throws IOException, InterruptedException {
+    Path helloPath = new Path("/test/hello");
+    String hello = "hello world!";
+
+    FSDataOutputStream s = fileSys.create(helloPath);
+    try {
+      s.write(hello.getBytes());
+    } finally {
+      s.close();
+    }
+    assertTrue(fileSys.exists(helloPath));
+
+    Process p = Runtime.getRuntime().exec(new String[] { "cat", mpoint + "/test/hello" });
+
+    // Drain stdout before waiting: a single read() may return only part of
+    // the output, or -1 when cat printed nothing.
+    String output = readFully(p.getInputStream());
+    assertEquals(0, p.waitFor());
+    assertEquals(hello, output);
+  }
+
+  /**
+   * Last-resort clean up for instances that were never closed.
+   */
+  public void finalize() {
+    try {
+      this.close();
+    } catch (Exception e) {
+      if (e instanceof InterruptedException) {
+        Thread.currentThread().interrupt();
+      }
+      // nothing can be rethrown from a finalizer, but do not hide the failure
+      System.err.println("TestFuseDFS: clean up failed: " + e);
+    }
+  }
+
+  /**
+   * Unmount and close. Safe to call more than once: only an instance that
+   * still owns the mount unmounts it, and the file system and the cluster
+   * are released even when the unmount fails.
+   */
+  public void close() throws IOException, InterruptedException {
+    try {
+      if (this.mounted) {
+        this.mounted = false;
+        this.umount(mpoint);
+      }
+    } finally {
+      try {
+        if (this.fileSys != null) {
+          this.fileSys.close();
+          this.fileSys = null;
+        }
+      } finally {
+        if (this.cluster != null) {
+          this.cluster.shutdown();
+          this.cluster = null;
+        }
+      }
+    }
+  }
+
+  /**
+   * Runs all tests in a fixed order on one instance, without a JUnit runner.
+   */
+  public static void main(String args[]) {
+    try {
+      TestFuseDFS d = new TestFuseDFS();
+      d.setUp();
+      try {
+        d.testMkdir();
+        d.testLs();
+        d.testRmdir();
+        d.testCat();
+      } finally {
+        // always unmount and shut down, even when a test fails
+        d.tearDown();
+      }
+    } catch (Exception e) {
+      System.err.println("e=" + e.getMessage());
+      e.printStackTrace();
+    }
+  }
+}
Index: src/contrib/fuse-dfs/configure.ac
===================================================================
--- src/contrib/fuse-dfs/configure.ac
(revision 0)
+++ src/contrib/fuse-dfs/configure.ac (revision 0)
@@ -0,0 +1,137 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Autoconf input file for the fuse_dfs package.
+# $Id$
+
+# AC - autoconf
+# FB - facebook (macros with this prefix are defined in acinclude.m4)
+
+#########################################################################
+# Section 1:
+# DO NOT TOUCH EXCEPT TO CHANGE Product-Name and Rev# IN AC_INIT
+
+AC_PREREQ(2.52)
+AC_INIT([fuse_dfs], [0.1.0])
+#AC_CONFIG_AUX_DIR([/usr/share/automake-1.9])
+# To install locally. NOTE(review): FB_INITIALIZE([localinstall]) and the AC_PREFIX_DEFAULT call below each set a default prefix - confirm which one takes effect.
+AC_CANONICAL_TARGET()
+FB_INITIALIZE([localinstall])
+AC_PREFIX_DEFAULT([`pwd`])
+
+# JARCH is set only for the 64-bit target patterns below and is left unset for every other target.
+case $target in
+*64*intel)
+ JARCH=intel64 ;;
+*64*amd* | *64*unknown*)
+ JARCH=amd64 ;;
+esac
+AC_SUBST(JARCH)
+
+# DEFS=""
+# AC_SUBST([DEFS])
+
+############################################################################
+# Section 2:
+# User configurable system defaults. Change with CAUTION!
+
+# User can include custom makefile rules. Uncomment and update only in PRODUCT_MK.
+# Include where appropriate in any Makefile.am as @PRODUCT_MK@
+
+# Default path to external Facebook components and shared build tools, e.g. fb303.
+# To point to other locations set the environment variable EXTERNAL_PATH.
+# DO NOT change the default. Changing the default value requires changing bootstrap.sh.
+FB_WITH_EXTERNAL_PATH([`pwd`])
+
+# Pre-defined macro to set optimized build mode. Configure with the --disable-opt option to turn off optimization. Default CFLAGS and CXXFLAGS are set to '-Wall -O3'. In debug mode they are '-Wall -g'.
+# FB_ENABLE_DEFAULT_DEBUG_BUILD
+FB_ENABLE_DEFAULT_OPT_BUILD
+
+# Predefined macro to set static library mode. Configure with the --disable-static option to turn off static lib mode.
+# FB_ENABLE_DEFAULT_SHARED
+FB_ENABLE_DEFAULT_STATIC
+
+##########################################################################
+# Section 3:
+# User Configurable
+
+# Personalized FLAG setting macro. Sets FLAG to the user specified value, overriding any default.
+# $(FLAG) can be used in Makefile.am and global mk's.
+# FB_SET_FLAG_VALUE([<FLAG>], [<value>])
+# FB_SET_FLAG_VALUE([CXXFLAGS], [-g -O3])
+
+# Personalized feature generator. Creates defines/conditionals and --enable/--disable command line options. The feature stays off until configured with its --enable-<feature> option.
+# FB_ENABLE_FEATURE([FEATURE], [feature]) OR FB_ENABLE_FEATURE([FEATURE], [feature], [\"<value>\"])
+# Example: Macro supplies -DFACEBOOK at compile time and "if FACEBOOK endif" capabilities.
+
+# FB_ENABLE_FEATURE([FACEBOOK], [facebook])
+#FB_ENABLE_FEATURE([HDFS], [hdfs])
+
+# Personalized path generator. Sets default paths. Provides --with-xx=DIR options.
+# FB_WITH_PATH([<variable>], [<option>], [<default dir>])
+# Example: sets $(thrift_dir) variable with default path.
+FB_WITH_PATH([dfs_home], [dfspath], [/usr/local/libexec/hadoop/dfs])
+FB_WITH_PATH([external_home], [externalpath], [${EXTERNAL_PATH}/external])
+FB_WITH_PATH([fuse_home], [fusehome], [/usr/local])
+FB_WITH_PATH([jdk_home], [jdkhome], [/usr/local/jdk])
+FB_WITH_PATH([protected_paths], [protectedpaths], [/,/Trash,/user])
+
+# Set default location of "php-config". User can also use "--with-php-config=<dir>" to point to another location.
+# Run "./configure --help" to see --with options.
+# PHPCONFIGLIBS and PHPCONFIGINCS are generated.
+# FB_SET_PHP([PHPCONFIGDIR], [/usr/local/bin])
+
+# Generates Makefile from Makefile.am. Modify when new subdirs are added.
+# Change Makefile.am also to add subdirectories.
+AC_CONFIG_FILES(Makefile src/Makefile)
+
+
+############################################################################
+# Section 4:
+# DO NOT TOUCH.
+
+AC_SUBST(PRODUCT_MK)
+AC_OUTPUT
+
+#############################################################################
+######### FINISH ############################################################
+
+echo "EXTERNAL_PATH $EXTERNAL_PATH"
+# NOTE(review): this runs make clean at the end of every ./configure run, presumably to force a full rebuild after reconfiguring - confirm it is intended.
+make clean
+
+#
+# NOTES FOR USER
+# Short cut to create conditional flags.
+#enable_facebook="yes"
+#AM_CONDITIONAL([FACEBOOK], [test "$enable_facebook" = yes])
+#enable_hdfs="yes"
+#AM_CONDITIONAL([HDFS], [test "$enable_hdfs" = yes])
+
+# Enable options with --enable and --disable configurable.
+#AC_MSG_CHECKING([whether to enable FACEBOOK])
+#FACEBOOK=""
+#AC_ARG_ENABLE([facebook],
+# [ --enable-facebook Enable facebook.],
+# [
+# ENABLE_FACEBOOK=$enableval
+# ],
+# [
+# ENABLE_FACEBOOK="no"
+# ]
+#)
+#AM_CONDITIONAL([FACEBOOK], [test "$ENABLE_FACEBOOK" = yes])
+#AC_MSG_RESULT($ENABLE_FACEBOOK)
+
Index: src/contrib/fuse-dfs/global_footer.mk
===================================================================
--- src/contrib/fuse-dfs/global_footer.mk (revision 0)
+++ src/contrib/fuse-dfs/global_footer.mk (revision 0)
@@ -0,0 +1,17 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# thriftstyle depends on the files listed in XBUILT_SOURCES and has no recipe of its own in this file.
+thriftstyle : $(XBUILT_SOURCES)
+
Index: src/contrib/fuse-dfs/acinclude.m4
===================================================================
--- src/contrib/fuse-dfs/acinclude.m4 (revision 0)
+++ src/contrib/fuse-dfs/acinclude.m4 (revision 0)
@@ -0,0 +1,268 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# FB_INITIALIZE(mode): initializes automake and looks up the compilers and tools; when mode is localinstall the default prefix becomes `pwd`/install. Resets PRODUCT_MK to empty.
+AC_DEFUN([FB_INITIALIZE],
+[
+AM_INIT_AUTOMAKE([ foreign 1.9.5 no-define ])
+if test "x$1" = "xlocalinstall"; then
+wdir=`pwd`
+# To use $wdir undef quote.
+#
+##########
+AC_PREFIX_DEFAULT([`pwd`/install])
+echo
+fi
+AC_PROG_CC
+AC_PROG_CXX
+AC_PROG_RANLIB(RANLIB, ranlib)
+AC_PATH_PROGS(BASH, bash)
+AC_PATH_PROGS(PERL, perl)
+AC_PATH_PROGS(PYTHON, python)
+AC_PATH_PROGS(AR, ar)
+AC_PATH_PROGS(ANT, ant)
+PRODUCT_MK=""
+])
+# FB_WITH_EXTERNAL_PATH(default): takes EXTERNAL_PATH from --with-externalpath, else from the environment, else from the given default, else from the current directory plus /external. Giving the option while the environment variable is set is an error, and so is a path that is not a directory. Also substitutes the GLOBAL_HEADER_MK and GLOBAL_FOOTER_MK include lines.
+AC_DEFUN([FB_WITH_EXTERNAL_PATH],
+[
+cdir=`pwd`
+AC_MSG_CHECKING([Checking EXTERNAL_PATH set to])
+AC_ARG_WITH([externalpath],
+ [ --with-externalpath=DIR User specified path to external facebook components.],
+ [
+ if test "x${EXTERNAL_PATH}" != "x"; then
+ echo ""
+ echo "ERROR: You have already set EXTERNAL_PATH in your environment"
+ echo "Cannot override it using --with-externalpath. Unset EXTERNAL_PATH to use this option"
+ exit 1
+ fi
+ EXTERNAL_PATH=$withval
+ ],
+ [
+ if test "x${EXTERNAL_PATH}" = "x"; then
+ EXTERNAL_PATH=$1
+ fi
+ ]
+)
+if test "x${EXTERNAL_PATH}" = "x"; then
+ export EXTERNAL_PATH="$cdir/external"
+ GLOBAL_HEADER_MK="include ${EXTERNAL_PATH}/global_header.mk"
+ GLOBAL_FOOTER_MK="include ${EXTERNAL_PATH}/global_footer.mk"
+else
+ export EXTERNAL_PATH
+ GLOBAL_HEADER_MK="include ${EXTERNAL_PATH}/global_header.mk"
+ GLOBAL_FOOTER_MK="include ${EXTERNAL_PATH}/global_footer.mk"
+fi
+AC_MSG_RESULT($EXTERNAL_PATH)
+if test ! -d ${EXTERNAL_PATH}; then
+ echo ""
+ echo "ERROR: EXTERNAL_PATH set to an nonexistent directory ${EXTERNAL_PATH}"
+ exit 1
+fi
+AC_SUBST(EXTERNAL_PATH)
+AC_SUBST(GLOBAL_HEADER_MK)
+AC_SUBST(GLOBAL_FOOTER_MK)
+])
+
+# Set option to enable optimized build (on by default; --disable-opt selects the debug flags). Overwrites CFLAGS and CXXFLAGS and sets the OPT and DEBUG conditionals for use in Makefile.am.
+AC_DEFUN([FB_ENABLE_DEFAULT_OPT_BUILD],
+[
+AC_MSG_CHECKING([whether to enable optimized build])
+AC_ARG_ENABLE([opt],
+ [ --disable-opt Set up debug mode.],
+ [
+ ENABLED_OPT=$enableval
+ ],
+ [
+ ENABLED_OPT="yes"
+ ]
+)
+if test "$ENABLED_OPT" = "yes"
+then
+ CFLAGS="-Wall -O3"
+ CXXFLAGS="-Wall -O3"
+else
+ CFLAGS="-Wall -g"
+ CXXFLAGS="-Wall -g"
+fi
+AC_MSG_RESULT($ENABLED_OPT)
+AM_CONDITIONAL([OPT], [test "$ENABLED_OPT" = yes])
+AM_CONDITIONAL([DEBUG], [test "$ENABLED_OPT" = no])
+])
+
+# Set option to enable debug mode (on by default; --disable-debug selects the optimized flags). Overwrites CFLAGS and CXXFLAGS and sets the DEBUG and OPT conditionals for use in Makefile.am.
+AC_DEFUN([FB_ENABLE_DEFAULT_DEBUG_BUILD],
+[
+AC_MSG_CHECKING([whether to enable debug build])
+AC_ARG_ENABLE([debug],
+ [ --disable-debug Set up opt mode.],
+ [
+ ENABLED_DEBUG=$enableval
+ ],
+ [
+ ENABLED_DEBUG="yes"
+ ]
+)
+if test "$ENABLED_DEBUG" = "yes"
+then
+ CFLAGS="-Wall -g"
+ CXXFLAGS="-Wall -g"
+else
+ CFLAGS="-Wall -O3"
+ CXXFLAGS="-Wall -O3"
+fi
+AC_MSG_RESULT($ENABLED_DEBUG)
+AM_CONDITIONAL([DEBUG], [test "$ENABLED_DEBUG" = yes])
+AM_CONDITIONAL([OPT], [test "$ENABLED_DEBUG" = no])
+])
+
+# Set option to enable static libs (on by default). LTYPE is .a when static; with --disable-static it is .so and the SHARED_* -fPIC flags are substituted. Sets the STATIC and SHARED conditionals.
+AC_DEFUN([FB_ENABLE_DEFAULT_STATIC],
+[
+SHARED=""
+STATIC=""
+AC_MSG_CHECKING([whether to enable static mode])
+AC_ARG_ENABLE([static],
+ [ --disable-static Set up shared mode.],
+ [
+ ENABLED_STATIC=$enableval
+ ],
+ [
+ ENABLED_STATIC="yes"
+ ]
+)
+if test "$ENABLED_STATIC" = "yes"
+then
+ LTYPE=".a"
+else
+ LTYPE=".so"
+ SHARED_CXXFLAGS="-fPIC"
+ SHARED_CFLAGS="-fPIC"
+ SHARED_LDFLAGS="-shared -fPIC"
+ AC_SUBST(SHARED_CXXFLAGS)
+ AC_SUBST(SHARED_CFLAGS)
+ AC_SUBST(SHARED_LDFLAGS)
+fi
+AC_MSG_RESULT($ENABLED_STATIC)
+AC_SUBST(LTYPE)
+AM_CONDITIONAL([STATIC], [test "$ENABLED_STATIC" = yes])
+AM_CONDITIONAL([SHARED], [test "$ENABLED_STATIC" = no])
+])
+
+# Set option to enable shared libs (on by default). LTYPE is .so and the SHARED_* -fPIC flags are substituted when shared; with --disable-shared LTYPE is .a. Sets the SHARED and STATIC conditionals.
+AC_DEFUN([FB_ENABLE_DEFAULT_SHARED],
+[
+SHARED=""
+STATIC=""
+AC_MSG_CHECKING([whether to enable shared mode])
+AC_ARG_ENABLE([shared],
+ [ --disable-shared Set up static mode.],
+ [
+ ENABLED_SHARED=$enableval
+ ],
+ [
+ ENABLED_SHARED="yes"
+ ]
+)
+if test "$ENABLED_SHARED" = "yes"
+then
+ LTYPE=".so"
+ SHARED_CXXFLAGS="-fPIC"
+ SHARED_CFLAGS="-fPIC"
+ SHARED_LDFLAGS="-shared -fPIC"
+ AC_SUBST(SHARED_CXXFLAGS)
+ AC_SUBST(SHARED_CFLAGS)
+ AC_SUBST(SHARED_LDFLAGS)
+else
+ LTYPE=".a"
+fi
+AC_MSG_RESULT($ENABLED_SHARED)
+AC_SUBST(LTYPE)
+AM_CONDITIONAL([SHARED], [test "$ENABLED_SHARED" = yes])
+AM_CONDITIONAL([STATIC], [test "$ENABLED_SHARED" = no])
+])
+
+# Generates define flags and conditionals as specified by the user: first argument is the define and conditional name, second the option name, third an optional value for the define.
+# This gets enabled *only* if the user selects the matching --enable-<feature> option.
+AC_DEFUN([FB_ENABLE_FEATURE],
+[
+ENABLE=""
+flag="$1"
+value="$3"
+AC_MSG_CHECKING([whether to enable $1])
+AC_ARG_ENABLE([$2],
+ [ --enable-$2 Enable $2.],
+ [
+ ENABLE=$enableval
+ ],
+ [
+ ENABLE="no"
+ ]
+)
+AM_CONDITIONAL([$1], [test "$ENABLE" = yes])
+if test "$ENABLE" = "yes"
+then
+ if test "x${value}" = "x"
+ then
+ AC_DEFINE([$1])
+ else
+ AC_DEFINE_UNQUOTED([$1], [$value])
+ fi
+fi
+AC_MSG_RESULT($ENABLE)
+])
+
+# FB_WITH_PATH(variable, option, default): adds a --with-<option>=DIR option and substitutes <variable> with the given directory or, without the option, with the default.
+# can also use eval $2=$withval;AC_SUBST($2)
+AC_DEFUN([FB_WITH_PATH],
+[
+USRFLAG=""
+USRFLAG=$1
+AC_MSG_CHECKING([Checking $1 set to])
+AC_ARG_WITH([$2],
+ [ --with-$2=DIR User specified path.],
+ [
+ LOC=$withval
+ eval $USRFLAG=$withval
+ ],
+ [
+ LOC=$3
+ eval $USRFLAG=$3
+ ]
+)
+AC_SUBST([$1])
+AC_MSG_RESULT($LOC)
+])
+# FB_SET_FLAG_VALUE(flag, value): assigns value to the shell variable named by flag. NOTE(review): AC_SUBST is handed the shell expression $SETFLAG rather than the flag name itself - confirm the flag is really substituted.
+AC_DEFUN([FB_SET_FLAG_VALUE],
+[
+SETFLAG=""
+AC_MSG_CHECKING([Checking $1 set to])
+SETFLAG=$1
+eval $SETFLAG=\"$2\"
+AC_SUBST([$SETFLAG])
+AC_MSG_RESULT($2)
+])
+
+# NOTES
+# When using a Bourne shell if/else statement, the branch must contain more than just a macro call.
+# EX1 is not correct. EX2 is correct
+# EX1: if test "$XX" = "yes"; then
+# AC_SUBST(xx)
+# fi
+# EX2: if test "$XX" = "yes"; then
+# xx="foo"
+# AC_SUBST(xx)
+# fi
Index: src/contrib/fuse-dfs/src/fuse_dfs.c
===================================================================
--- src/contrib/fuse-dfs/src/fuse_dfs.c (revision 0)
+++ src/contrib/fuse-dfs/src/fuse_dfs.c (revision 0)
@@ -0,0 +1,1064 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define FUSE_USE_VERSION 26 + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef linux +/* For pread()/pwrite() */ +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SETXATTR +#include +#endif +#include // for ceil +#include +#include +#include +#include + +#include + +// Constants +// +static const int default_id = 99; // nobody - not configurable since soon uids in dfs, yeah! +static const size_t rd_buf_size = 128 * 1024; +static const int blksize = 512; +static const size_t rd_cache_buf_size = 10*1024*1024;//how much of reads to buffer here + +/** options for fuse_opt.h */ +struct options { + char* server; + int port; + int debug; + int nowrites; + int no_trash; +}options; + + +typedef struct dfs_fh_struct { + hdfsFile hdfsFH; + char *buf; + tSize sizeBuffer; //what is the size of the buffer we have + off_t startOffset; //where the buffer starts in the file +} dfs_fh; + +#include + +/** macro to define options */ +#define DFSFS_OPT_KEY(t, p, v) { t, offsetof(struct options, p), v } + +/** keys for FUSE_OPT_ options */ +static void print_usage(const char *pname) +{ + fprintf(stdout,"USAGE: %s [--debug] [--help] [--version] [--nowrites] [--notrash] --server= --port= [fuse options]\n",pname); + fprintf(stdout,"NOTE: a useful fuse option is -o allow_others and -o default_permissions\n"); + fprintf(stdout,"NOTE: optimizations include -o entry_timeout=500 -o attr_timeout=500\n"); + fprintf(stdout,"NOTE: debugging option for fuse is -debug\n"); +} + + +#define 
OPTIMIZED_READS 1 + + +enum + { + KEY_VERSION, + KEY_HELP, + }; + +static struct fuse_opt dfs_opts[] = + { + DFSFS_OPT_KEY("--server=%s", server, 0), + DFSFS_OPT_KEY("--port=%d", port, 0), + DFSFS_OPT_KEY("--debug", debug, 1), + DFSFS_OPT_KEY("--nowrites", nowrites, 1), + DFSFS_OPT_KEY("--notrash", no_trash, 1), + + FUSE_OPT_KEY("-v", KEY_VERSION), + FUSE_OPT_KEY("--version", KEY_VERSION), + FUSE_OPT_KEY("-h", KEY_HELP), + FUSE_OPT_KEY("--help", KEY_HELP), + FUSE_OPT_END + }; + +static const char *program; + +int dfs_options(void *data, const char *arg, int key, struct fuse_args *outargs) +{ + + if (key == KEY_VERSION) { + fprintf(stdout,"%s %s\n",program,_FUSE_DFS_VERSION); + exit(0); + } else if (key == KEY_HELP) { + print_usage(program); + exit(0); + } else { + // try and see if the arg is a URI for DFS + int tmp_port; + char tmp_server[1024]; + + if (!sscanf(arg,"dfs://%1024[a-zA-Z0-9_.-]:%d",tmp_server,&tmp_port)) { + printf("didn't recognize %s\n",arg); + fuse_opt_add_arg(outargs,arg); + } else { + options.port = tmp_port; + options.server = strdup(tmp_server); + } + } + return 0; +} + + +// +// Structure to store fuse_dfs specific data +// this will be created and passed to fuse at startup +// and fuse will pass it back to us via the context function +// on every operation. 
+// +typedef struct dfs_context_struct { + int debug; + char *nn_hostname; + int nn_port; + hdfsFS fs; + int nowrites; + int no_trash; + + // todo: + // total hack city - use this to strip off the dfs url from the filenames + // that the dfs API is now providing in 0.14.5 + // Will do a better job of fixing this once I am back from vacation + // + char dfs_uri[1024]; + int dfs_uri_len; +} dfs_context; + + +// +// Start of read-only functions +// + +static int dfs_getattr(const char *path, struct stat *st) +{ + // retrieve dfs specific data + dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; + + // check params and the context var + assert(dfs); + assert(path); + assert(st); + + // if not connected, try to connect and fail out if we can't. + if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) { + syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__); + return -EIO; + } + + // call the dfs API to get the actual information + hdfsFileInfo *info = hdfsGetPathInfo(dfs->fs,path); + + if (NULL == info) { + return -ENOENT; + } + + // initialize the stat structure + memset(st, 0, sizeof(struct stat)); + + // setup hard link info - for a file it is 1 else num entries in a dir + 2 (for . and ..) + if (info[0].mKind == kObjectKindDirectory) { + int numEntries = 0; + hdfsFileInfo *info = hdfsListDirectory(dfs->fs,path,&numEntries); + + if (info) { + hdfsFreeFileInfo(info,numEntries); + } + st->st_nlink = numEntries + 2; + } else { + // not a directory + st->st_nlink = 1; + } + + // set stat metadata + st->st_size = (info[0].mKind == kObjectKindDirectory) ? 4096 : info[0].mSize; + st->st_blksize = blksize; + st->st_blocks = ceil(st->st_size/st->st_blksize); + st->st_mode = (info[0].mKind == kObjectKindDirectory) ? 
(S_IFDIR | 0777) : (S_IFREG | 0666); + st->st_uid = default_id; + st->st_gid = default_id; + st->st_atime = info[0].mLastMod; + st->st_mtime = info[0].mLastMod; + st->st_ctime = info[0].mLastMod; + + // free the info pointer + hdfsFreeFileInfo(info,1); + + return 0; +} + +static int dfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + (void) offset; + (void) fi; + + // retrieve dfs specific data + dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; + + // check params and the context var + assert(dfs); + assert(path); + assert(buf); + + // if not connected, try to connect and fail out if we can't. + if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) { + syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__); + return -EIO; + } + + int path_len = strlen(path); + + // call dfs to read the dir + int numEntries = 0; + hdfsFileInfo *info = hdfsListDirectory(dfs->fs,path,&numEntries); + + // NULL means either the directory doesn't exist or maybe IO error. + if (NULL == info) { + return -ENOENT; + } + + int i ; + for (i = 0; i < numEntries; i++) { + + // check the info[i] struct + if (NULL == info[i].mName) { + syslog(LOG_ERR,"ERROR: for <%s> info[%d].mName==NULL %s:%d", path, i, __FILE__,__LINE__); + continue; + } + + struct stat st; + memset(&st, 0, sizeof(struct stat)); + + // set to 0 to indicate not supported for directory because we cannot (efficiently) get this info for every subdirectory + st.st_nlink = (info[i].mKind == kObjectKindDirectory) ? 0 : 1; + + // setup stat size and acl meta data + st.st_size = info[i].mSize; + st.st_blksize = 512; + st.st_blocks = ceil(st.st_size/st.st_blksize); + st.st_mode = (info[i].mKind == kObjectKindDirectory) ? 
(S_IFDIR | 0777) : (S_IFREG | 0666); + st.st_uid = default_id; + st.st_gid = default_id; + st.st_atime = info[i].mLastMod; + st.st_mtime = info[i].mLastMod; + st.st_ctime = info[i].mLastMod; + + // hack city: todo fix the below to something nicer and more maintainable but + // with good performance + // strip off the path but be careful if the path is solely '/' + // NOTE - this API started returning filenames as full dfs uris + const char *const str = info[i].mName + dfs->dfs_uri_len + path_len + ((path_len == 1 && *path == '/') ? 0 : 1); + + // pack this entry into the fuse buffer + int res = 0; + if ((res = filler(buf,str,&st,0)) != 0) { + syslog(LOG_ERR, "ERROR: readdir filling the buffer %d %s:%d\n",res, __FILE__, __LINE__); + } + + } + + // insert '.' and '..' + const char *const dots [] = { ".",".."}; + for (i = 0 ; i < 2 ; i++) + { + struct stat st; + memset(&st, 0, sizeof(struct stat)); + + // set to 0 to indicate not supported for directory because we cannot (efficiently) get this info for every subdirectory + st.st_nlink = 0; + + // setup stat size and acl meta data + st.st_size = 512; + st.st_blksize = 512; + st.st_blocks = 1; + st.st_mode = (S_IFDIR | 0777); + st.st_uid = default_id; + st.st_gid = default_id; + // todo fix below times + st.st_atime = 0; + st.st_mtime = 0; + st.st_ctime = 0; + + const char *const str = dots[i]; + + // flatten the info using fuse's function into a buffer + int res = 0; + if ((res = filler(buf,str,&st,0)) != 0) { + syslog(LOG_ERR, "ERROR: readdir filling the buffer %d %s:%d", res, __FILE__, __LINE__); + } + } + + // free the info pointers + hdfsFreeFileInfo(info,numEntries); + + return 0; +} + +static int dfs_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + // retrieve dfs specific data + dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; + + // check params and the context var + assert(dfs); + assert(path); + assert(buf); + + // if not connected, try to 
connect and fail out if we can't. + if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) { + syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__); + return -EIO; + } + + +#ifdef OPTIMIZED_READS + dfs_fh *fh = (dfs_fh*)fi->fh; + //fprintf(stderr, "Cache bounds for %s: %llu -> %llu (%d bytes). Check for offset %llu\n", path, fh->startOffset, fh->startOffset + fh->sizeBuffer, fh->sizeBuffer, offset); + if (fh->sizeBuffer == 0 || offset < fh->startOffset || offset > (fh->startOffset + fh->sizeBuffer) ) + { + // do the actual read + //fprintf (stderr,"Reading %s from HDFS, offset %llu, amount %d\n", path, offset, rd_cache_buf_size); + const tSize num_read = hdfsPread(dfs->fs, fh->hdfsFH, offset, fh->buf, rd_cache_buf_size); + if (num_read < 0) { + syslog(LOG_ERR, "Read error - pread failed for %s with return code %d %s:%d", path, num_read, __FILE__, __LINE__); + hdfsDisconnect(dfs->fs); + dfs->fs = NULL; + return -EIO; + } + fh->sizeBuffer = num_read; + fh->startOffset = offset; + //fprintf (stderr,"Read %d bytes of %s from HDFS\n", num_read, path); + } + + char* local_buf = fh->buf; + const tSize cacheLookupOffset = offset - fh->startOffset; + local_buf += cacheLookupOffset; + //fprintf(stderr,"FUSE requested %d bytes of %s for offset %d in file\n", size, path, offset); + const tSize amount = cacheLookupOffset + size > fh->sizeBuffer + ? 
fh->sizeBuffer - cacheLookupOffset + : size; + //fprintf(stderr,"Reading %s from cache, %d bytes from position %d\n", path, amount, cacheLookupOffset); + //fprintf(stderr,"Cache status for %s: %d bytes cached from offset %llu\n", path, fh->sizeBuffer, fh->startOffset); + memcpy(buf, local_buf, amount); + //fprintf(stderr,"Read %s from cache, %d bytes from position %d\n", path, amount, cacheLookupOffset); + //fprintf(stderr,"Cache status for %s: %d bytes cached from offset %llu\n", path, fh->sizeBuffer, fh->startOffset); + return amount; + +#else + // NULL means either file doesn't exist or maybe IO error - i.e., the dfs_open must have failed + if (NULL == (void*)fi->fh) { + // should never happen + return -EIO; + } + syslog(LOG_DEBUG,"buffer size=%d\n",(int)size); + + // do the actual read + const tSize num_read = hdfsPread(dfs->fs, (hdfsFile)fi->fh, offset, buf, size); + + // handle errors + if (num_read < 0) { + syslog(LOG_ERR, "Read error - pread failed for %s with return code %d %s:%d", path, num_read, __FILE__, __LINE__); + hdfsDisconnect(dfs->fs); + dfs->fs = NULL; + return -EIO; + } + return num_read; +#endif + +} + +static int dfs_statfs(const char *path, struct statvfs *st) +{ + // retrieve dfs specific data + dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; + + // check params and the context var + assert(path); + assert(st); + assert(dfs); + + // init the stat structure + memset(st,0,sizeof(struct statvfs)); + + // if not connected, try to connect and fail out if we can't. 
+ if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) { + syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__); + return -EIO; + } + + const long cap = hdfsGetCapacity(dfs->fs); + const long used = hdfsGetUsed(dfs->fs); + const long bsize = hdfsGetDefaultBlockSize(dfs->fs); + + // fill in the statvfs structure + + /* FOR REFERENCE: + struct statvfs { + unsigned long f_bsize; // file system block size + unsigned long f_frsize; // fragment size + fsblkcnt_t f_blocks; // size of fs in f_frsize units + fsblkcnt_t f_bfree; // # free blocks + fsblkcnt_t f_bavail; // # free blocks for non-root + fsfilcnt_t f_files; // # inodes + fsfilcnt_t f_ffree; // # free inodes + fsfilcnt_t f_favail; // # free inodes for non-root + unsigned long f_fsid; // file system id + unsigned long f_flag; / mount flags + unsigned long f_namemax; // maximum filename length + }; + */ + + st->f_bsize = bsize; + st->f_frsize = st->f_bsize; + st->f_blocks = cap/st->f_bsize; + st->f_bfree = (cap-used)/st->f_bsize; + st->f_bavail = st->f_bfree; + st->f_files = 1000; + st->f_ffree = 500; + st->f_favail = 500; + st->f_fsid = 1023; + st->f_flag = ST_RDONLY | ST_NOSUID; + st->f_namemax = 1023; + + return 0; +} + +static int dfs_access(const char *path, int mask) +{ + // no permissions on dfs, always a success + return 0; +} + +// +// The remainder are write functionality and therefore not implemented right now +// + + +static char **protectedpaths; + + +static int dfs_mkdir(const char *path, mode_t mode) +{ + // retrieve dfs specific data + dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; + + // check params and the context var + assert(path); + assert(dfs); + + // if not connected, try to connect and fail out if we can't. 
+  if (NULL == dfs->fs) {
+    dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port);
+    if (NULL == dfs->fs) {
+      syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+      return -EIO;
+    }
+  }
+
+  assert('/' == *path);
+
+  // refuse to create anything that is listed in protectedpaths
+  int idx;
+  for (idx = 0; NULL != protectedpaths[idx]; idx++) {
+    if (0 == strcmp(path, protectedpaths[idx])) {
+      syslog(LOG_ERR,"ERROR: hdfs trying to create the directory: %s", path);
+      return -EACCES;
+    }
+  }
+
+  // both the nowrites setting and a libhdfs failure surface as -EIO
+  if (dfs->nowrites || hdfsCreateDirectory(dfs->fs, path)) {
+    syslog(LOG_ERR,"ERROR: hdfs trying to create directory %s",path);
+    return -EIO;
+  }
+
+  return 0;
+}
+
+/**
+ * FUSE rename handler: moves `from` to `to` inside dfs.
+ * Returns 0 on success, -EACCES when either end is a protected path and
+ * -EIO when nowrites is set, the connection fails or hdfsRename fails.
+ */
+static int dfs_rename(const char *from, const char *to)
+{
+  // retrieve dfs specific data
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // check params and the context var
+  assert(from);
+  assert(to);
+  assert(dfs);
+
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs) {
+    dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port);
+    if (NULL == dfs->fs) {
+      syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+      return -EIO;
+    }
+  }
+
+  assert('/' == *from);
+  assert('/' == *to);
+
+  // neither the source nor the destination may be a protected path
+  char **guarded;
+  for (guarded = protectedpaths; NULL != *guarded; guarded++) {
+    if (0 == strcmp(from, *guarded) || 0 == strcmp(to, *guarded)) {
+      syslog(LOG_ERR,"ERROR: hdfs trying to rename directories %s to %s",from,to);
+      return -EACCES;
+    }
+  }
+
+  if (dfs->nowrites || hdfsRename(dfs->fs, from, to)) {
+    syslog(LOG_ERR,"ERROR: hdfs trying to rename %s to %s",from, to);
+    return -EIO;
+  }
+
+  return 0;
+}
+
+
+/**
+ * FUSE rmdir handler for the directory `path`.
+ */
+static int dfs_rmdir(const char *path)
+{
+  // retrieve dfs specific data
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // check params and the context var
+  assert(path);
+  assert(dfs);
+
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) {
+    syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  assert('/' == *path);
+
+  // protected paths can never be removed
+  int i ;
+  for (i = 0; protectedpaths[i]; i++) {
+    if (strcmp(path, protectedpaths[i]) == 0) {
+      syslog(LOG_ERR,"ERROR: hdfs trying to delete the directory: %s ",path);
+      return -EACCES;
+    }
+  }
+
+  // only the entry count is needed, to tell whether the directory is empty
+  int numEntries = 0;
+  hdfsFileInfo *info = hdfsListDirectory(dfs->fs,path,&numEntries);
+
+  // free the info pointers
+  if (NULL != info) {
+    hdfsFreeFileInfo(info,numEntries);
+  }
+
+  if (numEntries) {
+    return -ENOTEMPTY;
+  }
+
+
+
+  // since these commands go through the programmatic hadoop API, there is no
+  // trash feature. So, force it here.
+  // But make sure the person isn't deleting from Trash itself :)
+  // NOTE: /Trash is in protectedpaths so they cannot delete all of trash
+  if (!dfs->no_trash && strncmp(path, "/Trash", strlen("/Trash")) != 0) {
+
+    char target[4096];
+    char dir[4096];
+    int status;
+    int len;
+
+    // locate the last path component; path is const and must not be written
+    // to, so the parent directory is taken by length instead of by patching
+    // a NUL into the caller's string
+    const char *name = rindex(path, '/');
+    assert(name);
+
+    // full target inside the Trash; bounded so a long path cannot overflow
+    len = snprintf(target, sizeof(target), "/Trash/Current%s", path);
+    if (len < 0 || (size_t)len >= sizeof(target)) {
+      syslog(LOG_ERR,"ERROR: hdfs Trash path too long for %s", path);
+      return -ENAMETOOLONG;
+    }
+
+    // directory that must exist in the Trash to hold the target: everything
+    // before the last '/' of path, followed by a trailing '/'
+    len = snprintf(dir, sizeof(dir), "/Trash/Current%.*s/", (int)(name - path), path);
+    if (len < 0 || (size_t)len >= sizeof(dir)) {
+      syslog(LOG_ERR,"ERROR: hdfs Trash path too long for %s", path);
+      return -ENAMETOOLONG;
+    }
+
+    // if the directory doesn't already exist in the Trash
+    // then we go through with the rename
+    if ( hdfsExists(dfs->fs, target) != 0) { // 0 means it exists.  weird
+      // make the directory to put it in in the Trash
+      if ((status = dfs_mkdir(dir,0)) != 0) {
+        return status;
+      }
+
+      // do the rename
+      return dfs_rename(path,target);
+
+    }
+    // if the directory exists in the Trash, then we don't bother doing the rename
+    // and just delete the existing one by falling though.
+  }
+
+  if (dfs->nowrites || hdfsDelete(dfs->fs, path)) {
+    syslog(LOG_ERR,"ERROR: hdfs trying to delete the directory %s",path);
+    return -EIO;
+  }
+  return 0;
+}
+
+
+/**
+ * FUSE unlink handler for the file `path`.
+ * Unless no_trash is set (or the path already starts with /Trash) the file
+ * is renamed into /Trash/Current instead of being deleted.
+ */
+static int dfs_unlink(const char *path)
+{
+  // retrieve dfs specific data
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // check params and the context var
+  assert(path);
+  assert(dfs);
+
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) {
+    syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  assert('/' == *path);
+
+  // protected paths can never be removed
+  int i ;
+  for (i = 0; protectedpaths[i]; i++) {
+    if (strcmp(path, protectedpaths[i]) == 0) {
+      syslog(LOG_ERR,"ERROR: hdfs trying to delete the directory: %s ",path);
+      return -EACCES;
+    }
+  }
+
+
+
+  // since these commands go through the programmatic hadoop API, there is no
+  // trash feature. So, force it here.
+  // But make sure the person isn't deleting from Trash itself :)
+  // NOTE: /Trash is in protectedpaths so they cannot delete all of trash
+  if (!dfs->no_trash && strncmp(path, "/Trash", strlen("/Trash")) != 0) {
+
+    char target[4096];
+    char dir[4096];
+    int status;
+    int len;
+
+    // locate the last path component; path is const and must not be written
+    // to, so the parent directory is taken by length instead of by patching
+    // a NUL into the caller's string
+    const char *name = rindex(path, '/');
+    assert(name);
+
+    // full target inside the Trash; bounded so a long path cannot overflow
+    len = snprintf(target, sizeof(target), "/Trash/Current%s", path);
+    if (len < 0 || (size_t)len >= sizeof(target)) {
+      syslog(LOG_ERR,"ERROR: hdfs Trash path too long for %s", path);
+      return -ENAMETOOLONG;
+    }
+
+    // directory that must exist in the Trash to hold the target: everything
+    // before the last '/' of path, followed by a trailing '/'
+    len = snprintf(dir, sizeof(dir), "/Trash/Current%.*s/", (int)(name - path), path);
+    if (len < 0 || (size_t)len >= sizeof(dir)) {
+      syslog(LOG_ERR,"ERROR: hdfs Trash path too long for %s", path);
+      return -ENAMETOOLONG;
+    }
+
+    // if this is a file and it's already got a copy in the Trash, to be conservative, we
+    // don't do the delete.
+    if (0 == hdfsExists(dfs->fs, target)) {
+      syslog(LOG_ERR,"ERROR: hdfs trying to delete a file that was already deleted so cannot back it to Trash: %s",target);
+      return -EIO;
+    }
+
+    // the Trash directory that will hold the file has to exist first
+    status = dfs_mkdir(dir,0);
+    if (0 != status) {
+      return status;
+    }
+
+    // moving the file into the Trash takes the place of deleting it
+    return dfs_rename(path,target);
+  }
+
+  // Trash is bypassed: remove the file for real
+  if (dfs->nowrites || hdfsDelete(dfs->fs, path)) {
+    syslog(LOG_ERR,"ERROR: hdfs trying to delete the file %s",path);
+    return -EIO;
+  }
+
+  return 0;
+}
+
+/** FUSE chmod handler: always answers -ENOTSUP. */
+static int dfs_chmod(const char *path, mode_t mode)
+{
+  (void)mode;
+  (void)path;
+  return -ENOTSUP;
+}
+
+/** FUSE chown handler: always answers -ENOTSUP. */
+static int dfs_chown(const char *path, uid_t uid, gid_t gid)
+{
+  (void)gid;
+  (void)uid;
+  (void)path;
+  return -ENOTSUP;
+}
+
+/** FUSE truncate handler: always answers -ENOTSUP. */
+static int dfs_truncate(const char *path, off_t size)
+{
+  (void)size;
+  (void)path;
+  return -ENOTSUP;
+}
+
+long tempfh = 0;
+
+/**
+ * FUSE open handler: opens `path` in dfs and stores the handle in fi->fh.
+ */
+static int dfs_open(const char *path, struct fuse_file_info *fi)
+{
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // sanity-check the arguments and the context
+  assert(path);
+  assert('/' == *path);
+  assert(dfs);
+
+  int ret = 0;
+
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) {
+    syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  // 0x8000 is always passed in and hadoop doesn't like it, so killing it here
+  // bugbug figure out what this flag is and report problem to Hadoop JIRA
+  // NOTE(review): 0x8000 looks like O_LARGEFILE on Linux/x86 - confirm
+  int flags = (fi->flags & 0x7FFF);
+
+#ifdef OPTIMIZED_READS
+  // per-open state: the hdfs handle plus a read cache buffer
+  dfs_fh *fh = (dfs_fh*)malloc(sizeof (dfs_fh));
+  if (NULL == fh) {
+    syslog(LOG_ERR, "ERROR: out of memory opening file %s %s:%d\n", path,__FILE__, __LINE__);
+    return -ENOMEM;
+  }
+
+  fh->hdfsFH = (hdfsFile)hdfsOpenFile(dfs->fs, path, flags,  0, 3, 0);
+  fh->buf = (char*)malloc(rd_cache_buf_size*sizeof (char));
+  fh->startOffset = 0;
+  fh->sizeBuffer = 0;
+
+  if (0 == fh->hdfsFH) {
+    syslog(LOG_ERR, "ERROR: could not open file %s dfs %s:%d\n", path,__FILE__, __LINE__);
+    ret = -EIO;
+  } else if (NULL == fh->buf) {
+    syslog(LOG_ERR, "ERROR: out of memory opening file %s %s:%d\n", path,__FILE__, __LINE__);
+    hdfsCloseFile(dfs->fs, fh->hdfsFH);
+    ret = -ENOMEM;
+  }
+
+  if (0 != ret) {
+    // a failed open gets no matching release call, so everything allocated
+    // here has to be given back now and fi->fh must not keep a stale pointer
+    free(fh->buf);
+    free(fh);
+    fi->fh = (uint64_t)0;
+  } else {
+    fi->fh = (uint64_t)fh;
+  }
+#else
+
+  // the hdfs handle itself is stored in fi->fh
+  fi->fh = (uint64_t)hdfsOpenFile(dfs->fs, path, flags,  0, 3, 0);
+
+  if (0 == fi->fh) {
+    syslog(LOG_ERR, "ERROR: could not open file %s dfs %s:%d\n", path,__FILE__, __LINE__);
+    ret = -EIO;
+  }
+
+#endif
+
+  return ret;
+}
+
+/**
+ * FUSE write handler: writes `size` bytes from `buf` to the open file in fi->fh.
+ * `offset` is not used by the active code (the offset check is commented out).
+ */
+static int dfs_write(const char *path, const char *buf, size_t size,
+                     off_t offset, struct fuse_file_info *fi)
+{
+  // retrieve dfs specific data
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // check params and the context var
+  assert(path);
+  assert(dfs);
+  assert('/' == *path);
+
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) {
+    syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+    return -EIO;
+  }
+#ifdef OPTIMIZED_READS
+
+  // fi->fh carries a dfs_fh wrapper; it must exist before it is dereferenced
+  dfs_fh *fh = (dfs_fh*)fi->fh;
+  if (NULL == fh) {
+    syslog(LOG_ERR, "ERROR: fuse problem - no file_handle for %s %s:%d\n",path, __FILE__, __LINE__);
+    return -EIO;
+  }
+  hdfsFile file_handle = (hdfsFile)fh->hdfsFH;
+
+#else
+  hdfsFile file_handle = (hdfsFile)fi->fh;
+#endif
+
+  if (NULL == file_handle) {
+    syslog(LOG_ERR, "ERROR: fuse problem - no file_handle for %s %s:%d\n",path, __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  //  syslog(LOG_DEBUG,"hdfsTell(dfs,%ld)\n",(long)file_handle);
+//  tOffset cur_offset = hdfsTell(dfs->fs, file_handle);
+
+  // if (cur_offset != offset) {
+  //   syslog(LOG_ERR, "ERROR: user trying to random access write to a file %d!=%d for %s %s:%d\n",(int)cur_offset, (int)offset,path, __FILE__, __LINE__);
+//    return -EIO;
+//  }
+
+
+  // buf is raw data of `size` bytes with no terminating NUL, so it must not be
+  // printed with %s; log only the handle and the byte count
+  syslog(LOG_DEBUG,"hdfsWrite(dfs,%ld,%d bytes)\n",(long)file_handle,(int)size);
+  tSize length = hdfsWrite(dfs->fs, file_handle, buf, size);
+
+
+  if (length < 0 || (size_t)length != size) {
+    syslog(LOG_ERR, "ERROR: fuse problem - could not write all the bytes for %s %d!=%d%s:%d\n",path,length,(int)size, __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  // a FUSE write handler reports the number of bytes written; returning 0
+  // would tell the caller that nothing was written
+  return length;
+
+}
+
+/**
+ * FUSE release handler: closes the hdfs file held in fi->fh.
+ */
+int dfs_release (const char *path, struct fuse_file_info *fi) {
+
+  // retrieve dfs specific data
+  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
+
+  // check params and the context var
+  assert(path);
+  assert(dfs);
+  assert('/' == *path);
+  // if not connected, try to connect and fail out if we can't.
+  if (NULL == dfs->fs && NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname,dfs->nn_port))) {
+    syslog(LOG_ERR, "ERROR: could not connect to dfs %s:%d\n", __FILE__, __LINE__);
+#ifdef OPTIMIZED_READS
+    // the hdfs handle cannot be closed without a connection, but the per-open
+    // cache allocated in dfs_open still has to be given back
+    if (NULL != (void*)fi->fh) {
+      dfs_fh *orphan = (dfs_fh*)fi->fh;
+      free(orphan->buf);
+      free(orphan);
+      fi->fh = (uint64_t)0;
+    }
+#endif
+    return -EIO;
+  }
+
+  if (NULL == (void*)fi->fh) {
+    return 0;
+  }
+
+#ifdef OPTIMIZED_READS
+  dfs_fh *fh = (dfs_fh*)fi->fh;
+  hdfsFile file_handle = (hdfsFile)fh->hdfsFH;
+  free(fh->buf);
+  free(fh);
+
+#else
+  hdfsFile file_handle = (hdfsFile)fi->fh;
+#endif
+
+  // clear the handle before any return below so no exit path leaves fi->fh
+  // pointing at freed memory
+  fi->fh = (uint64_t)0;
+
+  if (NULL == file_handle) {
+    return 0;
+  }
+
+  if (hdfsCloseFile(dfs->fs, file_handle) != 0) {
+    syslog(LOG_ERR, "ERROR: dfs problem - could not close file_handle for %s %s:%d\n",path, __FILE__, __LINE__);
+    return -EIO;
+  }
+
+  return 0;
+}
+
+/** FUSE mknod handler: logs the call and reports success without creating anything. */
+static int dfs_mknod(const char *path, mode_t mode, dev_t rdev) {
+  (void)path;
+  (void)mode;
+  (void)rdev;
+  syslog(LOG_DEBUG,"in dfs_mknod");
+  return 0;
+}
+
+/**
+ * FUSE create handler: delegates to dfs_open.
+ * `mode` holds permission bits, not open(2) flags, so it must not be OR-ed
+ * into fi->flags; doing so would alter the access mode dfs_open sees.
+ */
+static int dfs_create(const char *path, mode_t mode, struct fuse_file_info *fi)
+{
+  (void)mode;
+  syslog(LOG_DEBUG,"in dfs_create");
+
+  return dfs_open(path, fi);
+}
+
+/** FUSE flush handler: nothing to do, always succeeds. */
+int dfs_flush(const char *path, struct fuse_file_info *fi) {
+  (void)path;
+  (void)fi;
+  return 0;
+}
+
+
+/** Placeholder: intentionally empty. */
+void dfs_setattr(struct stat *attr, int to_set, struct fuse_file_info *fi)
+{
+  (void)attr;
+  (void)to_set;
+  (void)fi;
+}
+
+/**
+ * FUSE destroy handler: drops the dfs connection held in the context `ptr`.
+ */
+void dfs_destroy (void *ptr)
+{
+  dfs_context *dfs = (dfs_context*)ptr;
+  if (NULL == dfs) {
+    return;
+  }
+  hdfsDisconnect(dfs->fs);
+  dfs->fs = NULL;
+}
+
+
+// Hacked up function to basically do:
+//  protectedpaths = split(PROTECTED_PATHS,',');
+
+static void init_protectedpaths() {
+  // PROTECTED_PATHS should be a #defined value from autoconf
+  // set it with configure --with-protectedpaths=/,/user,/user/foo
+  // note , seped with no other spaces and no quotes around it
+  char *tmp = PROTECTED_PATHS;
+
+  assert(tmp);
+
+  // handle degenerate case up front.
+  if (0 == *tmp) {
+    protectedpaths = (char**)malloc(sizeof(char*));
+    if (NULL == protectedpaths) {
+      syslog(LOG_ERR, "FATAL: could not malloc protected paths - out of memory %s:%d", __FILE__, __LINE__);
+      exit(1);
+    }
+    protectedpaths[0] = NULL;
+    return;
+  }
+
+  // first pass: count the entries (one more than the number of commas)
+  int i = 0;
+  while (tmp && (NULL != (tmp = index(tmp,',')))) {
+    tmp++; // pass the ,
+    i++;
+  }
+  i++; // for the last entry
+  i++; // for the final NULL
+  protectedpaths = (char**)malloc(sizeof(char*)*i);
+  if (NULL == protectedpaths) {
+    syslog(LOG_ERR, "FATAL: could not malloc protected paths - out of memory %s:%d", __FILE__, __LINE__);
+    exit(1);
+  }
+
+  // second pass: copy each comma separated value into its own string;
+  // the bound keeps the last slot free for the terminating NULL
+  tmp = PROTECTED_PATHS;
+  int j  = 0;
+  while (NULL != tmp && j < i - 1) {
+    int length;
+    char *eos = index(tmp,',');
+    if (NULL != eos) {
+      length = eos - tmp; // length of this value
+    } else {
+      length = strlen(tmp);
+    }
+    protectedpaths[j] = (char*)malloc(sizeof(char)*length+1);
+    if (NULL == protectedpaths[j]) {
+      syslog(LOG_ERR, "FATAL: could not malloc protected paths - out of memory %s:%d", __FILE__, __LINE__);
+      exit(1);
+    }
+    strncpy(protectedpaths[j], tmp, length);
+    protectedpaths[j][length] = '\0';
+    if (eos) {
+      tmp = eos + 1;
+    } else {
+      tmp = NULL;
+    }
+    j++;
+  }
+  protectedpaths[j] = NULL;
+  /*
+  j  = 0;
+  while (protectedpaths[j]) {
+    printf("protectedpaths[%d]=%s\n",j,protectedpaths[j]);
+    fflush(stdout);
+    j++;
+  }
+  exit(1);
+  */
+}
+
+
+
+/**
+ * FUSE init handler: builds the dfs_context that is returned as private data.
+ * Exits the process if the context cannot be allocated.
+ */
+void *dfs_init()
+{
+
+  //
+  // Create a private struct of data we will pass to fuse here and which
+  // will then be accessible on every call.
+  //
+  dfs_context *dfs = (dfs_context*)malloc(sizeof (dfs_context));
+
+  if (NULL == dfs) {
+    syslog(LOG_ERR, "FATAL: could not malloc fuse dfs context struct - out of memory %s:%d", __FILE__, __LINE__);
+    exit(1);
+  }
+
+  // initialize the context
+  dfs->debug                 = options.debug;
+  dfs->nn_hostname           = options.server;
+  dfs->nn_port               = options.port;
+  dfs->fs                    = NULL;
+  dfs->nowrites              = options.nowrites;
+  dfs->no_trash              = options.no_trash;
+
+  // clear the whole buffer (a length of 0 cleared nothing) and bound the
+  // formatting so a long host name cannot run past the end of it
+  // NOTE(review): assumes dfs_uri is a char array member of dfs_context, so
+  // sizeof yields the buffer size - confirm against the struct declaration
+  bzero(dfs->dfs_uri, sizeof(dfs->dfs_uri));
+  snprintf(dfs->dfs_uri, sizeof(dfs->dfs_uri), "dfs://%s:%d/",dfs->nn_hostname,dfs->nn_port);
+  dfs->dfs_uri_len = strlen(dfs->dfs_uri);
+
+  // use ERR level to ensure it makes it into the log.
+  syslog(LOG_ERR, "mounting %s", dfs->dfs_uri);
+
+  // build the protected path list before any operation can consult it
+  init_protectedpaths();
+
+  return (void*)dfs;
+}
+
+
+// Operations handed to fuse_main; the commented out entries are handlers
+// that exist in this file but are deliberately not registered.
+static struct fuse_operations dfs_oper = {
+  .getattr	= dfs_getattr,
+  .access	= dfs_access,
+  .readdir	= dfs_readdir,
+  .destroy	= dfs_destroy,
+  .init		= dfs_init,
+  .open		= dfs_open,
+  .read		= dfs_read,
+  .statfs	= dfs_statfs,
+  .mkdir	= dfs_mkdir,
+  .rmdir	= dfs_rmdir,
+  .rename	= dfs_rename,
+  .unlink	= dfs_unlink,
+  .release	= dfs_release,
+  //  .create	= dfs_create,
+  //  .write	= dfs_write,
+  //  .flush	= dfs_flush,
+  //.xsetattr	= dfs_setattr,
+  //  .mknod	= dfs_mknod,
+  .chmod	= dfs_chmod,
+  .chown	= dfs_chown,
+  //  .truncate	= dfs_truncate,
+};
+
+
+/**
+ * Entry point: parses the command line into `options`, requires a server
+ * and a port, then runs the FUSE main loop.
+ * Returns the fuse_main result, or -1 when option parsing fails; exits with
+ * status 1 when the server or port is missing.
+ */
+int main(int argc, char *argv[])
+{
+  umask(0);
+
+  program = argv[0];
+  struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+
+  /* clear structure that holds our options */
+  memset(&options, 0, sizeof(struct options));
+
+  if (fuse_opt_parse(&args, &options, dfs_opts, dfs_options) == -1) {
+    /** error parsing options */
+    fuse_opt_free_args(&args);
+    return -1;
+  }
+
+  if (options.server == NULL || options.port == 0) {
+    print_usage(argv[0]);
+    fuse_opt_free_args(&args);
+    // a missing server/port is a usage error, so report failure to the caller
+    exit(1);
+  }
+  int ret = fuse_main(args.argc, args.argv, &dfs_oper, NULL);
+
+  if (ret) printf("\n");
+
+  /** free arguments */
+  fuse_opt_free_args(&args);
+
+  return ret;
+}
Index: src/contrib/fuse-dfs/src/fuse_dfs_wrapper.sh
===================================================================
--- src/contrib/fuse-dfs/src/fuse_dfs_wrapper.sh (revision 0)
+++ src/contrib/fuse-dfs/src/fuse_dfs_wrapper.sh (revision 0)
@@ -0,0 +1,21 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Site-specific settings: edit HADOOP_HOME and the paths below before use.
+export HADOOP_HOME=/mnt/vol/hive/stable/cluster
+export LD_LIBRARY_PATH=/usr/local/java/jre/lib/amd64/server:/usr/local/share/hdfs/libhdfs/:/usr/local/lib
+export CLASSPATH=/usr/java/jre1.6.0_01/lib/ext/:/usr/java/jre1.6.0_01/lib/:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:$HADOOP_HOME/lib/commons-logging-api-1.0.4.jar:$HADOOP_HOME/lib:.:$HADOOP_HOME/lib/log4j-1.2.13.jar:$HADOOP_HOME/hadoop-core.jar:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:$HADOOP_HOME/lib/log4j-1.2.13.jar
+# Usage: fuse_dfs_wrapper.sh dfs://host:port /mount/point
+./fuse_dfs "$1" "$2" -o allow_other
Property changes on: src/contrib/fuse-dfs/src/fuse_dfs_wrapper.sh
___________________________________________________________________
Name: svn:executable
   + *
Index: src/contrib/fuse-dfs/src/Makefile.am
===================================================================
--- src/contrib/fuse-dfs/src/Makefile.am (revision 0)
+++ src/contrib/fuse-dfs/src/Makefile.am (revision 0)
@@ -0,0 +1,19 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# NOTE(review): the /home/pwyckoff/... libhdfs paths below are machine specific; they presumably should come from a configure variable - confirm
+bin_PROGRAMS = fuse_dfs
+fuse_dfs_SOURCES = fuse_dfs.c
+AM_CPPFLAGS= -D_FILE_OFFSET_BITS=64 -I/usr/local -I/home/pwyckoff/projects/hadoop/VENDOR/hadoop-0.15.3/src/c++/libhdfs/ -I/usr/local/java/include/linux/ -I$(dfs_home)/include -D_FUSE_DFS_VERSION=\"$(PACKAGE_VERSION)\" -DPROTECTED_PATHS=\"$(protected_paths)\"
+AM_LDFLAGS= -L/home/pwyckoff/projects/hadoop/VENDOR/hadoop-0.15.3/src/c++/libhdfs -lhdfs -L$(fuse_home)/lib -lfuse -L$(jdk_home)/jre/lib/$(JARCH)/server -ljvm
Index: src/contrib/fuse-dfs/plain_bootstrap.sh
===================================================================
--- src/contrib/fuse-dfs/plain_bootstrap.sh (revision 0)
+++ src/contrib/fuse-dfs/plain_bootstrap.sh (revision 0)
@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Regenerate the autotools files, run configure with the paths below, then clean.
+aclocal
+automake -a
+autoconf
+./configure --with-dfspath=/usr/local/hdfs --with-jdkhome=/usr/local/jdk1.5.0_07 --with-protectedpaths=/,/user
+make clean
Property changes on: src/contrib/fuse-dfs/plain_bootstrap.sh
___________________________________________________________________
Name: svn:executable
   + *
Index: src/contrib/fuse-dfs/Makefile.am
===================================================================
--- src/contrib/fuse-dfs/Makefile.am (revision 0)
+++ src/contrib/fuse-dfs/Makefile.am (revision 0)
@@ -0,0 +1,23 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+@GLOBAL_HEADER_MK@
+
+@PRODUCT_MK@
+
+SUBDIRS = . src
+
+@GLOBAL_FOOTER_MK@
Index: src/contrib/fuse-dfs/README.BUILD
===================================================================
--- src/contrib/fuse-dfs/README.BUILD (revision 0)
+++ src/contrib/fuse-dfs/README.BUILD (revision 0)
@@ -0,0 +1,19 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# +run ./plain_bootstrap.sh and then make + +NOTE: you will need to edit plain_bootstrap.sh to reflect the correct hdfs/fuse/jdk paths. + Index: src/contrib/fuse-dfs/README =================================================================== --- src/contrib/fuse-dfs/README (revision 0) +++ src/contrib/fuse-dfs/README (revision 0) @@ -0,0 +1,83 @@ +# +# Copyright 2005 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +This is a FUSE module for Hadoop's HDFS. + +It allows one to mount HDFS as a Unix filesystem and optionally export that mount point to other machines. + +For now, file writes are disabled because they require HADOOP-1700 (file appends), which probably won't be ready until around 0.18. + +rmdir, mv, mkdir and rm are all supported; cp, touch, ... are not. + +BUILDING: + + +What you need to build it: + +1. A Linux kernel > 2.6.9, or a kernel module from FUSE - i.e., you compile it yourself and then modprobe it. You are better off with the former option if possible. + +Note that, for now, if you use the kernel with FUSE included, it doesn't allow you to export this through NFS, so be warned. See the FUSE email list for more about this. + +2. The Hadoop HDFS C API header files and the library. These are in hadoop_root/src/c++/libhdfs along with a Makefile that is invoked from Hadoop's top level build.xml, so everything should be built already. + +3.
A JDK installed along with its JRE. This is needed for #2. + +These things can be passed to configure via the following options: + + --with-dfspath=DIR User specified path. + --with-fusehome=DIR User specified path. + --with-jdkhome=DIR User specified path. + +First run ./plain_bootstrap.sh and then ./configure --with-dfspath= --with-fusehome= --with-jdkhome= + or edit plain_bootstrap.sh to specify your paths in its call to configure. + +-------------------------------------------------------------------------------------------------------------------------------- + +INSTALLING: + +1. mkdir /mnt/dfs (or wherever you want to mount it) +2. ./fuse_dfs dfs://hadoop_server1.foo.com:9000 /mnt/dfs -o allow_other -d + +You may want to use the wrapper so you can set the right LD_LIBRARY_PATH and CLASSPATH - especially if you will be invoking this from fstab or as root. The wrapper is fuse_dfs_wrapper.sh - edit HADOOP_HOME in the wrapper before using it! + +The -d means debug - i.e., don't daemonize, and print debugging statements. + +(Note - common problems are that you don't have libhdfs.so or libjvm.so or libfuse.so on your LD_LIBRARY_PATH. Add them or use ldconfig to add them to the global path.) + +Try ls /mnt/dfs - if this works, you're in pretty good condition. + +--------------------------------------------------------------------------------------------------------------------------------- + +DEPLOYING: + +In a root shell do the following: + +1. add the following to /etc/fstab - + fuse_dfs#dfs://hadoop_server.foo.com:9000 /mnt/dfs fuse allow_other,rw 0 0 + +2. mount /mnt/dfs + Expect problems with not finding fuse_dfs. You will probably need to add it to /sbin, and then expect problems finding the above 3 libraries. Add these using ldconfig.
+ +--------------------------------------------------------------------------------------------------------------------------------- + +EXPORTING: + +Add the following to /etc/exports: + + /mnt/dfs *.foo.com(no_root_squash,rw,fsid=1,sync) + +Note: I have recently had some permission problems with my export and am still debugging them, so you may run into problems here! + Index: src/contrib/fuse-dfs/global_header.mk =================================================================== --- src/contrib/fuse-dfs/global_header.mk (revision 0) +++ src/contrib/fuse-dfs/global_header.mk (revision 0) @@ -0,0 +1,50 @@ +# +# Copyright 2005 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+#
+ifneq ($$(XBUILT_SOURCES),)
+    XBUILT_SOURCES := $$(XBUILT_SOURCES) $$(XTARGET)
+else
+    XBUILT_SOURCES := $$(XTARGET)
+endif
+# showvars prints the build variables; clean-common removes everything matching gen-*
+showvars:
+	@echo BUILT_SOURCES = $(BUILT_SOURCES)
+	@echo XBUILT_SOURCES = $(XBUILT_SOURCES)
+	@echo DEFS = $(DEFS)
+	@echo CXXFLAGS = $(CXXFLAGS)
+	@echo AM_CXXFLAGS = $(AM_CXXFLAGS)
+	@echo CPPFLAGS = $(CPPFLAGS)
+	@echo AM_CPPFLAGS = $(AM_CPPFLAGS)
+	@echo LDFLAGS = $(LDFLAGS)
+	@echo AM_LDFLAGS = $(AM_LDFLAGS)
+	@echo LDADD = $(LDADD)
+	@echo LIBS = $(LIBS)
+	@echo EXTERNAL_LIBS = $(EXTERNAL_LIBS)
+	@echo EXTERNAL_PATH = $(EXTERNAL_PATH)
+	@echo MAKE = $(MAKE)
+	@echo MAKE_FLAGS = $(MAKE_FLAGS)
+	@echo AM_MAKEFLAGS = $(AM_MAKEFLAGS)
+	@echo top_builddir = $(top_builddir)
+	@echo top_srcdir = $(top_srcdir)
+	@echo srcdir = $(srcdir)
+	@echo PHPVAL = $(PHPVAL)
+	@echo PHPCONFIGDIR = $(PHPCONFIGDIR)
+	@echo PHPCONFIGINCLUDEDIR = $(PHPCONFIGINCLUDEDIR)
+	@echo PHPCONFIGINCLUDES = $(PHPCONFIGINCLUDES)
+	@echo PHPCONFIGLDFLAGS = $(PHPCONFIGLDFLAGS)
+	@echo PHPCONFIGLIBS = $(PHPCONFIGLIBS)
+.PHONY: showvars clean-common
+clean-common:
+	rm -rf gen-*