XMLWordPrintableJSON

    Details

    • Type: Sub-task
    • Status: Open
    • Priority: Minor
    • Resolution: Unresolved
    • Affects Version/s: 3.2.0
    • Fix Version/s: None
    • Component/s: fs/azure
    • Labels:
      None

      Description

          • NOTE: apparently a workaround is to use any arbitrary string as the password. Azure will allow access to an open storage account even with a wrong password.

      It does not seem possible to access storage accounts without passwords using abfs, but it is possible using wasb.

       

      This sample code (Spark based) illustrates the issue; the following code using abfs_path will throw an exception:

      Exception in thread "main" java.lang.IllegalArgumentException: Invalid account key.
              at org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials.<init>(SharedKeyCredentials.java:70)
              at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.initializeClient(AzureBlobFileSystemStore.java:812)
              at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.<init>(AzureBlobFileSystemStore.java:149)
              at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:108)
              at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
              at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
              at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
              at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
              at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
              at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
      

        While using the wasbs_path works normally:

      import org.apache.spark.api.java.function.FilterFunction;
      import org.apache.spark.sql.RuntimeConfig;
      import org.apache.spark.sql.SparkSession;
      import org.apache.spark.sql.Dataset;
      import org.apache.spark.sql.Row;
      
      /**
       * Reproduction case for HADOOP-16417: reading the same public (open)
       * storage account succeeds through a wasbs:// path but fails through an
       * abfs:// path with "IllegalArgumentException: Invalid account key."
       * when the account key is an empty string.
       */
      public class SimpleApp {

          static String blob_account_name = "azureopendatastorage";
          static String blob_container_name = "gfsweatherdatacontainer";
          static String blob_relative_path = "GFSWeather/GFSProcessed";
          // Intentionally empty: the storage account is public, so no real key exists.
          static String blob_sas_token = "";
          static String abfs_path =
                  "abfs://" + blob_container_name + "@" + blob_account_name
                          + ".dfs.core.windows.net/" + blob_relative_path;
          static String wasbs_path =
                  "wasbs://" + blob_container_name + "@" + blob_account_name
                          + ".blob.core.windows.net/" + blob_relative_path;


          public static void main(String[] args) {

              SparkSession session = SparkSession.builder().appName("NOAAGFS Run").getOrCreate();
              configureAzureHadoopConnetor(session);

              RuntimeConfig runtimeConf = session.conf();
              // Register the (empty) account key for both the DFS and Blob endpoints.
              runtimeConf.set("fs.azure.account.key." + blob_account_name + ".dfs.core.windows.net", blob_sas_token);
              runtimeConf.set("fs.azure.account.key." + blob_account_name + ".blob.core.windows.net", blob_sas_token);

              System.out.println("Creating parquet dataset");
              // This is where the abfs path throws "Invalid account key."
              Dataset<Row> weatherData = session.read().parquet(abfs_path);

              System.out.println("Creating temp view");
              weatherData.createOrReplaceTempView("source");

              System.out.println("SQL");
              session.sql("SELECT * FROM source LIMIT 10").show();
              session.stop();
          }

          /**
           * Wires the WASB/ABFS filesystem implementation classes into the
           * session configuration so both URI schemes resolve.
           */
          public static void configureAzureHadoopConnetor(SparkSession session) {
              RuntimeConfig runtimeConf = session.conf();

              // wasb / wasbs scheme bindings.
              runtimeConf.set("fs.AbstractFileSystem.wasb.impl", "org.apache.hadoop.fs.azure.Wasb");
              runtimeConf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.Wasbs");
              runtimeConf.set("fs.wasb.impl", "org.apache.hadoop.fs.azure.NativeAzureFileSystem");
              runtimeConf.set("fs.wasbs.impl", "org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure");

              runtimeConf.set("fs.azure.secure.mode", false);

              // abfs / abfss scheme bindings.
              runtimeConf.set("fs.abfs.impl", "org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem");
              runtimeConf.set("fs.abfss.impl", "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem");

              runtimeConf.set("fs.AbstractFileSystem.abfs.impl", "org.apache.hadoop.fs.azurebfs.Abfs");
              runtimeConf.set("fs.AbstractFileSystem.abfss.impl", "org.apache.hadoop.fs.azurebfs.Abfss");

              // Only meaningful when fs.azure.secure.mode is true: it would make
              // NativeAzureFileSystem generate SAS keys locally, in-process.
              // With fs.azure.secure.mode set to false it has no effect.
              runtimeConf.set("fs.azure.local.sas.key.mode", false);
          }
      }
      

      Sample build.gradle

      plugins {
          id 'java'
      }
      
      group 'org.samples'
      version '1.0-SNAPSHOT'
      
      sourceCompatibility = 1.8
      
      repositories {
          mavenCentral()
      }
      
      dependencies {
          // 'implementation' replaces the deprecated 'compile' configuration,
          // which was removed entirely in Gradle 7.
          implementation 'org.apache.spark:spark-sql_2.12:2.4.3'
      }
      

        Attachments

        1. HADOOP-16417.000.patch
          5 kB
          Masatake Iwasaki

          Activity

            People

            • Assignee:
              iwasakims Masatake Iwasaki
              Reporter:
              jlpedrosa Jose Luis Pedrosa
            • Votes:
              1 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

              • Created:
                Updated: