# Spark standalone image: Debian stretch + Python 3.5 + Oracle JDK 8 +
# Hadoop 3.0.0 + Spark 2.3.2 ("without-hadoop" build). The default command
# starts a Spark standalone master.
#
# NOTE(review): debian:stretch is end-of-life; confirm that `apt-get update`
# still resolves against the mirrors reachable from your build environment.
FROM debian:stretch

LABEL maintainer="Getty Images https://github.com/gettyimages"

# Run every RUN under bash with pipefail so that a failed download in a
# `curl | gunzip | tar` pipeline fails the build instead of being masked by
# the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Base tooling and Python 3; `python` is made to resolve to python3.
RUN apt-get update \
 && apt-get install -y curl unzip \
    python3 python3-setuptools \
 && ln -s /usr/bin/python3 /usr/bin/python \
 && easy_install3 pip py4j \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Interpreter used by PySpark executors and by the driver.
ENV PYSPARK_PYTHON=python3.5 \
    PYSPARK_DRIVER_PYTHON=python3.5
RUN ln -s /usr/bin/python3.5 /usr/local/bin/python

# http://blog.stuart.axelbrooke.com/python-3-on-spark-return-of-the-pythonhashseed
ENV PYTHONHASHSEED=0 \
    PYTHONIOENCODING=UTF-8 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Register a uid/gid 99 "nobody" account and add it to the "users" group.
RUN addgroup --gid 99 nobody \
 && echo "nobody:x:99:99:nobody:/nonexistent:/usr/sbin/nologin" >> /etc/passwd \
 && usermod -a -G users nobody

# JAVA
# Download, unpack and chown the JDK in a single layer: a chown in a separate
# RUN would copy the whole JDK tree into a second layer. The apt package
# lists are removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y wget \
 && rm -rf /var/lib/apt/lists/* \
 && wget http://10.79.23.151/jdk/jdk-8u211-linux-x64.tar.gz \
 && mkdir /var/local/JAVA_HOME \
 && tar -xzf ./jdk-8u211-linux-x64.tar.gz -C /var/local/JAVA_HOME \
 && rm -f ./jdk-8u211-linux-x64.tar.gz \
 && chown -R nobody /var/local/JAVA_HOME

# spark-class needs JAVA_HOME or a `java` binary on PATH to start the JVM.
# NOTE(review): assumes the archive unpacks into a top-level "jdk1.8.0_211"
# directory, as the stock Oracle 8u211 tarball does - confirm against the
# mirrored file.
ENV JAVA_HOME=/var/local/JAVA_HOME/jdk1.8.0_211
ENV PATH=$PATH:$JAVA_HOME/bin

# HADOOP
# Each ENV below references a variable set by an earlier instruction, so they
# must stay separate instructions (substitution uses pre-instruction values).
ENV HADOOP_VERSION=3.0.0
ENV HADOOP_HOME=/usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV PATH=$PATH:$HADOOP_HOME/bin
# -f makes curl exit non-zero on an HTTP error instead of piping the error
# page into gunzip.
RUN curl -fsSL --retry 3 \
    "https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
    | gunzip \
    | tar -x -C /usr/ \
 && rm -rf $HADOOP_HOME/share/doc \
 && chown -R root:root $HADOOP_HOME

# SPARK
ENV SPARK_VERSION=2.3.2
ENV SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-without-hadoop
ENV SPARK_HOME=/usr/spark-${SPARK_VERSION}
# The "without-hadoop" Spark build takes its Hadoop jars from this classpath.
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"
ENV PATH=$PATH:${SPARK_HOME}/bin
RUN curl -fsSL --retry 3 \
    "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
    | gunzip \
    | tar x -C /usr/ \
 && mv /usr/$SPARK_PACKAGE $SPARK_HOME \
 && chown -R root:root $SPARK_HOME

COPY test.py /tmp/test.py

WORKDIR $SPARK_HOME
# Exec form; the relative path resolves against WORKDIR ($SPARK_HOME).
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]