执行任务
./spark-submit \ --class cn.com.dtmobile.spark.DebugTest \ --master yarn \ --deploy-mode client \ --num-executors 3 \ --executor-cores 2 \ --executor-memory 1G \ /home/etluser/kong/debugTest/pucchSinr.jar
${SPARK_HOME}/bin/spark-submit脚本
if [ -z "${SPARK_HOME}" ]; then source "$(dirname "$0")"/find-spark-home fi # disable randomized hash for string in Python 3.3+ export PYTHONHASHSEED=0 exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
$@表示所有接收的参数:
$@=
--class cn.com.dtmobile.spark.DebugTest --master yarn --deploy-mode client --num-executors 3 --executor-cores 2 --executor-memory 1G /home/etluser/kong/debugTest/pucchSinr.jar
最后exec执行的内容:
exec=
/home/etluser/kong/spark/spark-2.3.4-bin/spark-2.3.4-bin-hadoop2.6/bin/spark-class org.apache.spark.deploy.SparkSubmit --class cn.com.dtmobile.spark.DebugTest --master yarn --deploy-mode client --num-executors 3 --executor-cores 2 --executor-memory 1G /home/etluser/kong/debugTest/pucchSinr.jar
所以传给spark-class的参数为:
org.apache.spark.deploy.SparkSubmit --class cn.com.dtmobile.spark.DebugTest --master yarn --deploy-mode client --num-executors 3 --executor-cores 2 --executor-memory 1G /home/etluser/kong/debugTest/pucchSinr.jar
${SPARK_HOME}/bin/spark-class脚本
if [ -z "${SPARK_HOME}" ]; then #如果${SPARK_HOME}长度为0 source "$(dirname "$0")"/find-spark-home fi . "${SPARK_HOME}"/bin/load-spark-env.sh # Find the java binary if [ -n "${JAVA_HOME}" ]; then #如果${JAVA_HOME}长度不为0 RUNNER="${JAVA_HOME}/bin/java" else if [ "$(command -v java)" ]; then RUNNER="java" else echo "JAVA_HOME is not set" >&2 exit 1 fi fi # Find Spark jars. if [ -d "${SPARK_HOME}/jars" ]; then #如果${SPARK_HOME}/jars文件夹存在的话 SPARK_JARS_DIR="${SPARK_HOME}/jars" else SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" #不存在的话就去assembly目录下找 fi if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then #如果SPARK_JARS_DIR目录不存在,并且相关测试目录也为空 echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2 echo "You need to build Spark with the target \"package\" before running this program." 1>&2 exit 1 else LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*" #指定加载时候的类路径为SPARK_JARS_DIR/所有jar包 fi # Add the launcher build dir to the classpath if requested. if [ -n "$SPARK_PREPEND_CLASSES" ]; then LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH" fi # For tests if [[ -n "$SPARK_TESTING" ]]; then unset YARN_CONF_DIR unset HADOOP_CONF_DIR fi # The launcher library will print arguments separated by a NULL character, to allow arguments with # characters that would be otherwise interpreted by the shell. Read that in a while loop, populating # an array that will be used to exec the final command. # # The exit code of the launcher is appended to the output, so the parent shell removes it from the # command array and checks the value to see if the launcher succeeded. build_command() { #$RUNNER为java,调用类路径中的org.apache.spark.launcher.Main类 参数为spark-submit指定的所有参数,在这里调用launcher生成下面jvm command "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@" printf "%d\0" $? } # Turn off posix mode since it does not allow process substitution set +o posix CMD=() while IFS= read -d '' -r ARG; do CMD+=("$ARG") done < <(build_command "$@") COUNT=${#CMD[@]} LAST=$((COUNT - 1)) LAUNCHER_EXIT_CODE=${CMD[$LAST]} # Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes # the code that parses the output of the launcher to get confused. In those cases, check if the # exit code is an integer, and if it's not, handle it as a special error case. if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then echo "${CMD[@]}" | head -n-1 1>&2 exit 1 fi if [ $LAUNCHER_EXIT_CODE != 0 ]; then exit $LAUNCHER_EXIT_CODE fi CMD=("${CMD[@]:0:$LAST}") exec "${CMD[@]}"
最后exec执行的CMD为:
${CMD[@]}=
/usr/lib/java/jdk1.8.0_144/bin/java -cp \
/home/etluser/kong/spark/spark-2.3.4-bin/spark-2.3.4-bin-hadoop2.6/conf/:/home/etluser/kong/spark/spark-2.3.4-bin/spark-2.3.4-bin-hadoop2.6/jars/* \
-Xmx1g \
org.apache.spark.deploy.SparkSubmit \
--master yarn \
--deploy-mode client \
--class cn.com.dtmobile.spark.DebugTest \
--num-executors 3 \
--executor-cores 2 \
--executor-memory 1G \
/home/etluser/kong/debugTest/pucchSinr.jar
也就是通过上面launcher生成的cmd
org.apache.spark.launcher.Main类
1.set -o posix
set命令是shell解释器的一个内置命令,用来设置shell解释器的属性,从而能够控制shell解释器的一些行为。
在set命令中,选项前面跟着 - 号表示开启这个选项, + 表示关闭这个选项。
POSIX,Portable Operating System Interface。
是UNIX系统的一个设计标准,很多类UNIX系统也在支持兼容这个标准,如Linux。
遵循这个标准的好处是软件可以跨平台。
所以windows也支持就很容易理解了,那么多优秀的开源软件,支持了这个这些软件就可能有windows版本,就可以完善丰富windows下的软件。
set -o posix:开启bash的posix模式。
2.command -v java
command [-pVv] command [arg ...]
用command指定可取消正常的shell function寻找。只有内建命令及在PATH中找得到的才会被执行。
"-p"选项,搜寻命令的方式是用PATH来找。"-V"或"-v"选项,会显示出该命令的一些简约描述。
4.read -d
-d :表示delimiter,即定界符,一般情况下是以IFS为参数的间隔,但是通过-d,我们可以定义一直读到出现执行的字符位置。例如read –d madfds value,读到有m的字符的时候就不在继续向后读,例如输入为 hello m,有效值为“hello”,请注意m前面的空格等会被删除。这种方式可以输入多个字符串,例如定义“.”作为结符号等等
read命令 -n(不换行) -p(提示语句) -n(字符个数) -t(等待时间) -s(不回显)