赞
踩
目录
基于前面的《Spark Local环境部署》、《Spark StandAlone环境部署》、《Spark StandAlone HA环境搭建》
一般我们都是用Spark On YARN跑代码,所以说王炸都在后面
- cd /export/spark/conf
- vim spark-env.sh
- # JAVA installation directory
- export JAVA_HOME=/export/jdk
-
- # HADOOP configuration directory, used to read files on HDFS and to submit to the YARN cluster
- export HADOOP_CONF_DIR=/export/hadoop/etc/hadoop
- export YARN_CONF_DIR=/export/hadoop/etc/hadoop
- # Keep SPARK_MASTER_HOST commented out: in HA mode the active master is elected via ZooKeeper
- #export SPARK_MASTER_HOST=master
-
- # Spark master RPC (communication) port
- export SPARK_MASTER_PORT=7077
- # Spark master web UI port
- export SPARK_MASTER_WEBUI_PORT=8080
-
- # CPU cores available to each worker
- export SPARK_WORKER_CORES=1
- # Memory available to each worker
- export SPARK_WORKER_MEMORY=1g
- # Worker RPC (communication) port
- export SPARK_WORKER_PORT=7078
- # Worker web UI port
- export SPARK_WORKER_WEBUI_PORT=8081
-
- ## History server settings
- # Store Spark application event logs in the /sparklog directory on HDFS, with cleaner enabled
- export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://master:8020/sparklog/ -Dspark.history.fs.cleaner.enabled=true"
-
- # Master HA: recovery state and leader election handled by the ZooKeeper ensemble
- export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=master:2181,slave1:2181,slave2:2181 -Dspark.deploy.zookeeper.dir=/spark-ha"

vim /export/hadoop/etc/hadoop/yarn-site.xml
- <configuration>
-   <!-- Hostname of the YARN ResourceManager (master node) -->
-   <property>
-     <name>yarn.resourcemanager.hostname</name>
-     <value>master</value>
-   </property>
-   <!-- Auxiliary service required for MapReduce/Spark shuffle -->
-   <property>
-     <name>yarn.nodemanager.aux-services</name>
-     <value>mapreduce_shuffle</value>
-   </property>
-   <!-- YARN cluster memory allocation settings -->
-   <property>
-     <name>yarn.nodemanager.resource.memory-mb</name>
-     <value>20480</value>
-   </property>
-   <property>
-     <name>yarn.scheduler.minimum-allocation-mb</name>
-     <value>2048</value>
-   </property>
-   <property>
-     <name>yarn.nodemanager.vmem-pmem-ratio</name>
-     <value>2.1</value>
-   </property>
-   <!-- Enable log aggregation -->
-   <property>
-     <name>yarn.log-aggregation-enable</name>
-     <value>true</value>
-   </property>
-   <!-- How long aggregated logs are retained on HDFS (seconds) -->
-   <property>
-     <name>yarn.log-aggregation.retain-seconds</name>
-     <value>604800</value>
-   </property>
-
-   <!-- URL of the YARN history server; must be a full URL including the scheme,
-        otherwise the web UI cannot redirect to the JobHistory server -->
-   <property>
-     <name>yarn.log.server.url</name>
-     <value>http://master:19888/jobhistory/logs</value>
-   </property>
-   <!-- Disable YARN physical/virtual memory checks (avoids containers being
-        killed on small test machines) -->
-   <property>
-     <name>yarn.nodemanager.pmem-check-enabled</name>
-     <value>false</value>
-   </property>
-   <property>
-     <name>yarn.nodemanager.vmem-check-enabled</name>
-     <value>false</value>
-   </property>
- </configuration>

- # Upload Spark's jar libraries to HDFS once, so YARN containers can fetch them
- # instead of re-uploading them on every job submission
- hadoop fs -mkdir -p /spark/jars/
- hadoop fs -put /export/spark/jars/* /spark/jars/
- cd /export/spark/conf
- vim spark-defaults.conf
spark.yarn.jars hdfs://master:8020/spark/jars/*
- # Use the full path: a bare `stop-all.sh` on PATH may resolve to Hadoop's script
- /export/spark/sbin/stop-all.sh
- cd /export/spark/conf
- # Distribute BOTH changed configuration files to the worker nodes
- # (spark-env.sh was also modified above and must be kept in sync)
- scp -r spark-env.sh slave1:/export/spark/conf
- scp -r spark-env.sh slave2:/export/spark/conf
- scp -r spark-defaults.conf slave1:/export/spark/conf
- scp -r spark-defaults.conf slave2:/export/spark/conf
-
- # Restart the Spark cluster so the new configuration takes effect
- /export/spark/sbin/start-all.sh
- # Interactive smoke test: pyspark shell running on YARN
- cd /export/spark
- bin/pyspark --master yarn
sc.parallelize([1,23,4,45,65,6,7]).map(lambda x: x*10).collect()
- # Batch smoke test: submit the bundled Pi example to YARN
- cd /export/spark
- bin/spark-submit --master yarn /export/spark/examples/src/main/python/pi.py 100
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。