
Setting Up a ZooKeeper Cluster and a Hadoop HA Cluster with Ansible Playbooks


1. Configure ZooKeeper
# vi /etc/ansible/hosts
[zk_servers]
172.16.206.27 myid=1
172.16.206.28 myid=2
172.16.206.29 myid=3
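
The inventory above only defines the zk_servers group and a per-host myid; the top-level playbook that applies the role is not shown in the original write-up. A minimal sketch, assuming the vars/, templates/ and tasks/ directories below belong to a role named zookeeper and the playbook is saved as zookeeper.yml (both names are assumptions):

# vi zookeeper.yml   (hypothetical file name)
- hosts: zk_servers
  remote_user: root        # assumption: files under /usr/local are written as root
  roles:
    - zookeeper

# run it with: ansible-playbook zookeeper.yml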

# vi vars/main.yml
server1_hostname: hadoop27
server2_hostname: hadoop28
server3_hostname: hadoop29

# vi templates/zoo.cfg.j2
tickTime=2000
initLimit=10
syncLimit=5
# data snapshot and transaction log directories
dataDir=/usr/local/zookeeper-3.4.6/dataDir
dataLogDir=/usr/local/zookeeper-3.4.6/dataLogDir
clientPort=2181
# zookeeper ensemble: port 2888 is the follower-to-leader (quorum) port, 3888 is the leader-election port
server.1={{ server1_hostname }}:2888:3888
server.2={{ server2_hostname }}:2888:3888
server.3={{ server3_hostname }}:2888:3888
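
The three server.N lines above come from fixed variables in vars/main.yml. As an alternative sketch (not part of the original template), the same lines could be generated from the inventory itself, using each host's myid and inventory address, so that adding a node only requires an inventory change:

{% for host in groups['zk_servers'] %}
server.{{ hostvars[host]['myid'] }}={{ host }}:2888:3888
{% endfor %}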


# vi tasks/main.yml
- name: install zookeeper   # unpack the tarball into /usr/local/
  unarchive: src=zookeeper-3.4.6.tar.gz dest=/usr/local/

- name: install configuration file for zookeeper  # render zoo.cfg.j2 onto every host with the template module
  template: src=zoo.cfg.j2 dest=/usr/local/zookeeper-3.4.6/conf/zoo.cfg

- name: create data directory   # the tarball does not ship dataDir, so create it before writing myid
  file: path=/usr/local/zookeeper-3.4.6/dataDir state=directory

- name: add myid file
  shell: echo {{ myid }} > /usr/local/zookeeper-3.4.6/dataDir/myid

- name: copy script to clear zookeeper logs
  copy: src=clean_zklog.sh dest=/usr/local/zookeeper-3.4.6/clean_zklog.sh mode=0755

- name: crontab task
  cron: name="clear zk logs" weekday="0" hour="0" minute="0" job="/usr/local/zookeeper-3.4.6/clean_zklog.sh"

- name: start zookeeper
  shell: /usr/local/zookeeper-3.4.6/bin/zkServer.sh start
  tags: start
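
Starting ZooKeeper through a shell task gives no feedback on whether the ensemble actually formed. A small optional check that could be appended to tasks/main.yml (the task names and the status tag are illustrative, not from the original role):

- name: check zookeeper status
  shell: /usr/local/zookeeper-3.4.6/bin/zkServer.sh status
  register: zk_status
  tags: status

- name: show zookeeper mode (leader or follower)
  debug: var=zk_status.stdout_lines
  tags: status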
  

2. Configure Hadoop
Edit /etc/ansible/hosts
[hadoop]
172.16.7.151 namenode_active=true  namenode_standby=false datanode=false
172.16.7.152 namenode_active=false namenode_standby=true  datanode=false
172.16.7.153 namenode_active=false namenode_standby=false datanode=true
172.16.7.154 namenode_active=false namenode_standby=false datanode=true
172.16.7.155 namenode_active=false namenode_standby=false datanode=true
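
As with the ZooKeeper part, the playbook that applies the role to the hadoop group is not shown. A minimal sketch, assuming the role is named hadoop and the playbook is saved as hadoop.yml (both names are assumptions):

# vi hadoop.yml   (hypothetical file name)
- hosts: hadoop
  remote_user: root        # the role's own tasks switch to the hadoop user with become/su where needed
  roles:
    - hadoop

# run it with: ansible-playbook hadoop.yml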

Edit vars/main.yml
env_file: /etc/profile
# hadoop-env.sh.j2 file variables.
JAVA_HOME: /usr/java/jdk1.8.0_73
# core-site.xml.j2 file variables.
ZK_NODE1: node1:2181
ZK_NODE2: node2:2181
ZK_NODE3: node3:2181
# hdfs-site.xml.j2 file variables.
NAMENODE1_HOSTNAME: node1
NAMENODE2_HOSTNAME: node2
DATANODE1_HOSTNAME: node3
DATANODE2_HOSTNAME: node4
DATANODE3_HOSTNAME: node5
# mapred-site.xml.j2 file variables.
MR_MODE: yarn
# yarn-site.xml.j2 file variables.
RM1_HOSTNAME: node1
RM2_HOSTNAME: node2 
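
Note that the variables above refer to hostnames node1 through node5 while the inventory lists IP addresses, so every machine must be able to resolve those names (via DNS or /etc/hosts). If /etc/hosts is used, a task along these lines could be added to the role; the IP-to-hostname mapping shown is an assumption based on the node roles in the inventory above (active NameNode as node1, standby as node2, DataNodes as node3-node5):

- name: add cluster hostnames to /etc/hosts
  lineinfile: dest=/etc/hosts line="{{ item }}" state=present
  with_items:
    - "172.16.7.151 node1"
    - "172.16.7.152 node2"
    - "172.16.7.153 node3"
    - "172.16.7.154 node4"
    - "172.16.7.155 node5"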

1. Edit templates/core-site.xml.j2
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <!-- Set the HDFS nameservice to ns1 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns1</value>
    </property>
    <!-- Base directory where Hadoop stores its data -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
    </property>
    <!-- ZooKeeper quorum addresses -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>{{ZK_NODE1}},{{ZK_NODE2}},{{ZK_NODE3}}</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
     <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
</configuration>

2. Configure hdfs-site.xml.j2
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- The HDFS nameservice is ns1; it must match the value in core-site.xml -->
    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
    </property>
    <!-- ns1 has two NameNodes, nn1 and nn2 -->
    <property>
        <name>dfs.ha.namenodes.ns1</name>
        <value>nn1,nn2</value>
    </property>
    <!-- RPC address of nn1 -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn1</name>
        <value>{{NAMENODE1_HOSTNAME}}:9000</value>
    </property>
    <!-- HTTP address of nn1 -->
    <property>
        <name>dfs.namenode.http-address.ns1.nn1</name>
        <value>{{NAMENODE1_HOSTNAME}}:50070</value>
    </property>
    <!-- RPC address of nn2 -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn2</name>
        <value>{{NAMENODE2_HOSTNAME}}:9000</value>
    </property>
    <!-- HTTP address of nn2 -->
    <property>
        <name>dfs.namenode.http-address.ns1.nn2</name>
        <value>{{NAMENODE2_HOSTNAME}}:50070</value>
    </property>
    <!-- Where the NameNode edit log (metadata) is stored on the JournalNodes -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://{{DATANODE1_HOSTNAME}}:8485;{{DATANODE2_HOSTNAME}}:8485;{{DATANODE3_HOSTNAME}}:8485/ns1</value>
    </property>
    <!-- Where each JournalNode keeps its data on local disk -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/usr/local/hadoop/journaldata</value>
    </property>
    <!-- Enable automatic NameNode failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Failover proxy provider implementation -->
    <property>
        <name>dfs.client.failover.proxy.provider.ns1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Fencing methods; multiple methods are separated by newlines, one per line -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <!-- The sshfence method requires passwordless SSH -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>
    <!-- sshfence connect timeout (ms) -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>  
</configuration>

3. Configure mapred-site.xml.j2
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>{{MR_MODE}}</value>
    </property>
</configuration>

4. Configure yarn-site.xml.j2
<?xml version="1.0"?>
<configuration>
    <!-- Enable ResourceManager HA -->
    <property>
       <name>yarn.resourcemanager.ha.enabled</name>
       <value>true</value>
    </property>
    <!-- ResourceManager cluster id -->
    <property>
       <name>yarn.resourcemanager.cluster-id</name>
       <value>yrc</value>
    </property>
    <!-- Logical ids of the two ResourceManagers -->
    <property>
       <name>yarn.resourcemanager.ha.rm-ids</name>
       <value>rm1,rm2</value>
    </property>
    <!-- Hostname of each ResourceManager -->
    <property>
       <name>yarn.resourcemanager.hostname.rm1</name>
       <value>{{RM1_HOSTNAME}}</value>
    </property>
    <property>
       <name>yarn.resourcemanager.hostname.rm2</name>
       <value>{{RM2_HOSTNAME}}</value>
    </property>
    <!-- ZooKeeper quorum addresses -->
    <property>
       <name>yarn.resourcemanager.zk-address</name>
       <value>{{ZK_NODE1}},{{ZK_NODE2}},{{ZK_NODE3}}</value>
    </property>
    <!-- Auxiliary service run by the NodeManager; must be mapreduce_shuffle for MapReduce jobs to run -->
    <property>
       <name>yarn.nodemanager.aux-services</name>   
       <value>mapreduce_shuffle</value> 
    </property>
</configuration>

5. Configure hadoop-env.sh.j2
# The java implementation to use.
export JAVA_HOME={{JAVA_HOME}}   # set JAVA_HOME from the role variable

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol.  Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by 
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

6. Configure slaves.j2
{{DATANODE1_HOSTNAME}}
{{DATANODE2_HOSTNAME}}
{{DATANODE3_HOSTNAME}}

 

vi tasks/main.yml
- name: install dependency package
  yum: name={{ item }} state=present  # install the openssh and rsync packages
  with_items:
    - openssh
    - rsync
    
- name: create hadoop user
  user: name=hadoop password={{password}}
  vars:
    # created with:
    # python -c 'import crypt; print crypt.crypt("This is my Password", "$1$SomeSalt$")'
    # >>> import crypt
    # >>> crypt.crypt('wisedu123', '$1$bigrandomsalt$') # generate the hashed password with crypt
    # '$1$bigrando$wzfZ2ifoHJPvaMuAelsBq0'
    password: $1$bigrando$wzfZ2ifoHJPvaMuAelsBq0
    
- name: copy and unzip hadoop
  #unarchive module owner and group only effect on directory.
  unarchive: src=hadoop-2.7.2.tar.gz dest=/usr/local/
  
- name: create hadoop soft link
  file: src=/usr/local/hadoop-2.7.2 dest=/usr/local/hadoop state=link
  
- name: create hadoop logs directory
  file: dest=/usr/local/hadoop/logs mode=0775 state=directory
  
- name: change hadoop soft link owner and group
  #recurse=yes make all files in a directory changed.
  file: path=/usr/local/hadoop owner=hadoop group=hadoop recurse=yes
  
- name: change hadoop-2.7.2 directory owner and group
  #recurse=yes make all files in a directory changed.
  file: path=/usr/local/hadoop-2.7.2 owner=hadoop group=hadoop recurse=yes
  
- name: set hadoop env
  lineinfile: dest={{env_file}} insertafter="{{item.position}}" line="{{item.value}}" state=present
  with_items:
  - {position: EOF, value: "\n"}
  - {position: EOF, value: "# Hadoop environment"}
  - {position: EOF, value: "export HADOOP_HOME=/usr/local/hadoop"}
  - {position: EOF, value: "export PATH=$PATH:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin"}
  
- name: enforce env
  shell: source {{env_file}}
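# Note: sourcing {{env_file}} here only affects the shell spawned for this one task;
# later tasks do not inherit it. The HADOOP_HOME/PATH entries written to /etc/profile
# above take effect for normal login shells.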
  
- name: install configuration file hadoop-env.sh.j2 for hadoop
  template: src=hadoop-env.sh.j2 dest=/usr/local/hadoop/etc/hadoop/hadoop-env.sh owner=hadoop group=hadoop
  
- name: install configuration file core-site.xml.j2 for hadoop
  template: src=core-site.xml.j2 dest=/usr/local/hadoop/etc/hadoop/core-site.xml owner=hadoop group=hadoop
  
- name: install configuration file hdfs-site.xml.j2 for hadoop
  template: src=hdfs-site.xml.j2 dest=/usr/local/hadoop/etc/hadoop/hdfs-site.xml owner=hadoop group=hadoop
  
- name: install configuration file mapred-site.xml.j2 for hadoop
  template: src=mapred-site.xml.j2 dest=/usr/local/hadoop/etc/hadoop/mapred-site.xml owner=hadoop group=hadoop
  
- name: install configuration file yarn-site.xml.j2 for hadoop
  template: src=yarn-site.xml.j2 dest=/usr/local/hadoop/etc/hadoop/yarn-site.xml owner=hadoop group=hadoop
  
- name: install configuration file slaves.j2 for hadoop
  template: src=slaves.j2 dest=/usr/local/hadoop/etc/hadoop/slaves owner=hadoop group=hadoop
  
- name: install configuration file hadoop-daemon.sh.j2 for hadoop
  template: src=hadoop-daemon.sh.j2 dest=/usr/local/hadoop/sbin/hadoop-daemon.sh owner=hadoop group=hadoop
- name: install configuration file yarn-daemon.sh.j2 for hadoop
  template: src=yarn-daemon.sh.j2 dest=/usr/local/hadoop/sbin/yarn-daemon.sh owner=hadoop group=hadoop
# Make sure the ZooKeeper cluster is running before starting the Hadoop cluster.
# start journalnode
- name: start journalnode
  shell: /usr/local/hadoop/sbin/hadoop-daemon.sh start journalnode  # start the journalnode process
  become: true
  become_method: su
  become_user: hadoop
  when: datanode == "true"   # host variable: run journalnode only on the datanode hosts
  
# format namenode
- name: format active namenode hdfs
  shell: /usr/local/hadoop/bin/hdfs namenode -format  # format the namenode
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
- name: start active namenode hdfs
  shell: /usr/local/hadoop/sbin/hadoop-daemon.sh start namenode  # start the active namenode
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
- name: format standby namenode hdfs
  shell: /usr/local/hadoop/bin/hdfs namenode -bootstrapStandby  # copy the active namenode's metadata to the standby namenode
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_standby == "true"
  
- name: stop active namenode hdfs
  shell: /usr/local/hadoop/sbin/hadoop-daemon.sh stop namenode  # stop the active namenode so that zkfc can be started first
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
# format ZKFC
- name: format ZKFC
  shell: /usr/local/hadoop/bin/hdfs zkfc -formatZK    # format ZKFC on the active namenode only
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
# start hadoop cluster
- name: start namenode
  shell: /usr/local/hadoop/sbin/start-dfs.sh    # start HDFS: this launches both namenodes, every datanode, every journalnode, and the zkfc on both namenodes
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
- name: start yarn
  shell: /usr/local/hadoop/sbin/start-yarn.sh   # start the yarn cluster (resourcemanager on this node plus all nodemanagers)
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"
  
- name: start standby rm
  shell: /usr/local/hadoop/sbin/yarn-daemon.sh start resourcemanager  # then start the standby resourcemanager separately
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_standby == "true"
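
Once everything is up, the HA state of the NameNodes and ResourceManagers can be verified. Optional check tasks along these lines could be appended to the role (they are illustrative, not part of the original playbook); one of each pair should report active and the other standby:

- name: check namenode HA state
  shell: /usr/local/hadoop/bin/hdfs haadmin -getServiceState nn1; /usr/local/hadoop/bin/hdfs haadmin -getServiceState nn2
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"

- name: check resourcemanager HA state
  shell: /usr/local/hadoop/bin/yarn rmadmin -getServiceState rm1; /usr/local/hadoop/bin/yarn rmadmin -getServiceState rm2
  become: true
  become_method: su
  become_user: hadoop
  when: namenode_active == "true"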
