hadoop ha安装配置

余生长醉 提交于 2019-12-21 15:06:53

集群部署规划

hadoop11 hadoop12 hadoop13 hadoop14
NameNode
DataNode
ResourceManager
NodeManager
ZooKeeper
journalnode
zkfc
Hmaster
HRegionServer
scala
spark master
spark slave
mysql master
mysql slave
hive
flume
kafka
elasticsearch
kibana
logstash
storm nimbus
storm supervisor
storm ui
flink jobmanager
flink taskmanager
mongodb PRIMARY
mongodb SECONDARY
mongodb ARBITER
hue
livy
impala master
impala slave

防火墙

免密码登录

集群时间同步

JDK部署

zookeeper集群部署

解压缩

tar zxvf apache-zookeeper-3.5.5-bin.tar.gz -C /opt/module/

hadoop-env.sh增加JAVA_HOME

cat etc/hadoop/hadoop-env.sh
......
export JAVA_HOME=/opt/module/jdk1.8.0_211

设置/etc/profile

......
## JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_211
export PATH=$PATH:$JAVA_HOME/bin
## ZOOKEEPER_HOME
export ZOOKEEPER_HOME=/opt/module/apache-zookeeper-3.5.5-bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin
## HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-3.2.0
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

生效

source /etc/profile

配置core-site.xml

cat core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>


<configuration>
    <!-- 指定hdfs的nameservice为xiechuan -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://xiechuan/</value>
    </property>

    <!-- 指定hadoop临时目录 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.2.0/data/hadoopdata</value>
    </property>

    <!-- 指定zookeeper地址 -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>hadoop11:2181,hadoop12:2181,hadoop13:2181</value>
    </property>

    <!-- hadoop连接zookeeper的超时时长设置 -->
    <property>
        <name>ha.zookeeper.session-timeout.ms</name>
        <value>1000</value>
        <description>ms</description>
    </property>
</configuration>

配置hdfs-site.xml

cat hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

    <!-- 指定副本数 -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>

    <!-- 配置namenode和datanode的工作目录-数据存储目录 -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/module/hadoop-3.2.0/data/hadoopdata/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/module/hadoop-3.2.0/data/hadoopdata/dfs/data</value>
    </property>

    <!-- 启用webhdfs -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>

    <!--指定hdfs的nameservice为xiechuan,需要和core-site.xml中的保持一致
                          dfs.ha.namenodes.[nameservice id]为在nameservice中的每一个NameNode设置唯一标示符。
        配置一个逗号分隔的NameNode ID列表。这将是被DataNode识别为所有的NameNode。
        例如,如果使用"xiechuan"作为nameservice ID,并且使用"nn1"和"nn2"作为NameNodes标示符
    -->
    <property>
        <name>dfs.nameservices</name>
        <value>xiechuan</value>
    </property>

    <!-- xiechuan下面有两个NameNode,分别是nn1,nn2 -->
    <property>
        <name>dfs.ha.namenodes.xiechuan</name>
        <value>nn1,nn2</value>
    </property>

    <!-- nn1的RPC通信地址 -->
    <property>
        <name>dfs.namenode.rpc-address.xiechuan.nn1</name>
        <value>hadoop11:9000</value>
    </property>

    <!-- nn1的http通信地址 -->
    <property>
        <name>dfs.namenode.http-address.xiechuan.nn1</name>
        <value>hadoop11:9870</value>
    </property>

    <!-- nn2的RPC通信地址 -->
    <property>
        <name>dfs.namenode.rpc-address.xiechuan.nn2</name>
        <value>hadoop12:9000</value>
    </property>

    <!-- nn2的http通信地址 -->
    <property>
        <name>dfs.namenode.http-address.xiechuan.nn2</name>
        <value>hadoop12:9870</value>
    </property>

    <!-- 指定NameNode的edits元数据的共享存储位置。也就是JournalNode列表
                          该url的配置格式:qjournal://host1:port1;host2:port2;host3:port3/journalId
        journalId推荐使用nameservice,默认端口号是:8485 -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://hadoop11:8485;hadoop12:8485;hadoop13:8485/xiechuan</value>
    </property>

    <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/module/hadoop-3.2.0/data/journaldata</value>
    </property>

    <!-- 开启NameNode失败自动切换 -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- 配置失败自动切换实现方式 -->
    <property>
        <name>dfs.client.failover.proxy.provider.xiechuan</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- 配置隔离机制方法,多个机制用换行分割,即每个机制占用一行 -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>

    <!-- 使用sshfence隔离机制时需要ssh免登陆 -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>

    <!-- 配置sshfence隔离机制超时时间 -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

    <property>
        <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
        <value>60000</value>
    </property>
</configuration>

配置mapred-site.xml

cat mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <!-- 指定mr框架为yarn方式 -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <!-- 指定mapreduce jobhistory地址 -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop11:10020</value>
    </property>

    <!-- 任务历史服务器的web地址 -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop11:19888</value>
    </property>
    <property>
       <name>yarn.app.mapreduce.am.env</name>
       <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
       <name>mapreduce.map.env</name>
       <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
      <name>mapreduce.reduce.env</name>
      <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
</configuration>

配置yarn-site.xml

cat yarn-site.xml
<?xml version="1.0"?>

<configuration>
    <!-- 开启RM高可用 -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <!-- 指定RM的cluster id -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yrc</value>
    </property>

    <!-- 指定RM的名字 -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <!-- 分别指定RM的地址 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>hadoop13</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>hadoop14</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>  
        <value>hadoop13:8088</value>
    </property>  

    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>  
        <value>hadoop14:8088</value>
    </property>

    <!-- 指定zk集群地址 -->
    <property>
        <name>hadoop.zk.address</name>
        <value>hadoop11:2181,hadoop12:2181,hadoop13:2181</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
    </property>

    <!-- 启用自动恢复 -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <!-- 指定resourcemanager的状态信息存储在zookeeper集群上 -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
</configuration>

修改workers

[hadoop@hadoop11 hadoop]$ cat workers
hadoop11
hadoop12
hadoop13
hadoop14

同步到其它机器

xsync.sh hadoop-3.2.0

验证hadoop

[hadoop@hadoop11 module]$ xcall.sh hadoop version
================== hadoop11 hadoop version==================
Hadoop 3.2.0
Source code repository https://github.com/apache/hadoop.git -r e97acb3bd8f3befd27418996fa5d4b50bf2e17bf
Compiled by sunilg on 2019-01-08T06:08Z
Compiled with protoc 2.5.0
From source with checksum d3f0795ed0d9dc378e2c785d3668f39
This command was run using /opt/module/hadoop-3.2.0/share/hadoop/common/hadoop-common-3.2.0.jar
================== hadoop12 hadoop version==================
Hadoop 3.2.0
Source code repository https://github.com/apache/hadoop.git -r e97acb3bd8f3befd27418996fa5d4b50bf2e17bf
Compiled by sunilg on 2019-01-08T06:08Z
Compiled with protoc 2.5.0
From source with checksum d3f0795ed0d9dc378e2c785d3668f39
This command was run using /opt/module/hadoop-3.2.0/share/hadoop/common/hadoop-common-3.2.0.jar
================== hadoop13 hadoop version==================
Hadoop 3.2.0
Source code repository https://github.com/apache/hadoop.git -r e97acb3bd8f3befd27418996fa5d4b50bf2e17bf
Compiled by sunilg on 2019-01-08T06:08Z
Compiled with protoc 2.5.0
From source with checksum d3f0795ed0d9dc378e2c785d3668f39
This command was run using /opt/module/hadoop-3.2.0/share/hadoop/common/hadoop-common-3.2.0.jar
================== hadoop14 hadoop version==================
Hadoop 3.2.0
Source code repository https://github.com/apache/hadoop.git -r e97acb3bd8f3befd27418996fa5d4b50bf2e17bf
Compiled by sunilg on 2019-01-08T06:08Z
Compiled with protoc 2.5.0
From source with checksum d3f0795ed0d9dc378e2c785d3668f39
This command was run using /opt/module/hadoop-3.2.0/share/hadoop/common/hadoop-common-3.2.0.jar
[hadoop@hadoop11 module]$

启动journalnode

xcall.sh hadoop-daemon.sh start journalnode
================== hadoop11 hadoop-daemon.sh start journalnode==================
WARNING: Use of this script to start HDFS daemons is deprecated.
WARNING: Attempting to execute replacement "hdfs --daemon start" instead.
================== hadoop12 hadoop-daemon.sh start journalnode==================
WARNING: Use of this script to start HDFS daemons is deprecated.
WARNING: Attempting to execute replacement "hdfs --daemon start" instead.
================== hadoop13 hadoop-daemon.sh start journalnode==================
WARNING: Use of this script to start HDFS daemons is deprecated.
WARNING: Attempting to execute replacement "hdfs --daemon start" instead.
================== hadoop14 hadoop-daemon.sh start journalnode==================
WARNING: Use of this script to start HDFS daemons is deprecated.
WARNING: Attempting to execute replacement "hdfs --daemon start" instead.

查看启动结果

xcall.sh jps
================== hadoop11 jps==================
13761 QuorumPeerMain
14742 JournalNode
14811 Jps
================== hadoop12 jps==================
13376 JournalNode
13440 Jps
12839 QuorumPeerMain
================== hadoop13 jps==================
12848 QuorumPeerMain
13383 JournalNode
13447 Jps
================== hadoop14 jps==================
13395 Jps
13338 JournalNode

格式化namenode

hdfs namenode -format（注：hadoop namenode -format 在 Hadoop 3.x 中已弃用）

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-5OoMtLYS-1576901729075)(../img/hadoop/01_01.png)]

复制数据到另一个namenode节点上

scp -r data/hadoopdata/ hadoop12:/opt/module/hadoop-3.2.0/data/
VERSION                                                                                                 100%  216     0.2KB/s   00:00    
seen_txid                                                                                               100%    2     0.0KB/s   00:00    
fsimage_0000000000000000000.md5                                                                         100%   62     0.1KB/s   00:00    
fsimage_0000000000000000000

格式化zkfc(只能在namenode节点上进行)

hdfs zkfc -formatZK

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-QIYmoZn9-1576901729077)(../img/hadoop/01_02.png)]

启动集群的hdfs

[hadoop@hadoop11 hadoop-3.2.0]$ start-dfs.sh
Starting namenodes on [hadoop11 hadoop12]
Starting datanodes
Starting journal nodes [hadoop13 hadoop12 hadoop11]
hadoop12: journalnode is running as process 2614.  Stop it first.
hadoop13: journalnode is running as process 2604.  Stop it first.
hadoop11: journalnode is running as process 2664.  Stop it first.
Starting ZK Failover Controllers on NN hosts [hadoop11 hadoop12]

查看启动结果

xcall.sh jps
================== hadoop11 jps==================
2498 QuorumPeerMain
3060 NameNode
3574 DFSZKFailoverController
3654 Jps
2664 JournalNode
3180 DataNode
================== hadoop12 jps==================
3105 Jps
3044 DFSZKFailoverController
2453 QuorumPeerMain
2614 JournalNode
2766 NameNode
2846 DataNode
================== hadoop13 jps==================
2450 QuorumPeerMain
2886 Jps
2747 DataNode
2604 JournalNode
================== hadoop14 jps==================
2803 DataNode
2890 Jps

查看zookeeper数据

[zk: localhost:2181(CONNECTED) 0] ls /
[hadoop-ha, rmstore, yarn-leader-election, zookeeper]

启动yarn(在主备resourcemanager中随便选择一台启动)

[hadoop@hadoop13 hadoop-3.2.0]$ start-yarn.sh
Starting resourcemanagers on [ hadoop13 hadoop14]
Starting nodemanagers

查看启动结果

xcall.sh jps
================== hadoop11 jps==================
2498 QuorumPeerMain
3922 Jps
3060 NameNode
3574 DFSZKFailoverController
2664 JournalNode
3180 DataNode
3806 NodeManager
================== hadoop12 jps==================
3044 DFSZKFailoverController
2453 QuorumPeerMain
2614 JournalNode
3404 Jps
2766 NameNode
2846 DataNode
3279 NodeManager
================== hadoop13 jps==================
2450 QuorumPeerMain
3747 ResourceManager
3864 NodeManager
2747 DataNode
2604 JournalNode
4047 Jps
================== hadoop14 jps==================
2803 DataNode
3011 ResourceManager
3403 Jps
3086 NodeManager

启动历史服务器

mr-jobhistory-daemon.sh start historyserver
WARNING: Use of this script to start the MR JobHistory daemon is deprecated.
WARNING: Attempting to execute replacement "mapred --daemon start" instead.

查看

jps
2498 QuorumPeerMain
3060 NameNode
4084 Jps
3574 DFSZKFailoverController
4023 JobHistoryServer
2664 JournalNode
3180 DataNode
3806 NodeManager

查看hdfs各主节点状态

[hadoop@hadoop11 hadoop-3.2.0]$ hdfs haadmin -getServiceState nn1
active
[hadoop@hadoop11 hadoop-3.2.0]$ hdfs haadmin -getServiceState nn2
standby

查看yarn各节点状态

[hadoop@hadoop11 hadoop-3.2.0]$ yarn rmadmin -getServiceState rm1
standby
[hadoop@hadoop11 hadoop-3.2.0]$ yarn rmadmin -getServiceState rm2
active

web查看hdfs

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-WoJKErxI-1576901729077)(../img/hadoop/01_03.png)]

web查看yarn(standby节点会自动跳到active节点)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-ILdutSlB-1576901729077)(../img/hadoop/01_04.png)]

web查看历史服务器

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-9vCbit4w-1576901729077)(../img/hadoop/01_05.png)]

hdfs切换测试

[hadoop@hadoop11 hadoop-3.2.0]$ hdfs haadmin -transitionToActive --forcemanual nn2
[hadoop@hadoop11 hadoop-3.2.0]$ hadoop-daemon.sh stop namenode

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-xcLFHrZx-1576901729078)(../img/hadoop/01_06.png)]

yarn切换测试

[hadoop@hadoop14 ~]$ yarn-daemon.sh stop resourcemanager
WARNING: Use of this script to stop YARN daemons is deprecated.
WARNING: Attempting to execute replacement "yarn --daemon stop" instead.

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-xxghMd0q-1576901729078)(../img/hadoop/01_07.png)]

运行wordcount

hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.0.jar wordcount /user/hadoop/input /user/hadoop/output

查看日志

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-Q2pa1vrD-1576901729078)(../img/hadoop/01_08.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-yOaAnlqx-1576901729079)(../img/hadoop/01_09.png)]

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!