# ZooKeeper Cluster

# Create the hadoop user

sudo groupadd hadoop
sudo useradd -m -g hadoop -s /bin/bash hadoop
sudo passwd hadoop
id hadoop
su - hadoop

# SSH login (localhost)

ssh-keygen -t rsa
cd ~/.ssh/
cp id_rsa.pub authorized_keys
chmod 700 ~/.ssh/
chmod 600 ~/.ssh/*
ssh localhost

# SSH login (between nodes)

# On slave1, slave2, slave3
cat ~/.ssh/id_rsa.pub | ssh hadoop@master 'cat >> ~/.ssh/authorized_keys'

# On master
scp -r authorized_keys hadoop@slave1:~/.ssh/
scp -r authorized_keys hadoop@slave2:~/.ssh/
scp -r authorized_keys hadoop@slave3:~/.ssh/


ssh master
ssh slave1
ssh slave2
ssh slave3

# JDK

sudo usermod -aG sudo hadoop
su - hadoop
sudo whoami
sudo apt install openjdk-8-jdk
java -version

vi ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
source ~/.bashrc

# ZooKeeper

wget https://downloads.apache.org/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz
tar -zxvf apache-zookeeper-3.8.4-bin.tar.gz
sudo mv apache-zookeeper-3.8.4-bin /opt/zookeeper

sudo mkdir -p /var/lib/zookeeper
sudo mkdir -p /var/log/zookeeper
sudo chown -R hadoop:hadoop /var/lib/zookeeper
sudo chown -R hadoop:hadoop /var/log/zookeeper

vi /opt/zookeeper/conf/zoo.cfg
tickTime=2000
dataDir=/var/lib/zookeeper
dataLogDir=/var/log/zookeeper
clientPort=2181
initLimit=5
syncLimit=2
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888

# On master
echo "1" | sudo tee /var/lib/zookeeper/myid

# On slave1
echo "2" | sudo tee /var/lib/zookeeper/myid

# On slave2
echo "3" | sudo tee /var/lib/zookeeper/myid

sudo vi /etc/systemd/system/zookeeper.service
[Unit]
Description=Zookeeper
Documentation=https://zookeeper.apache.org
After=network.target

[Service]
Type=simple
User=hadoop
ExecStart=/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
Restart=on-abnormal

[Install]
WantedBy=multi-user.target

sudo systemctl daemon-reload
sudo systemctl enable zookeeper
sudo systemctl start zookeeper
sudo systemctl status zookeeper
sudo systemctl stop zookeeper

/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh status

/opt/zookeeper/bin/zkCli.sh -server master:2181

/opt/zookeeper/bin/zkCli.sh -server localhost:2181
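
Once connected, a few standard commands at the zkCli prompt verify that the ensemble is serving requests (/smoke-test is just a throwaway znode name used for illustration):

ls /
create /smoke-test "hello"
get /smoke-test
delete /smoke-test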

# Error

Cannot open channel to 2 at election address

https://www.cnblogs.com/tocode/p/10693715.html

Fix: in zoo.cfg on each machine, change the local node's own address to 0.0.0.0, e.g. server.1=0.0.0.0:2888:3888 on master.
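
For example, the server list in zoo.cfg on master would then look like the lines below; each node replaces only its own entry with 0.0.0.0 and keeps the hostnames of the other nodes:

server.1=0.0.0.0:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888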

# Startup succeeded

(screenshot: zookeeper)

# Hadoop

# Architecture diagram

                  +-----------------+
                  |   Zookeeper     |
                  |   Ensemble      |
                  |                 |
                  | master:2181     |
                  | slave1:2181     |
                  | slave2:2181     |
                  +--------+--------+
                           |
                           |
          +----------------+----------------+
          |                |                |
          |                |                |
+---------+-------+ +------+----------+ +---+-------------+
|     master      | |     slave1      | |     slave2      |
|  192.168.3.201  | |  192.168.3.202  | |  192.168.3.203  |
|                 | |                 | |                 |
| NameNode (nn1)  | | NameNode (nn2)  | | DataNode        |
| ResourceManager | | ResourceManager | | NodeManager     |
| JournalNode     | | JournalNode     | | JournalNode     |
| DataNode        | | DataNode        | |                 |
| NodeManager     | | NodeManager     | |                 |
|                 | |                 | |                 |
| HDFS HA         | | HDFS HA         | | Data Storage    |
+-----------------+ +-----------------+ +-----------------+

# ChatGPT prompt

Known environment:
The three machines used in this experiment run Ubuntu 22.04.4 LTS and are planned as follows:
| Hostname | IP Address    | User   |
|----------|---------------|--------|
| master   | 192.168.3.201 | hadoop |
| slave1   | 192.168.3.202 | hadoop |
| slave2   | 192.168.3.203 | hadoop |

All three machines are logged in as the hadoop user, passwordless SSH is already configured, and /etc/hosts on each machine contains:
192.168.3.201 master
192.168.3.202 slave1
192.168.3.203 slave2
so the machines can reach each other by hostname.

Java is already installed on all three machines with JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64, so this step can be skipped.
The ZooKeeper cluster is already installed on all three machines under /opt/zookeeper/, so this step can be skipped.


Requirements:
As the hadoop user, install a Hadoop 3.4.0 cluster with /opt/hadoop as the installation path, including HDFS HA and YARN HA. The cluster roles are as follows:

master: NameNode (nn1), ResourceManager, JournalNode, NodeManager, DataNode
slave1: NameNode (nn2), ResourceManager, JournalNode, NodeManager, DataNode
slave2: JournalNode, NodeManager, DataNode

# Install Hadoop on all nodes

wget https://downloads.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
tar -xzvf hadoop-3.4.0.tar.gz
sudo mv hadoop-3.4.0 /opt/hadoop
sudo chown -R hadoop:hadoop /opt/hadoop

# ~/.bashrc
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
source ~/.bashrc
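
A quick way to confirm that the installation and the environment variables took effect:

hadoop version
which hdfs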

# Configure core-site.xml

Add the following to /opt/hadoop/etc/hadoop/core-site.xml on all nodes:

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/tmp</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
</configuration>

# Configure hdfs-site.xml

Add the following to /opt/hadoop/etc/hadoop/hdfs-site.xml on all nodes:

<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>master:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>slave1:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>master:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>slave1:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>shell(/bin/true)</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/hadoop/journal</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/hdfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
</configuration>

# Configure workers

Add the following to /opt/hadoop/etc/hadoop/workers on all nodes:

master
slave1
slave2

# Start the JournalNode on all nodes

hdfs --daemon start journalnode
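
jps should now show a JournalNode process on every node:

jps | grep JournalNode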

# Set up the NameNode on the master node

hdfs namenode -format
hdfs zkfc -formatZK
# run nn1 in the foreground so that slave1 can bootstrap from it; stop it (Ctrl+C) once slave1 is done
hdfs namenode

# Set up the NameNode on slave1

hdfs namenode -bootstrapStandby

(screenshot: namenode)

Error analysis

ha.BootstrapStandby: Unable to fetch namespace information from any remote NN. Possible NameNodes: [RemoteNameNodeInfo [nnId=nn1, ipcAddress=master/192.168.3.201:8020, httpAddress=http://master:9870]]

Fix: delete the 127.0.0.1 master entry from /etc/hosts on master, the 127.0.0.1 slave1 entry on slave1, and the 127.0.0.1 slave2 entry on slave2.

https://cloud.tencent.com/developer/article/1913706
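
In other words, /etc/hosts on master should not map the machine's own hostname to 127.0.0.1; a working file (same idea on slave1 and slave2) looks roughly like this:

127.0.0.1 localhost
192.168.3.201 master
192.168.3.202 slave1
192.168.3.203 slave2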

# Stop the JournalNode on all nodes

hdfs --daemon stop journalnode

# Configure hadoop-env.sh on all nodes

/opt/hadoop/etc/hadoop/hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/opt/hadoop

# Start the cluster from the master node

start-dfs.sh
hdfs dfsadmin -report

# Startup succeeded

(screenshot: start-dfs)

# Web access

http://192.168.3.201:9870 active

http://192.168.3.202:9870 standby
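
The same active/standby information can be queried from the command line with the HA admin tool:

hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2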

# Upload a file

vi myfile.txt
hdfs dfs -mkdir /test
hdfs dfs -put myfile.txt /test
hdfs dfs -ls /test
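
Reading the file back confirms that HDFS is writable and readable:

hdfs dfs -cat /test/myfile.txt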

# Configure mapred-site.xml

Add the following to /opt/hadoop/etc/hadoop/mapred-site.xml on all nodes:

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
</configuration>

# Configure yarn-site.xml

Add the following to /opt/hadoop/etc/hadoop/yarn-site.xml on all nodes:

<configuration>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yarn-cluster</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>slave1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>master:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>slave1:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>master:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>slave1:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>master:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>slave1:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>master:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>slave1:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>master:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>slave1:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-state-store.parent-path</name>
        <value>/rmstore</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
</configuration>

# Start YARN

On the master node:

start-yarn.sh


yarn --daemon stop resourcemanager
yarn --daemon start resourcemanager

Startup log:

hadoop@master:~$ start-yarn.sh
Starting resourcemanagers on [ master slave1]
Starting nodemanagers

(screenshots: jps, jpsall, rmadmin)
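
The screenshots show the Java processes on each node. jpsall is presumably a small helper script; a minimal sketch that does the same thing over SSH (hostnames as configured in /etc/hosts):

#!/bin/bash
# jpsall (sketch): print the Java processes running on every cluster node
for h in master slave1 slave2; do
  echo "===== $h ====="
  ssh "$h" jps
done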

# Web access

http://192.168.3.201:8088 rm1

http://192.168.3.202:8088 rm2

Note: in a YARN HA configuration, only the Web UI of the ResourceManager that is currently in the active state is normally fully accessible; the Web UI of the standby ResourceManager may restrict access.
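
Which ResourceManager is currently active can be checked with yarn rmadmin:

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2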

# WordCount example

vi wordcount.txt

hadoop crudapi1
hadoop crudapi2
hadoop crudapi3
hadoop crudapi4
yarn zookeeper hdfs
yarn zookeeper hdfs
hdfs dfs -mkdir /mpdata
hdfs dfs -mkdir /mpdata/input
hdfs dfs -put wordcount.txt /mpdata/input
hdfs dfs -ls /mpdata/input
hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar wordcount /mpdata/input/wordcount.txt /mpdata/output

(screenshot: hadooprun)

Check the job (screenshot: wordcount)

Check the output; the result is correct (screenshot: output)
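
The same result can be read directly from HDFS; part-r-00000 is the default name of the single reducer's output file:

hdfs dfs -ls /mpdata/output
hdfs dfs -cat /mpdata/output/part-r-00000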

crudapi1    1
crudapi2    1
crudapi3    1
crudapi4    1
hadoop  4
hdfs    2
yarn    2
zookeeper   2