文章目录
Docker安装Hadoop
1 安装docker
1.1 添加docker到yum源
1)安装指令,找不到yum-config-manager命令
[root@hadoop ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
-bash: yum-config-manager: 未找到命令
2)尝试安装yum-config-manager
[root@hadoop apps]# yum install -y yum-config-manager
已加载插件:fastestmirror
Loading mirror speeds from cached hostfile
* base: mirrors.ustc.edu.cn
* epel: mirrors.ustc.edu.cn
* extras: mirrors.ustc.edu.cn
* updates: mirrors.ustc.edu.cn
没有可用软件包 yum-config-manager。
错误:无须任何处理
3)经查询,系统没有这个yum-config-manager命令,在yum-utils包里
[root@hadoop apps]# yum -y install yum-utils
已加载插件:fastestmirror
Loading mirror speeds from cached hostfile
已安装:
yum-utils.noarch 0:1.1.31-54.el7_8
完毕!
4)再次安装docker
[root@hadoop apps]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
已加载插件:fastestmirror
adding repo from: http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
grabbing file http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo to /etc/yum.repos.d/docker-ce.repo
repo saved to /etc/yum.repos.d/docker-ce.repo
1.2 安装docker
1)安装docker-ce
[root@hadoop yum.repos.d]# yum -y install docker-ce
2)配置路径
[root@hadoop yum.repos.d]# mkdir -p /etc/docker
[root@hadoop yum.repos.d]# mkdir -p /data/apps/docker
3)将daemon.json拷贝到/data/apps/docker
4)启动docker
[root@hadoop yum.repos.d]# systemctl start docker
5)查看docker启动状态
[root@hadoop docker]# ps -ef | grep docker
root 5791 1 0 09:04 ? 00:00:00 /usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
root 6014 1896 0 09:05 pts/0 00:00:00 grep --color=auto docker
2 安装Hadoop
2.1 使用docker自带的hadoop安装
1)查找docker镜像
[root@hadoop docker]# docker search hadoop
NAME DESCRIPTION STARS OFFICIAL AUTOMATED
sequenceiq/hadoop-docker An easy way to try Hadoop 661 [OK]
uhopper/hadoop Base Hadoop image with dynamic configuration… 103 [OK]
......
2)下载docker镜像
[root@hadoop docker]# docker pull sequenceiq/hadoop-docker
Using default tag: latest
latest: Pulling from sequenceiq/hadoop-docker
Image docker.io/sequenceiq/hadoop-docker:latest uses outdated schema1 manifest format. Please upgrade to a schema2 image for better future compatibility. More information at https://docs.docker.com/registry/spec/deprecated-schema-v1/
......
3)查看镜像
[root@hadoop docker]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
sequenceiq/hadoop-docker latest 5c3cc170c6bc 6 years ago 1.77GB
4)创建master节点
[root@hadoop docker]# docker run --name hadoop -d -h master sequenceiq/hadoop-docker
43c17d7c556a1e7ca9e6cfd7d91ebbde22db2f5fb223956bf670710d733d0979
5)创建slave1节点
[root@hadoop docker]# docker run --name hadoop1 -d -h slave1 sequenceiq/hadoop-docker
697e4f687e0502f2b7727bdee6f9861cd515c55c1b38d5e43f09c29acc7989f3
6)创建slave2节点
[root@hadoop docker]# docker run --name hadoop2 -d -h slave2 sequenceiq/hadoop-docker
938f3a29ea7b9e99e02b0df4a448a75a9264d7d74c348dfc577fdbd01380e3e3
7)进入docker并查看Java JDK
[root@hadoop docker]# docker exec -it hadoop bash
bash-4.1# java -version
java version "1.7.0_71"
Java(TM) SE Runtime Environment (build 1.7.0_71-b14)
Java HotSpot(TM) 64-Bit Server VM (build 24.71-b01, mixed mode)
2.2 免密操作
docker exec -it hadoop bash进入容器
2.2.1 master节点
## 1.master节点配置免密
[root@hadoop docker]# docker exec -it hadoop bash
bash-4.1# /etc/init.d/sshd start
bash-4.1# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
/root/.ssh/id_rsa already exists.
Overwrite (y/n)? y
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
......
## 2.将/root/.ssh下的id_rsa.pub添加到authorized_keys
bash-4.1# cd /root/.ssh
bash-4.1# ls
authorized_keys config id_rsa id_rsa.pub
bash-4.1# cat id_rsa.pub > authorized_keys
bash-4.1# cat authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAwOpTpijrGlJTnlFEBjIRq9m24KIQ9oc50javrRpWBqQxhUrjIuVQTaOqiW6Dpgo8zDVNRratkB+HnlNQ8q3L0kE+IvlZINrCijZAGksZJpgbyuhqHKdf8Tmdco90FhAENQc54pcCvpDCD4dukSuICN3533rXIBxJU7omHnTQMORo+AMyGDTWW7pNgNDQoC7iZsjE+GcpF9Aq2+joQqYwOOrTDmQ2HI6TFomnM02PERlwZkbM/5ELZsb616JPu9QMNuv8BDHgRF87PtzZEI1vBEDeNfBAc3/J5vuirlqqxgS+zk5DFiWD0jcstJG1hTX5qRCXmvEyHMfE2kEtgkAXYQ== root@master
2.2.2 slave1节点
## 1. 配置slave1的免密操作
[root@hadoop ~]# docker exec -it hadoop1 bash
bash-4.1# /etc/init.d/sshd start
bash-4.1# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
/root/.ssh/id_rsa already exists.
Overwrite (y/n)? y
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
......
## 2.将/root/.ssh下的id_rsa.pub添加到authorized_keys
bash-4.1# cd /root/.ssh
bash-4.1# ls
authorized_keys config id_rsa id_rsa.pub
bash-4.1# cat id_rsa.pub > authorized_keys
bash-4.1# cat authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAxdKMpxzEHotmob4Oi12G6pGoF+z39b0zZF3mWwDMtwX6dNpKTqeoaFDmkKUnov7PEGYKPehT1rPSv0pDxrNcMylsNGYsNt2jhcR0k5YyIAcJKjjYzM3ifaHj4Pce7PtLyr9drqH39cES336xHiawfzKppMKa6tsbEUI5d4YhYRMAGhbDdDw6vVte9NmbPkos4yK272dlAQjwep5TiE63D3OwVP4TAdum99PhVim63ZO0RJYf8es5plAay33OmEUw1WeeXB/0EL/symE/eWlIXtDUbz2c1KHi8yviNB8qDb2YOsRTV8vO1OiRF/nXv6EKorDaeIYvwtDKyxT5ieENEQ== root@slave1
2.2.3 slave2节点
## 1.配置slave2的免密操作
[root@hadoop ~]# docker exec -it hadoop2 bash
bash-4.1# /etc/init.d/sshd start
bash-4.1# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
/root/.ssh/id_rsa already exists.
Overwrite (y/n)? y
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
......
## 2.将/root/.ssh下的id_rsa.pub添加到authorized_keys
bash-4.1# cd /root/.ssh
bash-4.1# ls
authorized_keys config id_rsa id_rsa.pub
bash-4.1# cat id_rsa.pub > authorized_keys
bash-4.1# cat authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAr4EKb4wjGwb1sKUX3iZ92T38ZqentL83Yok4tJ8k0UBdCCbOO7qgTGkZwg7gIhO0LBZ9YQDk6uR5hBJ8f3UN38l7MyJM9yYZl65dh04ZXqs0wRUMELYMBzTBKbIFdU9uwy07c8YMJbBvFBnBlzA764rFzGivFgoP4Kk9iIEFO/YQ/EwHyDpFhfRUVPEhjiJK4x7zS0sZAlhSSgk05fKQSw6qlXVr4JOGn5TWU7qylaj9lKABR9tI1WH4WoGx4zE8KDjgdUhtvygSmS4LO8JBOn1JPnqQq8PNWRI0xjWAohw3GnmUu54nKN5cjJjHvZPeXt0Y7B8YtI0+/DCQglo1zw== root@slave2
bash-4.1#
2.2.4 将三个容器中的authorized_keys拷贝到本地合并
## 1.拷贝到本地
[root@hadoop ssh]# docker cp hadoop:/root/.ssh/authorized_keys ./authorized_keys_master
[root@hadoop ssh]# docker cp hadoop1:/root/.ssh/authorized_keys ./authorized_keys_slave1
[root@hadoop ssh]# docker cp hadoop2:/root/.ssh/authorized_keys ./authorized_keys_slave2
[root@hadoop ssh]# ls
authorized_keys_master authorized_keys_slave1 authorized_keys_slave2
## 2.合并
[root@hadoop ssh]# touch authorized_keys
[root@hadoop ssh]# ls
authorized_keys authorized_keys_master authorized_keys_slave1 authorized_keys_slave2
[root@hadoop ssh]# cat authorized_keys_master authorized_keys_slave1 authorized_keys_slave2 > authorized_keys
[root@hadoop ssh]# cat authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAwOpTpijrGlJTnlFEBjIRq9m24KIQ9oc50javrRpWBqQxhUrjIuVQTaOqiW6Dpgo8zDVNRratkB+HnlNQ8q3L0kE+IvlZINrCijZAGksZJpgbyuhqHKdf8Tmdco90FhAENQc54pcCvpDCD4dukSuICN3533rXIBxJU7omHnTQMORo+AMyGDTWW7pNgNDQoC7iZsjE+GcpF9Aq2+joQqYwOOrTDmQ2HI6TFomnM02PERlwZkbM/5ELZsb616JPu9QMNuv8BDHgRF87PtzZEI1vBEDeNfBAc3/J5vuirlqqxgS+zk5DFiWD0jcstJG1hTX5qRCXmvEyHMfE2kEtgkAXYQ== root@master
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAxdKMpxzEHotmob4Oi12G6pGoF+z39b0zZF3mWwDMtwX6dNpKTqeoaFDmkKUnov7PEGYKPehT1rPSv0pDxrNcMylsNGYsNt2jhcR0k5YyIAcJKjjYzM3ifaHj4Pce7PtLyr9drqH39cES336xHiawfzKppMKa6tsbEUI5d4YhYRMAGhbDdDw6vVte9NmbPkos4yK272dlAQjwep5TiE63D3OwVP4TAdum99PhVim63ZO0RJYf8es5plAay33OmEUw1WeeXB/0EL/symE/eWlIXtDUbz2c1KHi8yviNB8qDb2YOsRTV8vO1OiRF/nXv6EKorDaeIYvwtDKyxT5ieENEQ== root@slave1
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAr4EKb4wjGwb1sKUX3iZ92T38ZqentL83Yok4tJ8k0UBdCCbOO7qgTGkZwg7gIhO0LBZ9YQDk6uR5hBJ8f3UN38l7MyJM9yYZl65dh04ZXqs0wRUMELYMBzTBKbIFdU9uwy07c8YMJbBvFBnBlzA764rFzGivFgoP4Kk9iIEFO/YQ/EwHyDpFhfRUVPEhjiJK4x7zS0sZAlhSSgk05fKQSw6qlXVr4JOGn5TWU7qylaj9lKABR9tI1WH4WoGx4zE8KDjgdUhtvygSmS4LO8JBOn1JPnqQq8PNWRI0xjWAohw3GnmUu54nKN5cjJjHvZPeXt0Y7B8YtI0+/DCQglo1zw== root@slave2
[root@hadoop ssh]#
2.2.5 将本地authorized_keys文件分别拷贝到3个容器中
## 1.上传
[root@hadoop ssh]# docker cp ./authorized_keys hadoop:/root/.ssh/authorized_keys
[root@hadoop ssh]# docker cp ./authorized_keys hadoop1:/root/.ssh/authorized_keys
[root@hadoop ssh]# docker cp ./authorized_keys hadoop2:/root/.ssh/authorized_keys
## 2.查看
bash-4.1# cat authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAwOpTpijrGlJTnlFEBjIRq9m24KIQ9oc50javrRpWBqQxhUrjIuVQTaOqiW6Dpgo8zDVNRratkB+HnlNQ8q3L0kE+IvlZINrCijZAGksZJpgbyuhqHKdf8Tmdco90FhAENQc54pcCvpDCD4dukSuICN3533rXIBxJU7omHnTQMORo+AMyGDTWW7pNgNDQoC7iZsjE+GcpF9Aq2+joQqYwOOrTDmQ2HI6TFomnM02PERlwZkbM/5ELZsb616JPu9QMNuv8BDHgRF87PtzZEI1vBEDeNfBAc3/J5vuirlqqxgS+zk5DFiWD0jcstJG1hTX5qRCXmvEyHMfE2kEtgkAXYQ== root@master
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAxdKMpxzEHotmob4Oi12G6pGoF+z39b0zZF3mWwDMtwX6dNpKTqeoaFDmkKUnov7PEGYKPehT1rPSv0pDxrNcMylsNGYsNt2jhcR0k5YyIAcJKjjYzM3ifaHj4Pce7PtLyr9drqH39cES336xHiawfzKppMKa6tsbEUI5d4YhYRMAGhbDdDw6vVte9NmbPkos4yK272dlAQjwep5TiE63D3OwVP4TAdum99PhVim63ZO0RJYf8es5plAay33OmEUw1WeeXB/0EL/symE/eWlIXtDUbz2c1KHi8yviNB8qDb2YOsRTV8vO1OiRF/nXv6EKorDaeIYvwtDKyxT5ieENEQ== root@slave1
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAr4EKb4wjGwb1sKUX3iZ92T38ZqentL83Yok4tJ8k0UBdCCbOO7qgTGkZwg7gIhO0LBZ9YQDk6uR5hBJ8f3UN38l7MyJM9yYZl65dh04ZXqs0wRUMELYMBzTBKbIFdU9uwy07c8YMJbBvFBnBlzA764rFzGivFgoP4Kk9iIEFO/YQ/EwHyDpFhfRUVPEhjiJK4x7zS0sZAlhSSgk05fKQSw6qlXVr4JOGn5TWU7qylaj9lKABR9tI1WH4WoGx4zE8KDjgdUhtvygSmS4LO8JBOn1JPnqQq8PNWRI0xjWAohw3GnmUu54nKN5cjJjHvZPeXt0Y7B8YtI0+/DCQglo1zw== root@slave2
bash-4.1#
2.3 容器IP
2.3.1 查看容器IP
## master(inet addr:172.17.0.2)
bash-4.1# ifconfig
eth0 Link encap:Ethernet HWaddr 02:42:AC:11:00:02
inet addr:172.17.0.2 Bcast:172.17.255.255 Mask:255.255.0.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:34 errors:0 dropped:0 overruns:0 frame:0
TX packets:19 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:3225 (3.1 KiB) TX bytes:1444 (1.4 KiB)
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:51037 errors:0 dropped:0 overruns:0 frame:0
TX packets:51037 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:7152430 (6.8 MiB) TX bytes:7152430 (6.8 MiB)
bash-4.1#
## slave1(inet addr:172.17.0.3)
bash-4.1# ifconfig
eth0 Link encap:Ethernet HWaddr 02:42:AC:11:00:03
inet addr:172.17.0.3 Bcast:172.17.255.255 Mask:255.255.0.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:30 errors:0 dropped:0 overruns:0 frame:0
TX packets:21 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:2821 (2.7 KiB) TX bytes:1528 (1.4 KiB)
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:32490 errors:0 dropped:0 overruns:0 frame:0
TX packets:32490 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:4477081 (4.2 MiB) TX bytes:4477081 (4.2 MiB)
## slave2(inet addr:172.17.0.4)
bash-4.1# ifconfig
eth0 Link encap:Ethernet HWaddr 02:42:AC:11:00:04
inet addr:172.17.0.4 Bcast:172.17.255.255 Mask:255.255.0.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:29 errors:0 dropped:0 overruns:0 frame:0
TX packets:21 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:2779 (2.7 KiB) TX bytes:1528 (1.4 KiB)
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:32301 errors:0 dropped:0 overruns:0 frame:0
TX packets:32301 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:4457148 (4.2 MiB) TX bytes:4457148 (4.2 MiB)
bash-4.1#
2.3.2 修改hosts映射IP
三个容器都要修改如下三个IP映射
bash-4.1# vi /etc/hosts
...
172.17.0.2 master
172.17.0.3 slave1
172.17.0.4 slave2
2.3.3 映射设置和免密成功
bash-4.1# ssh master
-bash-4.1# exit
logout
bash-4.1# ssh slave1
-bash-4.1# exit
logout
bash-4.1# ssh slave2
-bash-4.1# exit
logout
bash-4.1#
2.4 修改Hadoop配置文件
find / -name hadoop-env.sh
查找hadoop-env.sh的安装路径
bash-4.1# find / -name hadoop-env.sh
/usr/local/hadoop-2.7.0/etc/hadoop/hadoop-env.sh
bash-4.1# cd /usr/local/hadoop-2.7.0/
bash-4.1# ls
LICENSE.txt NOTICE.txt README.txt bin etc include input lib libexec logs sbin share
bash-4.1# cd etc/
bash-4.1# ls
hadoop
bash-4.1# cd hadoop/
bash-4.1# ls
capacity-scheduler.xml hadoop-env.sh httpfs-log4j.properties kms-site.xml mapred-site.xml.template yarn-site.xml
configuration.xsl hadoop-metrics.properties httpfs-signature.secret log4j.properties slaves
container-executor.cfg hadoop-metrics2.properties httpfs-site.xml mapred-env.cmd ssl-client.xml.example
core-site.xml hadoop-policy.xml kms-acls.xml mapred-env.sh ssl-server.xml.example
core-site.xml.template hdfs-site.xml kms-env.sh mapred-queues.xml.template yarn-env.cmd
hadoop-env.cmd httpfs-env.sh kms-log4j.properties mapred-site.xml yarn-env.sh
bash-4.1#
2.4.1 hadoop-env.sh
## 容器中的Java已经安装好,继续使用docker默认的jdk
bash-4.1# vi hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/java/default
export HADOOP_PREFIX=/usr/local/hadoop
export HADOOP_HOME=/usr/local/hadoop
2.4.2 core-site.xml
bash-4.1# vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-2.7.0/hdpdata</value>
</property>
</configuration>
2.4.3 hdfs-site.xml
配置备份数量,小于等于slave数量
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
2.4.4 mapred-site.xml
## mr运行场景
bash-4.1# vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
2.4.5 yarn-site.xml
bash-4.1# vi yarn-site.xml
<configuration>
<!-- 配置resourcemanager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>172.17.0.2</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
2.4.5 将master中的相关配置文件拷贝到slaves
bash-4.1# scp yarn-site.xml slave1:/usr/local/hadoop-2.7.0/etc/hadoop
yarn-site.xml 100% 274 0.3KB/s 00:00
bash-4.1# scp yarn-site.xml slave2:/usr/local/hadoop-2.7.0/etc/hadoop
yarn-site.xml 100% 274 0.3KB/s 00:00
bash-4.1#
2.4.6 格式化namenode
bash-4.1# cd /usr/local/hadoop-2.7.0/bin
bash-4.1# ./hadoop namenode -format
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.
21/08/09 01:30:05 INFO namenode.NameNode: STARTUP_MSG:
21/08/09 01:30:14 INFO common.Storage: Storage directory /usr/local/hadoop-2.7.0/hdpdata/dfs/name has been successfully formatted.
21/08/09 01:30:14 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
21/08/09 01:30:14 INFO util.ExitUtil: Exiting with status 0
21/08/09 01:30:14 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at master/172.17.0.2
************************************************************/
2.5 启动hadoop
bash-4.1# cd /usr/local/hadoop-2.7.0/sbin
## 启动hadoop
bash-4.1# ./start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [master]
master: namenode running as process 129. Stop it first.
localhost: datanode running as process 220. Stop it first.
Starting secondary namenodes [0.0.0.0]
0.0.0.0: secondarynamenode running as process 399. Stop it first.
starting yarn daemons
resourcemanager running as process 549. Stop it first.
localhost: nodemanager running as process 642. Stop it first.
bash-4.1# jps
220 DataNode
549 ResourceManager
399 SecondaryNameNode
129 NameNode
2058 Jps
642 NodeManager