Docker安装Hadoop分布式集群

一、准备环境

bash 复制代码
docker search hadoop
docker pull sequenceiq/hadoop-docker
docker images

二、Hadoop集群搭建

1. 运行hadoop102容器

bash 复制代码
docker run --name hadoop102 -d -h hadoop102 -p 9870:9870 -p 19888:19888 -v /opt/data/hadoop:/opt/data/hadoop sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop102 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥,一直回车,有一个根据提示输入y
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

2. 运行hadoop103容器

bash 复制代码
docker run --name hadoop103 -d -h hadoop103 -p 8088:8088 sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop103 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

3. 运行hadoop104容器

bash 复制代码
docker run --name hadoop104 -d -h hadoop104 sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop104 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

4.将三个密钥全部复制到authorized_keys文件

bash 复制代码
vi authorized_keys #将hadoop102 hadoop103 hadoop104复制到这一个文件中,在hadoop102 hadoop103 hadoop104中都保存起来

cat > authorized_keys << EOF
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAx01kz5PaWIU/oV7AuBZkiwQ52EVO61GDgRCsNUQd7GgdFKpCENCbDIRP9IvAumN0rdxloxQd2WMaEWTycHfeu8tsECihb4iIywDSx1XiQHrik+CUBPggCWdzJknZQgNSmMGRd4129r1bJO8lra100QoK/YpSYQY7Odapd1dg3dhrcEf2D59a/gvX8yHnYkBAuFNAm/HXijTNHq5TezoIOo63WT9BCWTQ4DLBmLdIZsOrZzHkNJCFpRgHTbEGX+h0vzqXhGXlkA9IMdrwFuAc5lTuazK9wwXgjmAq9M38Zh3ithr/LYB2VaHyUCXzNB44e1fGQ8+O946XerO8IO8r9w== root@hadoop102
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAyctJZBLE1pktU8jv2983inSZazyV0v49Dpzt6+z9K3E5JlBuNvM806mMzLhvta+eXnu7Th70fE20vUNukrjRCpZUv/zxzuKKadSLr76xQEsjP/ZnmUVsJfJU4Q/ThRR32TiDRDdnCf0nM/pi/eXePrbgZLxMNd2+r0o65++th6mDMV8Y/lGAxGBYIAsI3xJwEx+6/Ok9h5AlvtEezKWkI6JhpEjpTwncVEdk16vR8WJ51JsnYwszSenRmyOFmcT+O7qU0phpy/FvwlDzDen54nrssqj1vB+yhlkCdlfLN5PQ5nJIPE0sAwTxaFOLqEbLf1mLrtO9iTx+UwboGCxoRQ== root@hadoop103
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAz7cQxzbxJoFMWVchuFdcCDY3nD8jb4lRZPTH+5+J4iFP1vEIADj18rMlfQy/d+c+v3XrFfNVvlrawKFVals5LcfVd97eEEX1g2KRxsM07aqPibVYGwturFpiu3pFKwI9j2hYwa4uWVM+5VCR49sgAxHce4jmlf/ZpLz5FxxqVYyRvQ3GoN8KsqDH7CwDyqnbgnrgJw7RE0d7nVOdfgXif1VXPPjPTzRUxMJKtyP1Ja+Sym/sxRJwOuEGEqNCg6WHYZ4OFArtWKMliDv3hRCUvJK9LlWu9+aeCHGrA6qJApaQ/vAOdzsSBj1IGzgDttYO4uxNkQavwazw/DHtRA9swQ== root@hadoop104
EOF

5. 配置地址映射

bash 复制代码
cat >> /etc/hosts << EOF
172.17.0.2    hadoop102 #ip是容器的IP
172.17.0.3    hadoop103 #ip是容器的IP
172.17.0.4    hadoop104 #ip是容器的IP
EOF

6.检查ssh是否成功

bash 复制代码
#在hadoop102 hadoop103 hadoop104都测试
ssh hadoop102
ssh hadoop103
ssh hadoop104

三、配置Hadoop

hadoop目录安装在:/usr/local/hadoop-2.7.0/etc/hadoop

1. core-site.xml

bash 复制代码
#在hadoop102容器中执行
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat  > core-site.xml << EOF
<configuration>
   <property> 
      <name>fs.defaultFS</name> 
      <value>hdfs://hadoop102:8020</value> 
   </property> 
    <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/data/hadoop</value>
    </property>
   <property> 
      <name>hadoop.http.staticuser.user</name> 
      <value>root</value> 
   </property>
</configuration>
EOF

2. hdfs-site.xml

bash 复制代码
#在hadoop102容器中执行
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > hdfs-site.xml << EOF
<configuration>
    <property>
        <name>dfs.namenode.http-address</name> 
        <value>hadoop102:9870</value> 
    </property> 
</configuration>
EOF

3. yarn-site.xml

bash 复制代码
#在hadoop102容器中执行 注意:还有其他需要编辑
cd /usr/local/hadoop-2.7.0/etc/hadoop
vi yarn-site.xml
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop103</value> 
    </property>

4. mapred-site.xml

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > mapred-site.xml << EOF
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
EOF

5. 分发文件

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/core-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/hdfs-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/yarn-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop

scp /usr/local/hadoop-2.7.0/etc/hadoop/core-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/hdfs-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/yarn-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop

四、启动集群

1. 配置slaves文件

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > slaves << EOF
hadoop102
hadoop103
hadoop104
EOF

2. 发送到其他节点

bash 复制代码
scp /usr/local/hadoop-2.7.0/etc/hadoop/slaves hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/slaves hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop

3. 格式化文件系统

bash 复制代码
cd /usr/local/hadoop-2.7.0/bin
./hadoop namenode -format

4. 在hadoop102启动hdfs

bash 复制代码
cd /usr/local/hadoop-2.7.0/sbin
./start-all.sh

5. 在hadoop103启动yarn

bash 复制代码
cd /usr/local/hadoop-2.7.0/sbin
./start-yarn.sh

6.访问验证

bash 复制代码
#关闭防火墙
systemctl stop firewalld
systemctl disable firewalld

#关闭selinux
sed -i 's/enforcing/disabled/' /etc/selinux/config # 永久
setenforce 0 # 临时

sysctl net.ipv4.ip_forward  #启用IP_FORWARD
systemctl restart docker
docker start hadoop102  hadoop103  hadoop104

访问Hadoop102:9870,查看是否能够看到hdfs界面

访问hadoop103:8088,查看能够看到yarn界面

五 案例

1. 执行一些hdfs命令

bash 复制代码
cat >> /root/.bashrc << EOF
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
EOF

source /root/.bashrc

hadoop fs -ls /
hadoop fs -mkdir /hadoop
hadoop fs -ls /

2. 上传文件到hdfs上

bash 复制代码
hadoop fs -put word.txt /hadoop
hadoop fs -ls /hadoop

3. 执行wordcount案例

bash 复制代码
cd /usr/local/hadoop
./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.0.jar wordcount /hadoop/word.txt /output

在yarn上可以看到执行情况

五、关闭集群

bash 复制代码
#hadoop102上
stop-dfs.sh

#hadoop103上:
stop-yarn.sh
相关推荐
❀͜͡傀儡师8 小时前
docker部署Rainbond 版本v6.5.0 UI 焕新升级,故障定位更精准
运维·docker·容器
T0uken8 小时前
Docker 在国内网络环境下的使用
网络·docker·容器
竹杖芒鞋轻胜马,谁怕?一蓑烟雨任平生。10 小时前
Docker部署Fio磁盘读写测试
运维·docker·容器
最贪吃的虎12 小时前
Java基础之泛型
java·运维·jvm·分布式·后端
走遍西兰花.jpg13 小时前
如何启动azkaban
hadoop·azkaban
工具罗某人14 小时前
CentOS7安装docker
docker·云原生·centos
小代码201614 小时前
loki 环境搭建
spring boot·docker·log4j·grafana
我爱学习好爱好爱16 小时前
Prometheus监控栈 监控docker和进程
docker·容器·prometheus
hhzz16 小时前
Docker 搭建 NextCloud + OnlyOffice 完整教程(Linux Centos7系统)
linux·docker·容器·onlyoffice·nextcloud
雪碧聊技术16 小时前
基于Redis的分布式锁
数据库·redis·分布式