Docker安装Hadoop分布式集群

一、准备环境

bash 复制代码
docker search hadoop
docker pull sequenceiq/hadoop-docker
docker images

二、Hadoop集群搭建

1. 运行hadoop102容器

bash 复制代码
docker run --name hadoop102 -d -h hadoop102 -p 9870:9870 -p 19888:19888 -v /opt/data/hadoop:/opt/data/hadoop sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop102 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥,一直回车,有一个根据提示输入y
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

2. 运行hadoop103容器

bash 复制代码
docker run --name hadoop103 -d -h hadoop103 -p 8088:8088 sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop103 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

3. 运行hadoop104容器

bash 复制代码
docker run --name hadoop104 -d -h hadoop104 sequenceiq/hadoop-docker
bash 复制代码
docker exec -it hadoop104 bash  #进入该容器
ssh-keygen -t rsa  #生成密钥
cd /root/.ssh/ && cat id_rsa.pub > authorized_keys #复制公钥到authorized_keys中
cat authorized_keys #复制到其他文档中保存

4.将三个密钥全部复制到authorized_keys文件

bash 复制代码
vi authorized_keys #将hadoop102 hadoop103 hadoop104复制到这一个文件中,在hadoop102 hadoop103 hadoop104中都保存起来

cat > authorized_keys << EOF
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAx01kz5PaWIU/oV7AuBZkiwQ52EVO61GDgRCsNUQd7GgdFKpCENCbDIRP9IvAumN0rdxloxQd2WMaEWTycHfeu8tsECihb4iIywDSx1XiQHrik+CUBPggCWdzJknZQgNSmMGRd4129r1bJO8lra100QoK/YpSYQY7Odapd1dg3dhrcEf2D59a/gvX8yHnYkBAuFNAm/HXijTNHq5TezoIOo63WT9BCWTQ4DLBmLdIZsOrZzHkNJCFpRgHTbEGX+h0vzqXhGXlkA9IMdrwFuAc5lTuazK9wwXgjmAq9M38Zh3ithr/LYB2VaHyUCXzNB44e1fGQ8+O946XerO8IO8r9w== root@hadoop102
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAyctJZBLE1pktU8jv2983inSZazyV0v49Dpzt6+z9K3E5JlBuNvM806mMzLhvta+eXnu7Th70fE20vUNukrjRCpZUv/zxzuKKadSLr76xQEsjP/ZnmUVsJfJU4Q/ThRR32TiDRDdnCf0nM/pi/eXePrbgZLxMNd2+r0o65++th6mDMV8Y/lGAxGBYIAsI3xJwEx+6/Ok9h5AlvtEezKWkI6JhpEjpTwncVEdk16vR8WJ51JsnYwszSenRmyOFmcT+O7qU0phpy/FvwlDzDen54nrssqj1vB+yhlkCdlfLN5PQ5nJIPE0sAwTxaFOLqEbLf1mLrtO9iTx+UwboGCxoRQ== root@hadoop103
ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAz7cQxzbxJoFMWVchuFdcCDY3nD8jb4lRZPTH+5+J4iFP1vEIADj18rMlfQy/d+c+v3XrFfNVvlrawKFVals5LcfVd97eEEX1g2KRxsM07aqPibVYGwturFpiu3pFKwI9j2hYwa4uWVM+5VCR49sgAxHce4jmlf/ZpLz5FxxqVYyRvQ3GoN8KsqDH7CwDyqnbgnrgJw7RE0d7nVOdfgXif1VXPPjPTzRUxMJKtyP1Ja+Sym/sxRJwOuEGEqNCg6WHYZ4OFArtWKMliDv3hRCUvJK9LlWu9+aeCHGrA6qJApaQ/vAOdzsSBj1IGzgDttYO4uxNkQavwazw/DHtRA9swQ== root@hadoop104
EOF

5. 配置地址映射

bash 复制代码
cat >> /etc/hosts << EOF
172.17.0.2    hadoop102 #ip是容器的IP
172.17.0.3    hadoop103 #ip是容器的IP
172.17.0.4    hadoop104 #ip是容器的IP
EOF

6.检查ssh是否成功

bash 复制代码
#在hadoop102 hadoop103 hadoop104都测试
ssh hadoop102
ssh hadoop103
ssh hadoop104

三、配置Hadoop

hadoop目录安装在:/usr/local/hadoop-2.7.0/etc/hadoop

1. core-site.xml

bash 复制代码
#在hadoop102容器中执行
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat  > core-site.xml << EOF
<configuration>
   <property> 
      <name>fs.defaultFS</name> 
      <value>hdfs://hadoop102:8020</value> 
   </property> 
    <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/data/hadoop</value>
    </property>
   <property> 
      <name>hadoop.http.staticuser.user</name> 
      <value>root</value> 
   </property>
</configuration>
EOF

2. hdfs-site.xml

bash 复制代码
#在hadoop102容器中执行
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > hdfs-site.xml << EOF
<configuration>
    <property>
        <name>dfs.namenode.http-address</name> 
        <value>hadoop102:9870</value> 
    </property> 
</configuration>
EOF

3. yarn-site.xml

bash 复制代码
#在hadoop102容器中执行 注意:还有其他需要编辑
cd /usr/local/hadoop-2.7.0/etc/hadoop
vi yarn-site.xml
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop103</value> 
    </property>

4. mapred-site.xml

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > mapred-site.xml << EOF
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
EOF

5. 分发文件

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/core-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/hdfs-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/yarn-site.xml hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop

scp /usr/local/hadoop-2.7.0/etc/hadoop/core-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/hdfs-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/yarn-site.xml hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop

四、启动集群

1. 配置slaves文件

bash 复制代码
cd /usr/local/hadoop-2.7.0/etc/hadoop
cat > slaves << EOF
hadoop102
hadoop103
hadoop104
EOF

2. 发送到其他节点

bash 复制代码
scp /usr/local/hadoop-2.7.0/etc/hadoop/slaves hadoop103:/usr/local/hadoop-2.7.0/etc/hadoop
scp /usr/local/hadoop-2.7.0/etc/hadoop/slaves hadoop104:/usr/local/hadoop-2.7.0/etc/hadoop

3. 格式化文件系统

bash 复制代码
cd /usr/local/hadoop-2.7.0/bin
./hadoop namenode -format

4. 在hadoop102启动hdfs

bash 复制代码
cd /usr/local/hadoop-2.7.0/sbin
./start-all.sh

5. 在hadoop103启动yarn

bash 复制代码
cd /usr/local/hadoop-2.7.0/sbin
./start-yarn.sh

6.访问验证

bash 复制代码
#关闭防火墙
systemctl stop firewalld
systemctl disable firewalld

#关闭selinux
sed -i 's/enforcing/disabled/' /etc/selinux/config # 永久
setenforce 0 # 临时

sysctl net.ipv4.ip_forward  #启用IP_FORWARD
systemctl restart docker
docker start hadoop102  hadoop103  hadoop104

访问Hadoop102:9870,查看是否能够看到hdfs界面

访问hadoop103:8088,查看能够看到yarn界面

五 案例

1. 执行一些hdfs命令

bash 复制代码
cat >> /root/.bashrc << EOF
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
EOF

source /root/.bashrc

hadoop fs -ls /
hadoop fs -mkdir /hadoop
hadoop fs -ls /

2. 上传文件到hdfs上

bash 复制代码
hadoop fs -put word.txt /hadoop
hadoop fs -ls /hadoop

3. 执行wordcount案例

bash 复制代码
cd /usr/local/hadoop
./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.0.jar wordcount /hadoop/word.txt /output

在yarn上可以看到执行情况

五、关闭集群

bash 复制代码
#hadoop102上
stop-dfs.sh

#hadoop103上:
stop-yarn.sh
相关推荐
Johny_Zhao44 分钟前
Docker + CentOS 部署 Zookeeper 集群 + Kubernetes Operator 自动化运维方案
linux·网络安全·docker·信息安全·zookeeper·kubernetes·云计算·系统运维
zwjapple1 小时前
docker-compose一键部署全栈项目。springboot后端,react前端
前端·spring boot·docker
代码老y3 小时前
Docker:容器化技术的基石与实践指南
运维·docker·容器
DuelCode4 小时前
Windows VMWare Centos Docker部署Springboot 应用实现文件上传返回文件http链接
java·spring boot·mysql·nginx·docker·centos·mybatis
掘金-我是哪吒8 小时前
分布式微服务系统架构第156集:JavaPlus技术文档平台日更-Java线程池使用指南
java·分布式·微服务·云原生·架构
亲爱的非洲野猪8 小时前
Kafka消息积压的多维度解决方案:超越简单扩容的完整策略
java·分布式·中间件·kafka
活跃家族8 小时前
分布式压测
分布式
杨浦老苏9 小时前
开源服务运行监控工具Lunalytics
docker·群晖·网站监控
前端世界10 小时前
HarmonyOS开发实战:鸿蒙分布式生态构建与多设备协同发布全流程详解
分布式·华为·harmonyos
DavidSoCool10 小时前
RabbitMQ使用topic Exchange实现微服务分组订阅
分布式·微服务·rabbitmq