Let's go! A total beginner's hands-on guide to deploying a Hadoop cluster

Hadoop deployment

Young one, I see you have extraordinary bone structure; clearly a kung fu prodigy! You want to learn Hadoop and big data, but you are stuck without a starter Hadoop distributed cluster environment? No matter, this old master will lend you a hand. Heh heh heh~

Environment overview

hadoop-master 192.168.186.132, 4 GB RAM, 60 GB disk

hadoop-slave-1 192.168.186.133, 4 GB RAM, 60 GB disk

hadoop-slave-2 192.168.186.134, 4 GB RAM, 60 GB disk

Environment setup

Windows environment configuration

VMware environment configuration

Virtual machine environment configuration

Note: before modifying any configuration file, back it up first if you are not sure about the change.

cp <config-file> <config-file>.bak

Path of the NIC (IP address) configuration file:

/etc/sysconfig/network-scripts/ifcfg-ens33

Commands:

[root@localhost ~]# cd /etc/sysconfig/network-scripts/
[root@localhost network-scripts]# ls
ifcfg-ens33  ifdown-ppp       ifup-ib      ifup-Team
ifcfg-lo     ifdown-routes    ifup-ippp    ifup-TeamPort
ifdown       ifdown-sit       ifup-ipv6    ifup-tunnel
ifdown-bnep  ifdown-Team      ifup-isdn    ifup-wireless
ifdown-eth   ifdown-TeamPort  ifup-plip    init.ipv6-global
ifdown-ib    ifdown-tunnel    ifup-plusb   network-functions
ifdown-ippp  ifup             ifup-post    network-functions-ipv6
ifdown-ipv6  ifup-aliases     ifup-ppp
ifdown-isdn  ifup-bnep        ifup-routes
ifdown-post  ifup-eth         ifup-sit
[root@localhost network-scripts]# cp ifcfg-ens33 ifcfg-ens33.bak
[root@localhost network-scripts]# vim ifcfg-ens33

Configuration:

hadoop-master

TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="87b5b6e3-3bfe-4ce1-b196-3c9f39fd57e3"
DEVICE="ens33"
ONBOOT="yes"
IPADDR=192.168.186.132
GATEWAY=192.168.186.2
DNS1=192.168.186.2

Once configured, restart the network service and check the IP address:

[root@localhost network-scripts]# systemctl restart network
[root@localhost network-scripts]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.186.132  netmask 255.255.255.0  broadcast 192.168.186.255
        inet6 fe80::5c58:e4b8:19b:d6fc  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:ba:1b:7a  txqueuelen 1000  (Ethernet)
        RX packets 106  bytes 20241 (19.7 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 183  bytes 21886 (21.3 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

hadoop-slave1

Switch to the hadoop-slave-1 VM, go to the same network-scripts location, back up the file, and edit the NIC configuration. (The UUID below is identical to the master's; if your VMs are clones, consider removing or regenerating the UUID line, since duplicate UUIDs can confuse NetworkManager.)

TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="87b5b6e3-3bfe-4ce1-b196-3c9f39fd57e3"
DEVICE="ens33"
ONBOOT="yes"
IPADDR=192.168.186.133
GATEWAY=192.168.186.2
DNS1=192.168.186.2

Once configured, restart the network service and check the IP address:

[root@localhost network-scripts]# systemctl restart network
[root@localhost network-scripts]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.186.133  netmask 255.255.255.0  broadcast 192.168.186.255
        inet6 fe80::9c42:f21:d495:52cd  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:89:c5:b3  txqueuelen 1000  (Ethernet)
        RX packets 95  bytes 26800 (26.1 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 168  bytes 19298 (18.8 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

hadoop-slave2

Switch to the hadoop-slave-2 VM, go to the same location, back up the file, and edit the NIC configuration:

TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="87b5b6e3-3bfe-4ce1-b196-3c9f39fd57e3"
DEVICE="ens33"
ONBOOT="yes"
IPADDR=192.168.186.134
GATEWAY=192.168.186.2
DNS1=192.168.186.2

Once configured, restart the network service and check the IP address:

[root@localhost network-scripts]# systemctl restart network
[root@localhost network-scripts]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.186.134  netmask 255.255.255.0  broadcast 192.168.186.255
        inet6 fe80::48b5:734d:aed4:c4c7  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:2c:44:30  txqueuelen 1000  (Ethernet)
        RX packets 83749  bytes 122190622 (116.5 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 9869  bytes 622244 (607.6 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

Configure hostname mapping

Use hostnames in place of raw IP addresses, so that a later IP change does not require edits in many places; this is the same idea as using variables, and it makes the project easier to maintain. Edit /etc/hosts on each machine so it contains the following:

127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.186.132    hadoopmaster
192.168.186.133    hadoopslave1
192.168.186.134    hadoopslave2

All three servers need this configuration, and the content is identical.
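If you would rather not edit each file by hand, the same entries can be appended and verified with a couple of commands; a minimal sketch, assuming you run it as root on each of the three machines:

# Append the cluster mappings to /etc/hosts (run once per node)
cat >> /etc/hosts <<'EOF'
192.168.186.132    hadoopmaster
192.168.186.133    hadoopslave1
192.168.186.134    hadoopslave2
EOF

# Confirm the names resolve
getent hosts hadoopmaster hadoopslave1 hadoopslave2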

Disable the firewall

systemctl stop firewalld
systemctl disable firewalld

Do this on all three servers.
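Before relying on this, it is worth confirming on each node that firewalld is stopped now and will stay off after a reboot; both checks use only stock systemd tooling:

systemctl is-active firewalld    # expect "inactive" once stopped
systemctl is-enabled firewalld   # expect "disabled"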

Test the result

Reboot all three servers, then verify that each host can reach the others by name:

ping hadoopmaster
ping hadoopslave1
ping hadoopslave2

Remote access tool

MobaXterm

Add the hadoop user

useradd -u 8080 hadoop; echo 123 | passwd --stdin hadoop

Run this on all three servers.
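A quick sanity check that the account came out as intended; id and su are standard tools, so this works on any of the three nodes:

id hadoop                  # expect the uid field to read uid=8080(hadoop)
su - hadoop -c 'echo ok'   # root can switch to the user without a password; should print "ok"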

SSH authorization for the root account

Generate an SSH key pair on hadoop-master and distribute it to the slaves, so that root can log in to the slaves without a password.

Generate the SSH key pair

[root@localhost ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:lY/2eq9g6KIHNqknTw9TSuvIK38T8c1SEoY0gh9iXwM root@localhost.localdomain
The key's randomart image is:
+---[RSA 2048]----+
| ..E+.           |
|o....+o    .     |
|.o......  o      |
|  ... . .. o     |
|    .+.=S o .    |
|   .*+o oo .     |
|   o*+ .. o .    |
|..o+++o. . o.    |
| o=*++... ...o.  |
+----[SHA256]-----+
[root@localhost ~]#

Distribute it to the slave servers (192.168.186.133, 192.168.186.134)

The hostname mappings configured above are used here.

192.168.186.133 (hadoopslave1)

[root@localhost ~]# ssh-copy-id root@hadoopslave1
/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
The authenticity of host 'hadoop_slave_1 (192.168.186.133)' can't be established.
ECDSA key fingerprint is SHA256:owdLajW8eOr/KAguiSiZso/BHYZcNj/btN6IUoLo0KE.
ECDSA key fingerprint is MD5:3e:c1:1e:b0:a2:58:4d:01:4e:df:0c:b8:2b:d9:a3:30.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@hadoop_slave_1's password:

Number of key(s) added: 1

Now try logging into the machine, with:   "ssh 'root@hadoopslave1'"
and check to make sure that only the key(s) you wanted were added.

[root@localhost ~]#

192.168.186.134 (hadoopslave2)

[root@localhost ~]# ssh-copy-id root@hadoopslave2
/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
The authenticity of host 'hadoop_slave_2 (192.168.186.134)' can't be established.
ECDSA key fingerprint is SHA256:QhQzOZf/7O2KclyVp4hnLkRgZs7ttSTIK2vyIiBSYP8.
ECDSA key fingerprint is MD5:8e:6e:ab:68:81:bc:82:fb:69:80:f9:ef:d8:72:4f:fc.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@hadoop_slave_2's password:

Number of key(s) added: 1

Now try logging into the machine, with:   "ssh 'root@hadoopslave2'"
and check to make sure that only the key(s) you wanted were added.

[root@localhost ~]#

Install JDK 1.8

Remove any previously installed Java versions:

yum -y remove java-*

Install the new JDK:

rpm -ivh jdk-8u191-linux-x64.rpm

Verify the installation

Check the Java installation path:

rpm -pql /opt/jdk-8u191-linux-x64.rpm | head
warning: /opt/jdk-8u191-linux-x64.rpm: Header V3 RSA/SHA256 Signature, key ID ec551f03: NOKEY
/usr
/usr/java
/usr/java/jdk1.8.0_191-amd64
/usr/java/jdk1.8.0_191-amd64/.java
/usr/java/jdk1.8.0_191-amd64/.java/.systemPrefs
/usr/java/jdk1.8.0_191-amd64/.java/.systemPrefs/.system.lock
/usr/java/jdk1.8.0_191-amd64/.java/.systemPrefs/.systemRootModFile
/usr/java/jdk1.8.0_191-amd64/.java/init.d
/usr/java/jdk1.8.0_191-amd64/.java/init.d/jexec
/usr/java/jdk1.8.0_191-amd64/COPYRIGHT

Install Java on all three servers.
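Before moving on, confirm the runtime actually resolves on each server; the Oracle RPM normally wires java into the system path, and output along these lines indicates success (exact build strings may differ slightly):

java -version
# java version "1.8.0_191"
# Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
# Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)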

Install Hadoop

Switch accounts to perform the installation.

hadoop-master: 192.168.186.132

[root@localhost opt]# su hadoop
[hadoop@localhost opt]$ ls
jdk-8u191-linux-x64.rpm  rh
[hadoop@localhost opt]$ cd /home/hadoop/
[hadoop@localhost ~]$ pwd
/home/hadoop
[hadoop@localhost ~]$

Extract the Hadoop package:

tar -xvf hadoop-3.0.0.tar.gz

Create the data directories (these are the paths referenced later by dfs.namenode.name.dir, dfs.datanode.data.dir, and hadoop.tmp.dir):

mkdir -p /home/hadoop/dfs/name  /home/hadoop/dfs/data /home/hadoop/tmp

Modify the configuration files

Location of the Hadoop configuration files:

cd /home/hadoop/hadoop-3.0.0/etc/hadoop

1. Set the Java environment variable used by Hadoop

Configuration file: hadoop-env.sh

Command: vim hadoop-env.sh, then add the following line:

export JAVA_HOME=/usr/java/jdk1.8.0_191-amd64
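If you prefer a non-interactive edit, appending the line achieves the same result; a minimal sketch, assuming you are in the etc/hadoop directory from the previous step:

echo 'export JAVA_HOME=/usr/java/jdk1.8.0_191-amd64' >> hadoop-env.sh
grep '^export JAVA_HOME' hadoop-env.sh   # confirm the line is now present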

2. Configure the HDFS default filesystem address

Configuration file: core-site.xml

Command: vim core-site.xml

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoopmaster:9000</value>
  </property>

  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>

  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/home/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
</configuration>

3. Configure hdfs-site

Configuration file: hdfs-site.xml

Command: vim hdfs-site.xml

<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoopmaster:9001</value>
  </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hadoop/dfs/name</value>
  </property>

  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hadoop/dfs/data</value>
  </property>

  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>

  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>

4. Configure MapReduce

Configuration file: mapred-site.xml

Command: vim mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>0.0.0.0:10020</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>0.0.0.0:19888</value>
  </property>
</configuration>

5. Configure the history server

Hadoop ships with a history server, through which you can view records of completed MapReduce jobs: how many map and reduce tasks a job used, when it was submitted, when it started, when it finished, and so on. The history server is not started by default; you can start it with the commands below.

mkdir -p /home/hadoop/hadoop-3.0.0/logs/
/home/hadoop/hadoop-3.0.0/sbin/mr-jobhistory-daemon.sh  start historyserver
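Once started, the daemon should show up in jps (bundled with the JDK), and its web UI listens on port 19888, matching mapreduce.jobhistory.webapp.address in the mapred-site.xml above:

jps   # expect a JobHistoryServer entry in the list
# history web UI: http://hadoopmaster:19888/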

6. Configure YARN

Configuration file: yarn-site.xml

Command: vim yarn-site.xml (all of the properties below go inside the <configuration> element):

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <property>
    <name>yarn.resourcemanager.address</name>
    <value>hadoopmaster:8032</value>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>hadoopmaster:8030</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>hadoopmaster:8031</value>
  </property>

  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>hadoopmaster:8033</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>hadoopmaster:8088</value>
  </property>

  <property>
    <name>yarn.application.classpath</name>
    <value>/home/hadoop/hadoop-3.0.0/etc/hadoop:/home/hadoop/hadoop-3.0.0/share/hadoop/common/lib/*:/home/hadoop/hadoop-3.0.0/share/hadoop/common/*:/home/hadoop/hadoop-3.0.0/share/hadoop/hdfs:/home/hadoop/hadoop-3.0.0/share/hadoop/hdfs/lib/*:/home/hadoop/hadoop-3.0.0/share/hadoop/hdfs/*:/home/hadoop/hadoop-3.0.0/share/hadoop/mapreduce/*:/home/hadoop/hadoop-3.0.0/share/hadoop/yarn:/home/hadoop/hadoop-3.0.0/share/hadoop/yarn/lib/*:/home/hadoop/hadoop-3.0.0/share/hadoop/yarn/*</value>
  </property>
</configuration>

7. Configure the DataNodes

Configuration file: workers

Command: vim workers (in Hadoop 3.x the workers file replaces the slaves file used by 2.x)

hadoopslave1
hadoopslave2

8. Set up passwordless SSH from the master for the hadoop user

su hadoop
ssh-keygen
ssh-copy-id hadoopslave1
ssh-copy-id hadoopslave2

Note: you will be prompted for the remote password along the way.
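Before continuing, it is worth confirming that the hadoop user can now reach both slaves without a password; each command below should print immediately, with no password prompt:

ssh hadoopslave1 'echo ok from slave1'
ssh hadoopslave2 'echo ok from slave2'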

Distribute to the nodes

Fix the ownership:

chown hadoop:hadoop -R /home/hadoop/hadoop-3.0.0

Copy to the worker nodes:

su hadoop
scp -r /home/hadoop/hadoop-3.0.0 hadoop@hadoopslave1:/home/hadoop
scp -r /home/hadoop/hadoop-3.0.0 hadoop@hadoopslave2:/home/hadoop

Initialize (format the NameNode)

/home/hadoop/hadoop-3.0.0/bin/hdfs namenode -format

Run this once, on the master only. The output should include a line reading "Storage directory /home/hadoop/dfs/name has been successfully formatted."

Start the services

cd /home/hadoop/hadoop-3.0.0/sbin

HDFS

./start-dfs.sh

YARN (distributed computation)

./start-yarn.sh

Note: the two scripts start-dfs.sh and start-yarn.sh can be replaced with the single start-all.sh, i.e.:

./start-all.sh
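A quick way to confirm the daemons actually came up is to run jps on every node. With the configuration above you would typically expect something like the following (JobHistoryServer appears only if you started it in step 5):

# on hadoopmaster
jps
# expected: NameNode, SecondaryNameNode, ResourceManager, Jps

# on hadoopslave1 / hadoopslave2
jps
# expected: DataNode, NodeManager, Jps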

Check the status

From the command line

/home/hadoop/hadoop-3.0.0/bin/hdfs dfsadmin -report

Live datanodes (3):

Name: 192.168.186.132:9866 (hadoopmaster)
Hostname: hadoopmaster
Decommission Status : Normal
Configured Capacity: 59917819904 (55.80 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 5775724544 (5.38 GB)
DFS Remaining: 54142091264 (50.42 GB)
DFS Used%: 0.00%
DFS Remaining%: 90.36%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 07 06:30:53 PST 2024
Last Block Report: Thu Mar 07 06:30:08 PST 2024

Name: 192.168.186.133:9866 (hadoopslave1)
Hostname: hadoopslave1
Decommission Status : Normal
Configured Capacity: 59917819904 (55.80 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 6568022016 (6.12 GB)
DFS Remaining: 53349793792 (49.69 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.04%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 07 06:30:53 PST 2024
Last Block Report: Thu Mar 07 06:30:02 PST 2024

Name: 192.168.186.134:9866 (hadoopslave2)
Hostname: hadoopslave2
Decommission Status : Normal
Configured Capacity: 59917819904 (55.80 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 6303846400 (5.87 GB)
DFS Remaining: 53613969408 (49.93 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.48%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 07 06:30:53 PST 2024
Last Block Report: Thu Mar 07 06:30:02 PST 2024

From the web UI

http://192.168.186.132:9870/dfshealth.html#tab-datanode
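The YARN ResourceManager UI is also available; its address comes straight from yarn.resourcemanager.webapp.address in the yarn-site.xml above, and the job history UI from step 5 listens on port 19888:

http://192.168.186.132:8088/
http://192.168.186.132:19888/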

The reason for suddenly publishing Hadoop-related articles is that I have recently been thinking about crawler data analysis built on scrapy + hadoop + hbase + spark. This deployment took roughly six do-overs to get right, so interested friends and experts, please feel free to offer pointers. If you want to try it yourself, contact the author and I will share the installation packages I have already downloaded.
