Linux Hadoop平台伪分布式安装

Linux Hadoop 伪分布式安装

    • [1. JDK](#1. JDK)
    • [2. Hadoop](#2. Hadoop)
    • [3. Mysql+Hive](#3. Mysql+Hive)
      • [3.1 Mysql8安装](#3.1 Mysql8安装)
      • [3.2 Hive安装](#3.2 Hive安装)
    • [4. Spark](#4. Spark)
      • [4.1 Maven安装](#4.1 Maven安装)
      • [4.2 Scala安装](#4.2 Scala安装)
      • [4.3 Spark编译并安装](#4.3 Spark编译并安装)
    • [5. Zookeeper](#5. Zookeeper)
    • [6. HBase](#6. HBase)

版本概要:

  • jdk: jdk-8u391-linux-x64.tar.gz
  • hadoop:hadoop-3.3.1.tar.gz
  • hive:apache-hive-3.1.2-bin.tar.gz
  • mysql:mysql-8.0.27-1.el7.x86_64.rpm-bundle.tar
  • maven:apache-maven-3.5.4-bin.tar.gz
  • scala:scala-2.11.12.tgz
  • spark:spark-2.4.5.tgz
  • zookeeper:zookeeper-3.4.10.tar.gz
  • hbase:hbase-2.4.12-bin.tar.gz

1. JDK

JDK下载:https://www.oracle.com/java/technologies/downloads/

shell 复制代码
# 解压
[root@sole install]# tar -zxvf jdk-8u391-linux-x64.tar.gz -C /opt/software/
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
# JAVA_HOME
export JAVA_HOME=/opt/software/jdk1.8.0_391
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export JAVA_PATH=${JAVA_HOME}/bin:${JRE_HOME}/bin
export PATH=$PATH:${JAVA_PATH}

# 重新加载
[root@sole ~]# source /etc/profile

2. Hadoop

下载地址:https://archive.apache.org/dist/

shell 复制代码
# 解压
[root@sole install]# tar -zxvf hadoop-3.3.1.tar.gz -C /opt/software/
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
# HADOOP_HOME
export HADOOP_HOME=/opt/software/hadoop-3.3.1
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
# 重新加载
[root@sole ~]# source /etc/profile
shell 复制代码
# Hadoop配置修改
[root@sole software]# cd hadoop-3.3.1/etc/hadoop/

[root@sole hadoop]# vi hadoop-env.sh
export JAVA_HOME=/opt/software/jdk1.8.0_391
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
shell 复制代码
[root@sole hadoop]# vi core-site.xml
xml 复制代码
<configuration>
<!-- 指定Hadoop所使用的文件系统schema(URL),HDFS的老大(NameNode)的地址 -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://sole:9000</value>
</property>
<!-- 指定Hadoop运行时产生文件的储存目录,默认是/tmp/hadoop-${user.name} -->
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/software/hadoop-3.3.1/hadoopdata</value>  
</property>
</configuration>
shell 复制代码
[root@sole hadoop]# vi hdfs-site.xml 
xml 复制代码
<configuration>
<!-- 指定HDFS副本的数量 -->
<property>
    <name>dfs.replication</name>
    <value>1</value>
</property>
<property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/software/hadoop-3.3.1/tmp/name</value>
</property>
<property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/software/hadoop-3.3.1/tmp/data</value>
</property>
</configuration>
shell 复制代码
[root@sole hadoop]# vi yarn-site.xml 
xml 复制代码
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.address</name>
    <value>sole:18040</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>sole:18030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>sole:18025</value>
</property>
<property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>sole:18141</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>sole:18088</value>
</property>
</configuration>
shell 复制代码
[root@sole hadoop]# vi mapred-site.xml 
xml 复制代码
<configuration>
<!-- 指定mr运行时框架,这里指定在yarn上,默认是local -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
</configuration>

shell 复制代码
# 初始化,进到${HADOOP_HOME}/sbin目录下
[root@sole sbin]# pwd
/opt/software/hadoop-3.3.1/sbin
[root@sole sbin]# hdfs namenode -format

# 启动服务
[root@sole sbin]# start-dfs.sh
[root@sole sbin]# start-yarn.sh

3. Mysql+Hive

3.1 Mysql8安装

shell 复制代码
# 卸载已有mariadb服务,如果已安装过MySQL,则将旧的MySQL服务全部卸载再安装
[root@sole ~]# rpm  -qa|grep mariadb
[root@sole ~]# yum remove mariadb-libs

MySQL 8.0.27 tar包下载:https://downloads.mysql.com/archives/community/


shell 复制代码
[root@sole ~]# tar -xvf mysql-8.0.27-1.el7.x86_64.rpm-bundle.tar

shell 复制代码
# 为了避免安装过程中报错,提前安装好以下依赖
[root@sole ~]# yum -y install libaio
[root@sole ~]# yum install openssl-devel.x86_64 openssl.x86_64 -y
[root@sole ~]# yum -y install autoconf
[root@sole ~]# yum install perl.x86_64 perl-devel.x86_64 -y
[root@sole ~]# yum install perl-JSON.noarch -y
[root@sole ~]# yum install perl-Test-Simple
[root@sole ~]# yum install net-tools
shell 复制代码
# 安装mysql
[root@sole ~]# rpm -ivh mysql-community-common-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-client-plugins-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-libs-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-client-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-server-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-libs-compat-8.0.27-1.el7.x86_64.rpm
[root@sole ~]# rpm -ivh mysql-community-embedded-compat-8.0.27-1.el7.x86_64.rpm 
[root@sole ~]# rpm -ivh mysql-community-devel-8.0.27-1.el7.x86_64.rpm 
shell 复制代码
#启动MySQL
[root@sole ~]# mysqld --initialize --console
[root@sole ~]# chown -R mysql:mysql /var/lib/mysql/
[root@sole ~]# systemctl start mysqld.service
[root@sole ~]# systemctl status mysqld.service
[root@sole ~]# cat /var/log/mysqld.log |grep password  # 查看临时密码
mysql 复制代码
# 修改密码&远程登录权限
mysql> alter user 'root'@'localhost' identified by 'root';
mysql> CREATE USER 'root'@'%' IDENTIFIED BY 'root';
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;
mysql> FLUSH PRIVILEGES;
shell 复制代码
# MySQL字符集修改,最后添加配置如下
[root@sole ~]# vi /etc/my.cnf

[mysql.server]
default-character-set = utf8
[client]
default-character-set = utf8


#添加完重启服务
[root@sole ~]# service mysqld restart

3.2 Hive安装

下载地址:https://archive.apache.org/dist/

shell 复制代码
# 解压
[root@sole install]# tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /opt/software/
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
# HIVE_HOME
export HIVE_HOME=/opt/software/apache-hive-3.1.2-bin
export PATH=$HIVE_HOME/bin:$PATH
# 重新加载
[root@sole ~]# source /etc/profile
shell 复制代码
# Hive配置修改,进入${HIVE_HOME}/conf
[root@sole conf]# cp hive-env.sh.template hive-env.sh
[root@sole conf]# vi hive-env.sh

export JAVA_HOME=/opt/software/jdk1.8.0_391
export HADOOP_HOME=/opt/software/hadoop-3.3.1
export HIVE_CONF_DIR=/opt/software/apache-hive-3.1.2-bin/conf
export HIVE_AUX_JARS_PATH=/opt/software/apache-hive-3.1.2-bin/lib
shell 复制代码
[root@sole conf]# vi hive-site.xml 
xml 复制代码
<configuration>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>
    <property>
        <name>hive.exec.scratchdir</name>
        <value>/tmp_local/hive</value>
    </property>
    <property>
        <name>hive.metastore.local</name>
        <value>true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://sole:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;allowPublicKeyRetrieval=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>    
    <property>
        <name>hive.exec.mode.local.auto</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.dynamic.partition</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.dynamic.partition.mode</name>
        <value>nonstrict</value>
    </property>
    <property>
        <name>hive.support.concurrency</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.txn.manager</name>
        <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
    </property>
    <property>
         <name>hive.compactor.initiator.on</name>
         <value>true</value>
    </property>
    <property>
          <name>hive.compactor.worker.threads</name>
          <value>1</value>
    </property>
</configuration>
shell 复制代码
# MySQL驱动
[root@sole install]# cp mysql-connector-j-8.0.33.jar /opt/software/apache-hive-3.1.2-bin/lib/

MySQL驱动网页下载:https://downloads.mysql.com/archives/c-j/

Maven下载:

xml 复制代码
<dependency>
  <groupId>com.mysql</groupId>
  <artifactId>mysql-connector-j</artifactId>
  <version>8.0.33</version>
</dependency>
shell 复制代码
# 解决Hadoop和Hive的两个guava.jar版本冲突问题:
# 删除${HIVE_HOME}/lib中的guava-19.0.jar
# 并将${HADOOP_HOME}/share/hadoop/common/lib/guava-27.0-jre.jar复制到${HIVE_HOME}/lib下

[root@sole install]# cd /opt/software/apache-hive-3.1.2-bin/lib/
[root@sole lib]# rm -f guava-19.0.jar 
[root@sole lib]# cp /opt/software/hadoop-3.3.1/share/hadoop/common/lib/guava-27.0-jre.jar ./

shell 复制代码
# 初始化元数据库
[root@sole bin]# ./schematool -dbType mysql -initSchema

# 启动服务 metastore & hiveserver2
[root@sole bin]# nohup hive --service metastore>hive.log 2>&1 &
[root@sole bin]# nohup hive --service hiveserver2>/dev/null 2>&1 &

4. Spark

下载地址:https://archive.apache.org/dist/

https://www.scala-lang.org/download/2.11.12.html




4.1 Maven安装

shell 复制代码
# 解压
[root@sole install]# tar -zxvf apache-maven-3.5.4-bin.tar.gz -C /opt/software
[root@sole software]# mv apache-maven-3.5.4/ maven-3.5.4
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
# MAVEN_HOME
export MAVEN_HOME=/opt/software/maven-3.5.4
export PATH=$MAVEN_HOME/bin:$PATH

# 重新加载环境变量并测试maven
[root@sole ~]# source /etc/profile
[root@sole ~]# mvn -v
shell 复制代码
# 配置阿里云镜像
[root@sole software]# vi maven-3.5.4/conf/settings.xml
xml 复制代码
<mirror>
	<id>alimaven</id>
	<name>aliyun maven</name>
	<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
	<mirrorOf>central</mirrorOf>
</mirror>

4.2 Scala安装

shell 复制代码
# 解压
[root@sole install]# tar -zxvf scala-2.11.12.tgz -C /opt/software
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
#SCALA_HOME
export SCALA_HOME=/opt/software/scala-2.11.12
export PATH=$SCALA_HOME/bin:$PATH

# 重新加载并测试
[root@sole ~]# source /etc/profile
[root@sole ~]# scala -version

4.3 Spark编译并安装

shell 复制代码
# 解压
[root@sole install]# tar -zxvf spark-2.4.5.tgz -C /opt/software
# 编辑配置文件
[root@sole ~]# vi /opt/software/spark-2.4.5/dev/make-distribution.sh
shell 复制代码
#以下内容注释,并添加以下配置
#VERSION=$("$MVN" help:evaluate -Dexpression=project.version $@ 2>/dev/null\
#    | grep -v "INFO"\
#    | grep -v "WARNING"\
#    | tail -n 1)
#SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version $@ 2>/dev/null\
#    | grep -v "INFO"\
#    | grep -v "WARNING"\
#    | tail -n 1)
#SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version $@ 2>/dev/null\
#    | grep -v "INFO"\
#    | grep -v "WARNING"\
#    | tail -n 1)
#SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ 2>/dev/null\
#    | grep -v "INFO"\
#    | grep -v "WARNING"\
#    | fgrep --count "<id>hive</id>";\
    # Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\
    # because we use "set -o pipefail"
#    echo -n)

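# 设置版本信息
# 注意:SCALA_VERSION 对应 scala.binary.version(二进制版本号,如 2.11);
#       SPARK_HIVE 对应 hive profile 的匹配计数(启用 -Phive 时为 1)
VERSION=2.4.5
SCALA_VERSION=2.11
SPARK_HADOOP_VERSION=3.3.1
SPARK_HIVE=1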
shell 复制代码
# 修改${SPARK_HOME}/pom.xml中的<hadoop.version>
# <hadoop.version>2.6.5</hadoop.version>  --> <hadoop.version>3.3.1</hadoop.version>
shell 复制代码
[root@sole dev]# ./make-distribution.sh --name build --tgz -Phadoop-3.3 -Dhadoop.version=3.3.1 -Dscala-2.11 -DskipTests -Pyarn -Phive -Phive-thriftserver

# --name --tgz :是最后生成的包名,以及采用上面格式打包,比如,编译的是spark-2.4.5,那么最后编译成功后就会在 spark-2.4.5这个目录下生成 spark--bin-build.tgz
# -Pyarn: 表示支持yarn
# -Phadoop-3.3 :指定hadoop的主版本号
# -Dhadoop.version: 指定hadoop的子版本号
# -Phive -Phive-thriftserver:开启JDBC和Hive功能
# -Dscala-2.11 :指定scala版本
# -DskipTests :忽略测试过程

#还可以加上:
#  clean package:clean和package是编译目标。clean执行清理工作,比如清除旧打包痕迹,package用于编译和打包

shell 复制代码
# 解压
[root@sole spark-2.4.5]# tar -zxvf spark--bin-build.tgz -C /opt/software
shell 复制代码
# 配置
[root@sole spark--bin-build]# vi /etc/profile.d/my.sh 

#SPARK_HOME
export SPARK_HOME=/opt/software/spark--bin-build
export SPARK_CLASSPATH=$SPARK_HOME/jars
export PATH=$SPARK_HOME/bin:$PATH
##########################################################################################
[root@sole conf]# vi spark-env.sh

export JAVA_HOME=/opt/software/jdk1.8.0_391
export HADOOP_HOME=/opt/software/hadoop-3.3.1
export HADOOP_CONF_DIR=/opt/software/hadoop-3.3.1/etc/hadoop
export SCALA_HOME=/opt/software/scala-2.11.12
export SPARK_HOME=/opt/software/spark--bin-build
export SPARK_MASTER_IP=192.168.229.130
export SPARK_MASTER_PORT=7077
##########################################################################################
[root@sole conf]# cp slaves.template slaves
##########################################################################################
# 将hive-site.xml拷贝至该目录下
[root@sole conf]# cp /opt/software/apache-hive-3.1.2-bin/conf/hive-site.xml ./
##########################################################################################
#spark-shell/spark-sql启动提示 Unrecognized Hadoop major version number: 3.3.1
[root@sole conf]# vi common-version-info.properties
# 版本信息设置成2和3都可以(properties文件不支持行尾注释,注释需单独成行)
version=2.7.6

编译参考:https://blog.csdn.net/qq_43591172/article/details/126575084


shell 复制代码
# 启动&测试
[root@sole ~]# sh /opt/software/spark--bin-build/sbin/start-all.sh
[root@sole ~]# spark-sql



5. Zookeeper

下载地址:https://archive.apache.org/dist/zookeeper/zookeeper-3.4.10/

shell 复制代码
# 解压
[root@sole install]# tar -zxvf zookeeper-3.4.10.tar.gz -C /opt/software/
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
# ZK_HOME
export ZK_HOME=/opt/software/zookeeper-3.4.10
export PATH=$ZK_HOME/bin:$ZK_HOME/sbin:$PATH
# 重新加载
[root@sole ~]# source /etc/profile
shell 复制代码
# 配置
[root@sole install]# cd /opt/software/zookeeper-3.4.10/
# 在zookeeper的根目录下新建文件夹mydata
[root@sole zookeeper-3.4.10]# mkdir mydata
# myid文件需放在dataDir(即mydata目录)下
[root@sole zookeeper-3.4.10]# touch mydata/myid
[root@sole zookeeper-3.4.10]# echo "1" >> mydata/myid
##########################################################################################
[root@sole zookeeper-3.4.10]# cd /opt/software/zookeeper-3.4.10/conf
[root@sole conf]# cp zoo_sample.cfg zoo.cfg
[root@sole conf]# vi zoo.cfg

# 在zoo.cfg这个文件中,配置集群信息是存在一定的格式:server.N=YYY:A:B
# N:代表服务器编号(也就是myid里面的值);
# YYY:服务器地址/hostname;
# A:表示 Follower 跟 Leader的通信端口,简称服务端内部通信的端口(默认2888);
# B:表示 是选举端口(默认是3888);
dataDir=/opt/software/zookeeper-3.4.10/mydata
server.1=sole:2888:3888
shell 复制代码
# 启动服务
[root@sole ~]# zkServer.sh start

6. HBase

下载地址:https://archive.apache.org/dist/hbase/2.4.12/

shell 复制代码
# 解压
[root@sole install]# tar -zxvf hbase-2.4.12-bin.tar.gz -C /opt/software/
##########################################################################################
# 编辑环境变量
[root@sole ~]# vi /etc/profile.d/my.sh
#HBASE_HOME
export HBASE_HOME=/opt/software/hbase-2.4.12
export PATH=$HBASE_HOME/bin:$PATH
# 重新加载
[root@sole ~]# source /etc/profile
shell 复制代码
# 配置
[root@sole ~]# cd /opt/software/hbase-2.4.12/conf/
[root@sole conf]# vi hbase-env.sh

export JAVA_HOME=/opt/software/jdk1.8.0_391
shell 复制代码
[root@sole conf]# vi hbase-site.xml
xml 复制代码
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://sole:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>./tmp</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/opt/software/zookeeper-3.4.10/mydata</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>
shell 复制代码
# 启动服务
[root@sole ~]# start-hbase.sh 

PS:如果有写错或者写的不好的地方,欢迎各位大佬在评论区留下宝贵的意见或者建议,敬上!如果这篇博客对您有帮助,希望您可以顺手帮我点个赞!不胜感谢!

|-------------------------------------------------------|
| 原创作者:wsjslient |


相关推荐
数据智能老司机6 小时前
CockroachDB权威指南——CockroachDB SQL
数据库·分布式·架构
数据智能老司机7 小时前
CockroachDB权威指南——开始使用
数据库·分布式·架构
cg50177 小时前
Spring Boot 的配置文件
java·linux·spring boot
数据智能老司机7 小时前
CockroachDB权威指南——CockroachDB 架构
数据库·分布式·架构
IT成长日记8 小时前
【Kafka基础】Kafka工作原理解析
分布式·kafka
暮云星影8 小时前
三、FFmpeg学习笔记
linux·ffmpeg
rainFFrain8 小时前
单例模式与线程安全
linux·运维·服务器·vscode·单例模式
GalaxyPokemon8 小时前
Muduo网络库实现 [九] - EventLoopThread模块
linux·服务器·c++
mingqian_chu9 小时前
ubuntu中使用安卓模拟器
android·linux·ubuntu