文章目录
- 一、技术
- 二、构建SpringBoot工程
-
- [2.1 创建maven工程并配置 pom.xml文件](#2.1 创建maven工程并配置 pom.xml文件)
- [2.2 编写配置文件 application.yml](#2.2 编写配置文件 application.yml)
- [2.3 编写配置文件 application.properties](#2.3 编写配置文件 application.properties)
- [2.4 开发主启动类](#2.4 开发主启动类)
- [2.5 开发配置类](#2.5 开发配置类)
- 三、测试抽取Hive、HDFS元数据
- 四、将抽取的元数据存储到MySQL
-
- [4.1 引入依赖](#4.1 引入依赖)
- [4.2 配置application.yml](#4.2 配置application.yml)
- [4.3 创建元数据信息Bean](#4.3 创建元数据信息Bean)
- [4.4 定义Service](#4.4 定义Service)
- [4.5 创建Mapper](#4.5 创建Mapper)
- [4.6 测试](#4.6 测试)
一、技术
SpringBoot + Mybatis Plus
二、构建SpringBoot工程
2.1 创建maven工程并配置 pom.xml文件
xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the dwmeta module; inherits dependency management from spring-boot-starter-parent. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.7.17</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.songshuang</groupId>
<artifactId>dwmeta</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8 with UTF-8 sources. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Required: used to build a web project -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Lombok annotations; version comes from the parent POM -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Required for tests -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Client for connecting to the Hive metastore service -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>3.1.2</version>
</dependency>
<!-- JSON processing -->
<!-- NOTE(review): fastjson releases before 1.2.83 have published deserialization CVEs; confirm whether an upgrade is needed -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.72</version>
</dependency>
</dependencies>
</project>
2.2 编写配置文件 application.yml
(原文此处未给出内容;application.yml 的具体配置见 4.2 节。)
2.3 编写配置文件 application.properties
hive.client.uri:Hive 元数据服务(Metastore)的 Thrift 地址
hdfs.admin.user:访问 HDFS 时使用的用户名
hdfs.uri:HDFS NameNode 的 RPC 地址(含端口)
properties
# Address of the Hive metastore service (Thrift endpoint)
hive.client.uri=thrift://hadoop102:9083
# User name used when accessing HDFS
hdfs.admin.user=hadoop
# HDFS NameNode RPC address
hdfs.uri=hdfs://hadoop102:9820
2.4 开发主启动类
java
package com.songshuang.dga;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Main bootstrap class of the application.
 */
@SpringBootApplication
public class MainApp {

    /**
     * Launches the Spring Boot application.
     *
     * @param args command-line arguments handed over to Spring Boot
     */
    public static void main(String[] args) {
        // Equivalent to the static SpringApplication.run(MainApp.class, args) helper.
        SpringApplication application = new SpringApplication(MainApp.class);
        application.run(args);
    }
}
2.5 开发配置类
java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Scope;
import java.net.URI;
/**
 * Spring configuration that exposes a Hive metastore client and an HDFS client as beans.
 *
 * <p>Both beans are prototype-scoped, so each lookup runs the factory method again.
 * Every client should be created when it is needed and closed by the caller after use.
 */
@Configuration
public class DgaConfig {

    /** Hive metastore URI, injected from the {@code hive.client.uri} property. */
    @Value("${hive.client.uri}")
    private String hiveUri;

    /** User name passed to HDFS, injected from the {@code hdfs.admin.user} property. */
    @Value("${hdfs.admin.user}")
    private String hdfsAdmin;

    /** HDFS URI, injected from the {@code hdfs.uri} property. */
    @Value("${hdfs.uri}")
    private String hdfsUri;

    /**
     * Creates a Hive metastore client that connects to {@code hive.client.uri}.
     *
     * @return a new client; the caller is responsible for closing it
     * @throws RuntimeException if the client cannot be created
     */
    @Bean
    @Scope("prototype")
    public HiveMetaStoreClient createHiveMetastoreClient() {
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        // Tell the client where the metastore server listens (address and port).
        conf.set("hive.metastore.uris", hiveUri);
        try {
            return new HiveMetaStoreClient(conf);
        } catch (MetaException e) {
            throw new RuntimeException("Failed to create Hive metastore client for " + hiveUri, e);
        }
    }

    /**
     * Obtains an HDFS client for {@code hdfs.uri}, acting as the {@code hdfs.admin.user} user.
     *
     * @return the file system client; the caller is responsible for closing it
     * @throws RuntimeException if the URI is malformed, the connection fails, or the
     *                          calling thread is interrupted while connecting
     */
    @Bean
    @Scope("prototype")
    public FileSystem createHDFSClient() {
        try {
            return FileSystem.get(new URI(hdfsUri), new org.apache.hadoop.conf.Configuration(), hdfsAdmin);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Interrupted while creating HDFS client for " + hdfsUri, e);
        } catch (Exception e) {
            throw new RuntimeException("Failed to create HDFS client for " + hdfsUri, e);
        }
    }
}
三、测试抽取Hive、HDFS元数据
连接 Metastore 服务抽取 Hive 元数据;连接 NameNode 抽取 HDFS 元数据。
java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.thrift.TException;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.ApplicationContext;
import java.io.IOException;
/**
 * Smoke tests that read metadata through the Hive metastore client and the HDFS client.
 *
 * @date 2024/1/29 16:27
 */
@SpringBootTest
public class MetaTest {

    @Autowired
    private ApplicationContext context;

    /**
     * Prints the table list of the {@code dw_ods} database and the metadata of one table.
     *
     * @throws TException if a metastore call fails
     */
    @Test
    public void testHiveClient() throws TException {
        HiveMetaStoreClient client = context.getBean(HiveMetaStoreClient.class);
        try {
            // All tables of the database.
            System.out.println(client.getAllTables("dw_ods"));
            // Metadata of a single table.
            System.out.println(client.getTable("dw_ods", "ods_activity_info_full"));
        } finally {
            // Close the client even when one of the metastore calls throws.
            client.close();
        }
    }

    /**
     * Prints the capacity, remaining and used figures reported by the file system status.
     *
     * @throws IOException if the status cannot be read or the client cannot be closed
     */
    @Test
    public void testHDFSClient() throws IOException {
        // 1. Obtain the HDFS client; try-with-resources closes it when the test ends.
        try (FileSystem hdfsClient = context.getBean(FileSystem.class)) {
            // 2. Read the overall file system status and print its figures.
            FsStatus status = hdfsClient.getStatus();
            long capacity = status.getCapacity();
            long remaining = status.getRemaining();
            long used = status.getUsed();
            System.out.println("capacity:" + capacity + "remaining:" + remaining + "used:" + used );
        }
    }
}
四、将抽取的元数据存储到MySQL
4.1 引入依赖
xml
<!-- Druid via its Spring Boot starter, so that it does not conflict with other Spring Boot starters -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid-spring-boot-starter</artifactId>
<version>1.2.15</version>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.27</version>
</dependency>
<!-- Dynamic data-source switching: a single annotation selects the data source a DAO query uses.
It ships with its own connection pool, which conflicts with the Druid setup configured above
-->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>dynamic-datasource-spring-boot-starter</artifactId>
<version>2.5.8</version>
</dependency>
<!-- Comment out any plain MyBatis dependency, otherwise it conflicts with this one -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-boot-starter</artifactId>
<version>3.4.1</version>
</dependency>
<!-- NOTE(review): generator version 3.5.3.1 differs from the 3.4.1 starter above; confirm the two are compatible -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-generator</artifactId>
<version>3.5.3.1</version>
</dependency>
<!-- Template engines; presumably used by the code generator (TODO confirm which one is actually needed) -->
<dependency>
<groupId>org.apache.velocity</groupId>
<artifactId>velocity-engine-core</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-freemarker</artifactId>
</dependency>
4.2 配置application.yml
yaml
spring:
  datasource:
    dynamic:
      primary: dga # default data source (or data-source group)
      strict: false # strict matching, default false: true throws when the requested data source is not found, false falls back to the default one
      datasource:
        dga:
          url: jdbc:mysql://mall:3306/dga?useSSL=false&useUnicode=true&characterEncoding=UTF-8
          username: root
          password: "123456"
          driver-class-name: com.mysql.cj.jdbc.Driver
          # Druid pool settings for this data source
          druid:
            initial-size: 5
            max-active: 20
            max-wait: 60000
            min-idle: 5
            test-on-borrow: true
            test-on-return: false
            test-while-idle: true
  # Exclude Druid's own auto-configuration so that the dynamic data source builds the pool
  autoconfigure:
    exclude: com.alibaba.druid.spring.boot.autoconfigure.DruidDataSourceAutoConfigure
mybatis-plus:
  mapper-locations: classpath*:/sqls/*Mapper.xml
  configuration:
    mapUnderscoreToCamelCase: true
logging:
  level:
    com:
      songshuang:
        dga:
          meta:
            mapper: debug
server:
  port: 80
4.3 创建元数据信息Bean
java
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import java.io.Serializable;
import java.sql.Timestamp;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
* <p>
* Additional (auxiliary) information attached to a table's metadata,
* mapped to the {@code table_meta_info_extra} table.
* </p>
*
* @since 2024-01-29
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
@TableName("table_meta_info_extra")
public class TableMetaInfoExtra implements Serializable {
private static final long serialVersionUID = 1L;
/**
* Primary key, mapped to column {@code id} with {@code IdType.AUTO}.
*/
@TableId(value = "id", type = IdType.AUTO)
private Long id;
/**
* Table name.
*/
private String tableName;
/**
* Database (schema) name.
*/
private String schemaName;
/**
* Technical owner.
*/
private String tecOwnerUserName;
/**
* Business owner.
*/
private String busiOwnerUserName;
/**
* Storage lifecycle type.
*/
private String lifecycleType;
/**
* Lifecycle in days.
*/
private Long lifecycleDays;
/**
* Security level.
*/
private String securityLevel;
/**
* Data warehouse layer the table belongs to.
*/
private String dwLevel;
/**
* Creation time (auto-generated).
*/
private Timestamp createTime;
/**
* Update time (auto-generated).
*/
private Timestamp updateTime;
}
4.4 定义Service
java
import com.baomidou.mybatisplus.extension.service.IService;
import com.songshuang.dga.meta.bean.TableMetaInfoExtra;
import org.apache.hadoop.hive.metastore.api.MetaException;
/**
* Service contract for the auxiliary ("extra") metadata records of tables.
*/
public interface TableMetaInfoExtraService extends IService<TableMetaInfoExtra> {
/**
* Generates the auxiliary information for all tables of the given database.
*
* @param db name of the database whose tables are processed
* @throws MetaException declared for failures reported by the Hive metastore
*/
void initMetaInfoExtra(String db) throws MetaException;
}
java
import com.songshuang.dga.config.MetaConstant;
import com.songshuang.dga.meta.bean.TableMetaInfoExtra;
import com.songshuang.dga.meta.mapper.TableMetaInfoExtraMapper;
import com.songshuang.dga.meta.service.TableMetaInfoExtraService;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Service;
import java.sql.Timestamp;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * <p>
 * Service implementation for the auxiliary ("extra") metadata of tables.
 * </p>
 *
 * @since 2024-01-29
 */
@Service
public class TableMetaInfoExtraServiceImpl extends ServiceImpl<TableMetaInfoExtraMapper, TableMetaInfoExtra> implements TableMetaInfoExtraService {

    /** Number of leading table-name characters used as the warehouse layer (e.g. "ods"). */
    private static final int DW_LEVEL_PREFIX_LENGTH = 3;

    @Autowired
    private ApplicationContext context;

    /**
     * Writes auxiliary information for the tables of {@code db} that do not have any yet.
     *
     * <p>Auxiliary information rarely changes, so a record is only written when a new table
     * appears. Tables that already have a record in {@code table_meta_info_extra} are skipped.
     *
     * @param db name of the Hive database to scan
     * @throws MetaException if the metastore fails to list the tables
     */
    @Override
    public void initMetaInfoExtra(String db) throws MetaException {
        // Step 1: tables of this database that already have a record ("old" tables).
        Set<String> existsTableNames = list(new QueryWrapper<TableMetaInfoExtra>().eq("schema_name", db))
                .stream()
                .map(TableMetaInfoExtra::getTableName)
                .collect(Collectors.toSet());

        // Step 2: all tables of the database; everything not seen in step 1 is new.
        List<TableMetaInfoExtra> infos = fetchAllTables(db).stream()
                .filter(name -> !existsTableNames.contains(name))
                .map(name -> newExtraInfo(db, name))
                .collect(Collectors.toList());

        // Nothing new: skip the batch insert entirely.
        if (infos.isEmpty()) {
            return;
        }
        saveBatch(infos);
    }

    /** Lists all table names of {@code db} and always closes the metastore client afterwards. */
    private List<String> fetchAllTables(String db) throws MetaException {
        HiveMetaStoreClient client = context.getBean(HiveMetaStoreClient.class);
        try {
            return client.getAllTables(db);
        } finally {
            // The client is a prototype bean created for this call only; release its connection.
            client.close();
        }
    }

    /** Builds the auxiliary record for one new table, filled with placeholder defaults. */
    private TableMetaInfoExtra newExtraInfo(String db, String tableName) {
        TableMetaInfoExtra extra = new TableMetaInfoExtra();
        extra.setSchemaName(db);
        extra.setTableName(tableName);
        // The remaining fields are meant to be entered by staff; defaults are generated here
        // for convenience, as if they had already been entered.
        initExtraInfo(extra);
        extra.setCreateTime(new Timestamp(System.currentTimeMillis()));
        return extra;
    }

    /**
     * Fills the owner, lifecycle, security and warehouse-layer fields with default values.
     *
     * @param extra record whose table name is already set
     */
    private void initExtraInfo(TableMetaInfoExtra extra) {
        String[] bon = {"张三","李四","王五","赵六"};
        String[] ton = {"张小三","李中四","王大五","赵老六"};
        extra.setBusiOwnerUserName(bon[RandomUtils.nextInt(0, bon.length)]);
        extra.setTecOwnerUserName(ton[RandomUtils.nextInt(0, ton.length)]);
        extra.setLifecycleType(MetaConstant.LIFECYCLE_TYPE_UNSET);
        extra.setLifecycleDays(-1L);
        extra.setSecurityLevel(MetaConstant.SECURITY_LEVEL_UNSET);
        // The layer is the upper-cased name prefix; names shorter than the prefix are used
        // whole instead of failing with StringIndexOutOfBoundsException.
        String tableName = extra.getTableName();
        int prefixEnd = Math.min(DW_LEVEL_PREFIX_LENGTH, tableName.length());
        // Locale.ROOT keeps the result independent of the JVM default locale.
        extra.setDwLevel(tableName.substring(0, prefixEnd).toUpperCase(java.util.Locale.ROOT));
    }
}
4.5 创建Mapper
java
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.songshuang.dga.meta.bean.TableMetaInfoExtra;
import org.apache.ibatis.annotations.Mapper;
/**
* MyBatis-Plus mapper for {@link TableMetaInfoExtra}. It declares no methods of its own;
* everything it offers is inherited from {@link BaseMapper}.
*
* @date 2024/1/29 19:44
*/
@Mapper
public interface TableMetaInfoExtraMapper extends BaseMapper<TableMetaInfoExtra> {
}
4.6 测试
java
// Service under test, injected by Spring.
@Autowired
private TableMetaInfoExtraService extraService;
// Runs the auxiliary-information initialisation for the "dw_ods" database.
@Test
public void testExtraInfo() throws Exception {
extraService.initMetaInfoExtra("dw_ods");
}