1 pom.xml configuration: the dependency versions must match the versions installed on the server, and the hive-site.xml file must be placed in the resources folder
<dependencies>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-metastore</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-common</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <!-- keep all org.apache.hadoop artifacts on the same version as the server -->
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>3.1.3</version>
        <exclusions>
            <exclusion>
                <artifactId>hadoop-annotations</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-common</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-applicationhistoryservice</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-web-proxy</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-common</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jackson-core-asl</artifactId>
                <groupId>org.codehaus.jackson</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jackson-mapper-asl</artifactId>
                <groupId>org.codehaus.jackson</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jersey-core</artifactId>
                <groupId>com.sun.jersey</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jersey-server</artifactId>
                <groupId>com.sun.jersey</groupId>
            </exclusion>
            <exclusion>
                <artifactId>zookeeper</artifactId>
                <groupId>org.apache.zookeeper</groupId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
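For reference, a minimal hive-site.xml sketch is shown below. For this client only the metastore address matters; the host name is a placeholder to replace with your own server (9083 is the default metastore port).

<configuration>
    <!-- Thrift address of the Hive metastore service; replace the host with your own -->
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://hadoop102:9083</value>
    </property>
</configuration>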
2 POJO class
import lombok.Data;
import java.io.Serializable;
import java.util.Date;
@Data
public class TableMetaInfo implements Serializable {
    private static final long serialVersionUID = 1L;
    /**
     * Table ID
     */
    private Long id;
    /**
     * Table name
     */
    private String tableName;
    /**
     * Database (schema) name
     */
    private String schemaName;
    /**
     * Column names as JSON (source: Hive)
     */
    private String colNameJson;
    /**
     * Partition column names as JSON (source: Hive)
     */
    private String partitionColNameJson;
    /**
     * HDFS owner (source: Hive)
     */
    private String tableFsOwner;
    /**
     * Parameter map as JSON (source: Hive)
     */
    private String tableParametersJson;
    /**
     * Table comment (source: Hive)
     */
    private String tableComment;
    /**
     * HDFS path (source: Hive)
     */
    private String tableFsPath;
    /**
     * Input format (source: Hive)
     */
    private String tableInputFormat;
    /**
     * Output format (source: Hive)
     */
    private String tableOutputFormat;
    /**
     * Row format SerDe (source: Hive)
     */
    private String tableRowFormatSerde;
    /**
     * Table creation time (source: Hive)
     */
    private String tableCreateTime;
    /**
     * Table type (source: Hive)
     */
    private String tableType;
    /**
     * Bucket columns as JSON (source: Hive)
     */
    private String tableBucketColsJson;
    /**
     * Number of buckets (source: Hive)
     */
    private Long tableBucketNum;
    /**
     * Sort columns as JSON (source: Hive)
     */
    private String tableSortColsJson;
    /**
     * Data size (source: HDFS)
     */
    private Long tableSize = 0L;
    /**
     * Total size across all replicas (source: HDFS)
     */
    private Long tableTotalSize = 0L;
    /**
     * Last modification time (source: HDFS)
     */
    private Date tableLastModifyTime;
    /**
     * Last access time (source: HDFS)
     */
    private Date tableLastAccessTime;
    /**
     * Current file system capacity (source: HDFS)
     */
    private Long fsCapacitySize;
    /**
     * Current file system used space (source: HDFS)
     */
    private Long fsUsedSize;
    /**
     * Current file system remaining space (source: HDFS)
     */
    private Long fsRemainSize;
}
3 Core class
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.support.spring.PropertyPreFilters;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import java.io.IOException;
import java.net.URI;
import java.util.Date;
/**
 * Retrieves Hive table metadata.
 */
public class HiveMetaTest {

    IMetaStoreClient hiveClient = getHiveClient();

    // Initialize the Hive metastore client
    private IMetaStoreClient getHiveClient() {
        // Load the local hive-site.xml from the classpath into a HiveConf object
        HiveConf hiveConf = new HiveConf();
        hiveConf.addResource(Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("hive-site.xml"));
        // hiveConf.addResource("hive-site.xml");
        // hiveConf.addResource(new URL("file:///home/atguigu/dga/hive-site.xml"));
        IMetaStoreClient client = null;
        try { // create the client; the retrying proxy reconnects on transient metastore failures
            client = RetryingMetaStoreClient.getProxy(hiveConf, true);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return client;
    }
    // Fetch one table's metadata given its database name and table name
    private TableMetaInfo getTableMeta(String schemaName, String tableName) {
        TableMetaInfo tableMetaInfo = new TableMetaInfo();
        try {
            Table table = hiveClient.getTable(schemaName, tableName);
            // Copy the metadata from table into tableMetaInfo
            System.out.println(table);
            System.out.println();
            // Table name
            tableMetaInfo.setTableName(tableName);
            // Database name
            tableMetaInfo.setSchemaName(schemaName);
            // Keep only the properties we need when serializing to JSON
            PropertyPreFilters.MySimplePropertyPreFilter mySimplePropertyPreFilter = new PropertyPreFilters().addFilter("comment", "name", "type");
            // Column names as JSON (source: Hive): column name, type, and comment
            tableMetaInfo.setColNameJson(JSON.toJSONString(table.getSd().getCols(), mySimplePropertyPreFilter));
            // Partition column names as JSON (source: Hive)
            tableMetaInfo.setPartitionColNameJson(JSON.toJSONString(table.getPartitionKeys(), mySimplePropertyPreFilter));
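            // With the filter above, only name/type/comment survive serialization; the resulting
            // JSON looks roughly like this (hypothetical columns, for illustration only):
            // [{"comment":"order id","name":"order_id","type":"bigint"},{"name":"amount","type":"decimal(16,2)"}]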
            // HDFS owner (source: Hive)
            tableMetaInfo.setTableFsOwner(table.getOwner());
            // Parameter map as JSON (source: Hive)
            tableMetaInfo.setTableParametersJson(JSON.toJSONString(table.getParameters()));
            // Table comment
            tableMetaInfo.setTableComment(table.getParameters().get("comment"));
            // HDFS path
            tableMetaInfo.setTableFsPath(table.getSd().getLocation());
            // Input format
            tableMetaInfo.setTableInputFormat(table.getSd().getInputFormat());
            // Output format
            tableMetaInfo.setTableOutputFormat(table.getSd().getOutputFormat());
            // Row format SerDe
            tableMetaInfo.setTableRowFormatSerde(table.getSd().getSerdeInfo().getSerializationLib());
            // getCreateTime() returns seconds since the epoch, so multiply by 1000L for milliseconds
            String tableCreateDate = DateFormatUtils.format(new Date(table.getCreateTime() * 1000L), "yyyy-MM-dd HH:mm:ss");
            // Date date = DateUtils.parseDate(tableCreateDate, "yyyy-MM-dd HH:mm:ss");
            // Table creation time
            tableMetaInfo.setTableCreateTime(tableCreateDate);
            // Table type
            tableMetaInfo.setTableType(table.getTableType());
            if (table.getSd().getBucketCols().size() > 0) {
                // Bucket columns
                tableMetaInfo.setTableBucketColsJson(JSON.toJSONString(table.getSd().getBucketCols()));
                // Number of buckets
                tableMetaInfo.setTableBucketNum((long) table.getSd().getNumBuckets());
                // Sort columns
                tableMetaInfo.setTableSortColsJson(JSON.toJSONString(table.getSd().getSortCols()));
            }
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return tableMetaInfo;
    }
    // Enrich the metadata with statistics gathered from HDFS
    public void addHdfsInfo(TableMetaInfo tableMetaInfo) {
        try {
            FileSystem fileSystem = FileSystem.get(new URI(tableMetaInfo.getTableFsPath()), new Configuration(), tableMetaInfo.getTableFsOwner());
            // listStatus returns the files and directories directly under the given path
            FileStatus[] fileStatuses = fileSystem.listStatus(new Path(tableMetaInfo.getTableFsPath()));
            // Recursively walk the tree, accumulating file sizes, total replica size, and so on
            addFileInfo(fileStatuses, tableMetaInfo, fileSystem);
            // Of limited use here:
            // Current file system capacity
            // tableMetaInfo.setFsCapacitySize(fileSystem.getStatus().getCapacity());
            // Current file system remaining space
            // tableMetaInfo.setFsRemainSize(fileSystem.getStatus().getRemaining());
            // Current file system used space
            // tableMetaInfo.setFsUsedSize(fileSystem.getStatus().getUsed());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    // Exercise: fill in tableMetaInfo's last access time, last modification time, and total replica size.
    // Recursive traversal: each recursive method needs a convergence case for leaf nodes (files) and a
    // descent case for branch nodes (directories). In Java each descent adds a stack frame, and the default
    // stack holds roughly 10,000 frames, so recursing too deep causes a StackOverflowError.
    public void addFileInfo(FileStatus[] fileStatuses, TableMetaInfo tableMetaInfo, FileSystem fileSystem) throws IOException {
        // Iterate over all FileStatus entries
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDirectory()) { // file: accumulate its size into tableSize
                long accessTime = fileStatus.getAccessTime();
                long modificationTime = fileStatus.getModificationTime();
                short replication = fileStatus.getReplication();
                long filesize = fileStatus.getLen(); // file size in bytes
                tableMetaInfo.setTableSize(tableMetaInfo.getTableSize() + filesize); // accumulate into the table's total size
                tableMetaInfo.setTableTotalSize(tableMetaInfo.getTableTotalSize() + filesize * replication); // total replica size
                // Keep the most recent modification time
                if (tableMetaInfo.getTableLastModifyTime() == null
                        || tableMetaInfo.getTableLastModifyTime().getTime() < modificationTime) {
                    tableMetaInfo.setTableLastModifyTime(new Date(modificationTime));
                }
                // Keep the most recent access time
                if (tableMetaInfo.getTableLastAccessTime() == null
                        || tableMetaInfo.getTableLastAccessTime().getTime() < accessTime) {
                    tableMetaInfo.setTableLastAccessTime(new Date(accessTime));
                }
            } else { // directory: list its children and recurse
                FileStatus[] subFileStatus = fileSystem.listStatus(fileStatus.getPath());
                addFileInfo(subFileStatus, tableMetaInfo, fileSystem);
            }
        }
    }
    public static void main(String[] args) {
        HiveMetaTest hiveMetaTest = new HiveMetaTest();
        TableMetaInfo tableMeta = hiveMetaTest.getTableMeta("test_db_1", "orders");
        hiveMetaTest.addHdfsInfo(tableMeta);
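        // Release the metastore thrift connection once we are done with the client
        hiveMetaTest.hiveClient.close();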
System.out.println("tableMeta = " + tableMeta);
}
}
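The recursive addFileInfo above is fine for Hive tables, whose partition directory trees are shallow, but HDFS also offers an iterative alternative that sidesteps the stack-depth concern noted in the comment. The sketch below is a hypothetical replacement (the method name addFileInfoIterative is mine, not from the original): FileSystem.listFiles(path, true) returns a RemoteIterator that walks the tree for you and yields only files, so no explicit recursion is needed. It assumes two extra imports, org.apache.hadoop.fs.LocatedFileStatus and org.apache.hadoop.fs.RemoteIterator.

// Iterative equivalent of addFileInfo (hypothetical helper, same accumulation logic)
public void addFileInfoIterative(Path tablePath, TableMetaInfo tableMetaInfo, FileSystem fileSystem) throws IOException {
    // listFiles(path, true) yields every file (never directories) under the path
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(tablePath, true);
    while (files.hasNext()) {
        LocatedFileStatus fileStatus = files.next();
        long filesize = fileStatus.getLen();
        tableMetaInfo.setTableSize(tableMetaInfo.getTableSize() + filesize);
        tableMetaInfo.setTableTotalSize(tableMetaInfo.getTableTotalSize() + filesize * fileStatus.getReplication());
        // Keep the most recent modification and access times, as in addFileInfo
        if (tableMetaInfo.getTableLastModifyTime() == null
                || tableMetaInfo.getTableLastModifyTime().getTime() < fileStatus.getModificationTime()) {
            tableMetaInfo.setTableLastModifyTime(new Date(fileStatus.getModificationTime()));
        }
        if (tableMetaInfo.getTableLastAccessTime() == null
                || tableMetaInfo.getTableLastAccessTime().getTime() < fileStatus.getAccessTime()) {
            tableMetaInfo.setTableLastAccessTime(new Date(fileStatus.getAccessTime()));
        }
    }
}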