创建一个学生信息表(student),用来存储学生的姓名(姓名作为行键,且假设姓名不会重复)以及考试成绩,其中考试成绩(score)是一个列族,存储了各个科目的考试成绩。然后向 student 表中添加数据。
1、HBase依赖
java
<!-- HBase server artifact, pinned to 1.4.13 (keep in step with hbase-client below). -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.4.13</version>
</dependency>
<!-- HBase client artifact, pinned to the same 1.4.13 version. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.4.13</version>
</dependency>
2、HBase数据源
java
package com.example.demo.config;
import com.example.demo.service.ICodeService;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
/**
 * Application-wide HBase access point.
 *
 * <p>At Spring Boot startup ({@link #run}) or from {@link #main} the class opens one
 * HBase connection and stores it, together with its {@code Admin}, in public static
 * fields. The static helper methods use that shared connection to create tables and to
 * put, delete and read single cells. All row keys, family names, qualifiers and values
 * are encoded and decoded as UTF-8.
 */
@Component
@Order(1)
public class NmsHBaseSource implements ApplicationRunner {
    private static final Logger logger = Logger.getLogger(NmsHBaseSource.class.getName());

    /** HBase configuration used to open {@link #conn}. */
    public static Configuration conf;
    /** Shared HBase connection; {@code null} until {@link #init()} has run. */
    public static Connection conn;
    /** Admin handle obtained from {@link #conn}; used for table management. */
    public static Admin admin;

    /**
     * Spring Boot startup hook: opens the shared HBase connection.
     *
     * @param args application arguments (unused)
     * @throws Exception if the connection cannot be established
     */
    @Override
    public void run(ApplicationArguments args) throws Exception {
        init();
    }

    /**
     * Builds the configuration and opens the shared connection and admin handle.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    public static void init() throws IOException {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf = HBaseConfiguration.create();
        conf.set("HADOOP_USER_NAME", "hadoop");
        // The HBase property is "hbase.rootdir"; the former "hbase.root.dir" key was ignored.
        conf.set("hbase.rootdir", "hdfs://master:9000/hbase");
        // ZooKeeper quorum host and client port.
        conf.set("hbase.zookeeper.quorum", "master");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conn = ConnectionFactory.createConnection(conf);
        admin = conn.getAdmin();
    }

    /**
     * Closes the admin handle and the connection and clears the static fields, so that
     * calling it again is a no-op.
     *
     * @throws IOException if closing either resource fails
     */
    public static void close() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            // Still release the connection when closing the admin handle failed.
            admin = null;
            if (conn != null) {
                try {
                    conn.close();
                } finally {
                    conn = null;
                }
            }
        }
    }

    /**
     * Creates a table with the given column families; logs and does nothing when the
     * table already exists.
     *
     * @param myTableName table name
     * @param colFamily names of the column families to create
     * @throws IOException if the existence check or the creation fails
     */
    public static void createTable(String myTableName, String[] colFamily) throws IOException {
        TableName tableName = TableName.valueOf(myTableName);
        if (admin.tableExists(tableName)) {
            logger.info(myTableName + "表已经存在");
            return;
        }
        HTableDescriptor descriptor = new HTableDescriptor(tableName);
        for (String family : colFamily) {
            descriptor.addFamily(new HColumnDescriptor(family));
        }
        admin.createTable(descriptor);
    }

    /**
     * Writes one cell.
     *
     * @param tableName table name
     * @param rowkey row key
     * @param colFamily column family
     * @param col column qualifier
     * @param value cell value
     * @throws IOException if the write fails
     */
    public static void insertData(String tableName, String rowkey, String colFamily, String col, String value)
            throws IOException {
        // try-with-resources releases the table handle even when the put fails.
        try (Table table = conn.getTable(TableName.valueOf(tableName))) {
            Put put = new Put(rowkey.getBytes(StandardCharsets.UTF_8));
            put.addColumn(
                    colFamily.getBytes(StandardCharsets.UTF_8),
                    col.getBytes(StandardCharsets.UTF_8),
                    value.getBytes(StandardCharsets.UTF_8));
            table.put(put);
        }
    }

    /**
     * Deletes a whole row by its row key.
     *
     * @param tableName table name
     * @param rowkey row key
     * @throws IOException if the delete fails
     */
    public static void deleteData(String tableName, String rowkey) throws IOException {
        try (Table table = conn.getTable(TableName.valueOf(tableName))) {
            table.delete(new Delete(rowkey.getBytes(StandardCharsets.UTF_8)));
        }
    }

    /**
     * Reads one cell and prints its value to standard output; prints {@code null} when
     * the row or the column does not exist.
     *
     * @param tableName table name
     * @param rowkey row key
     * @param colFamily column family
     * @param col column qualifier
     * @throws IOException if the read fails
     */
    public static void getData(String tableName, String rowkey, String colFamily, String col) throws IOException {
        byte[] family = colFamily.getBytes(StandardCharsets.UTF_8);
        byte[] qualifier = col.getBytes(StandardCharsets.UTF_8);
        try (Table table = conn.getTable(TableName.valueOf(tableName))) {
            Get get = new Get(rowkey.getBytes(StandardCharsets.UTF_8));
            get.addColumn(family, qualifier);
            Result result = table.get(get);
            // getValue returns null for a missing cell; new String(null) would throw.
            byte[] raw = result.getValue(family, qualifier);
            String text = (raw == null) ? null : new String(raw, StandardCharsets.UTF_8);
            System.out.println(text);
        }
    }

    /**
     * Demo entry point: creates the {@code student} table, stores three scores for
     * {@code zhangsan} and reads one of them back.
     *
     * @param args unused
     * @throws IOException if any HBase operation fails
     */
    public static void main(String[] args) throws IOException {
        init();
        try {
            createTable("student", new String[]{"score"});
            insertData("student", "zhangsan", "score", "English", "69");
            insertData("student", "zhangsan", "score", "Math", "86");
            insertData("student", "zhangsan", "score", "Computer", "77");
            getData("student", "zhangsan", "score", "Computer");
        } finally {
            // Release the connection even when one of the operations above fails.
            close();
        }
    }
}
3、Hbase过滤器查询
过滤器可以分为两种:比较过滤器和专用过滤器
比较过滤器(由“比较运算符 + 比较器”构成,可用的比较运算符 CompareOp 如下)
LESS ------ 小于
LESS_OR_EQUAL ------ 小于等于
EQUAL ------ 等于
NOT_EQUAL ------ 不等于
GREATER_OR_EQUAL ------ 大于等于
GREATER ------ 大于
NO_OP ------ 排除所有
比较器(与比较运算符配合使用;专用过滤器见后文 3.6~3.9 节)
BinaryComparator ------ 按字节的字典顺序比较指定字节数组,采用Bytes.compareTo(byte[], byte[])
BinaryPrefixComparator ------ 跟前面相同,只是比较左端的数据是否相同
NullComparator ------ 判断给定的是否为空
BitComparator ------ 按位比较
RegexStringComparator ------ 提供一个正则的比较器,仅支持 EQUAL 和 NOT_EQUAL
SubstringComparator ------ 判断提供的子串是否出现在value中,同样仅支持 EQUAL 和 NOT_EQUAL
3.1、ResultScanner结果处理handleResultScanner
java
/**
* ResultScanner结果解析
*/
public void handleResultScanner(ResultScanner scanner) throws IOException {
//因为ResultScanner类继承了迭代器
//使用增强for循环遍历
for (Result rs : scanner) {
String id = Bytes.toString(rs.getRow());
System.out.println("当前行的rowkey为:" + id);
//继续增强for循环得到每一行中的每一个单元格(列)
//获取一行中的所有单元格
for (Cell cell : rs.listCells()) {
//获取该单元格属于的列簇
String family = Bytes.toString(CellUtil.cloneFamily(cell));
//获取该单元格的列名
String colName = Bytes.toString(CellUtil.cloneQualifier(cell));
//获取该单元格的列值
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.println(family + ":" + colName + "的值为:" + value);
}
String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes()));
String age = Bytes.toString(rs.getValue("info".getBytes(), "age".getBytes()));
String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes()));
String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes()));
System.out.println("学号:" + id + ",姓名:" + name + ",年龄:" + age + ",性别:" + gender + ",班级:" + clazz);
}
3.2、rowKey过滤器RowFilter
java
/**
 * Row-key filter example: scans {@code students} with a {@code RowFilter} and a
 * {@code BinaryComparator} and prints every row whose key compares lower than
 * {@code 1500100010}.
 */
@Test
public void RowFilter1() {
    BinaryComparator upperBound =
            new BinaryComparator("1500100010".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Scan scan = new Scan();
    scan.setFilter(new RowFilter(CompareFilter.CompareOp.LESS, upperBound));
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.3、列族过滤器FamilyFilter
java
/**
 * Column-family filter example: scans {@code students} with a {@code FamilyFilter} and
 * a {@code SubstringComparator} and prints the cells of every family whose name
 * contains {@code in}.
 */
@Test
public void FamilyFilter1() {
    // EQUAL combined with SubstringComparator means "family name contains the text".
    FamilyFilter familyFilter =
            new FamilyFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("in"));
    Scan scan = new Scan();
    scan.setFilter(familyFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Column-family filter example: scans {@code students} with a {@code FamilyFilter} and
 * a {@code BinaryPrefixComparator} and prints the cells of every family whose name
 * starts with {@code i}.
 */
@Test
public void FamilyFilter2() {
    BinaryPrefixComparator prefix =
            new BinaryPrefixComparator("i".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Scan scan = new Scan();
    scan.setFilter(new FamilyFilter(CompareFilter.CompareOp.EQUAL, prefix));
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.4、列过滤器QualifierFilter
java
/**
 * Qualifier filter example: scans {@code students} with a {@code QualifierFilter} and
 * a {@code SubstringComparator} and prints the cells whose column name contains
 * {@code ge} (for example {@code age} and {@code gender}).
 */
@Test
public void QualifierFilter1() {
    QualifierFilter qualifierFilter =
            new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("ge"));
    Scan scan = new Scan();
    scan.setFilter(qualifierFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Qualifier filter example: scans {@code students} with a {@code QualifierFilter} and
 * a {@code SubstringComparator} and prints the cells whose column name contains
 * {@code am} (for example {@code name}).
 */
@Test
public void QualifierFilter2() {
    QualifierFilter qualifierFilter =
            new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("am"));
    Scan scan = new Scan();
    scan.setFilter(qualifierFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.5、列值过滤器ValueFilter
java
/**
 * Value filter example: scans {@code students} with a {@code ValueFilter} and a
 * {@code BinaryPrefixComparator} and prints the cells whose value starts with "张".
 *
 * <p>Only the matching cells are returned, so the other columns of a row are absent
 * from the printed result.
 */
@Test
public void ValueFilter1() {
    // Encode explicitly as UTF-8: with the platform default charset a non-ASCII prefix
    // may be turned into different bytes than the ones stored in the table.
    BinaryPrefixComparator prefix =
            new BinaryPrefixComparator("张".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Scan scan = new Scan();
    scan.setFilter(new ValueFilter(CompareFilter.CompareOp.EQUAL, prefix));
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Value filter example: scans {@code students} with a {@code ValueFilter} and a
 * {@code RegexStringComparator} and prints the cells whose value matches
 * {@code ^文科.*}.
 *
 * <p>Only the matching cells are returned; the other columns of a row do not satisfy
 * the condition and are not part of the result.
 */
@Test
public void ValueFilter12() {
    ValueFilter valueFilter =
            new ValueFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^文科.*"));
    Scan scan = new Scan();
    scan.setFilter(valueFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.6、单列值过滤器 SingleColumnValueFilter
java
/**
 * Single-column value filter example: prints the complete rows of {@code students}
 * whose {@code info:clazz} value matches {@code ^文科.*}.
 *
 * <p>Unlike {@code ValueFilter}, {@code SingleColumnValueFilter} returns every cell of
 * a row whose tested cell satisfies the condition.
 *
 * <p>NOTE(review): with the default settings a row that has no {@code info:clazz}
 * cell is also returned; call {@code setFilterIfMissing(true)} if that is not wanted.
 */
@Test
public void SingleColumnValueFilter() {
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    SingleColumnValueFilter clazzFilter = new SingleColumnValueFilter(
            "info".getBytes(utf8),
            "clazz".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL,
            new RegexStringComparator("^文科.*"));
    Scan scan = new Scan();
    scan.setFilter(clazzFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.7、列值排除过滤器SingleColumnValueExcludeFilter
java
/**
 * Single-column value exclude filter example: prints the rows of {@code students}
 * whose {@code info:clazz} value equals "文科一班", without the {@code clazz} column
 * itself.
 *
 * <p>{@code SingleColumnValueExcludeFilter} selects rows like
 * {@code SingleColumnValueFilter} does, but leaves the tested column out of the
 * returned cells.
 */
@Test
public void SingleColumnValueExcludeFilter() {
    // Encode explicitly as UTF-8: with the platform default charset the non-ASCII class
    // name may be turned into different bytes than the ones stored in the table.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    SingleColumnValueExcludeFilter clazzFilter = new SingleColumnValueExcludeFilter(
            "info".getBytes(utf8),
            "clazz".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL,
            new BinaryComparator("文科一班".getBytes(utf8)));
    Scan scan = new Scan();
    scan.setFilter(clazzFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.8、rowKey前缀过滤器PrefixFilter
java
/**
 * Row-key prefix filter example: scans {@code students} with a {@code PrefixFilter}
 * and prints every row whose key starts with {@code 150010008}.
 */
@Test
public void PrefixFilter() {
    PrefixFilter prefixFilter =
            new PrefixFilter("150010008".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Scan scan = new Scan();
    scan.setFilter(prefixFilter);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.9、分页过滤器PageFilter
java
/**
 * Pagination example built on {@code PageFilter}: prints page {@code pageNum}
 * (1-based) of the table with {@code pageSize} rows per page.
 *
 * <p>Page 1 starts at the beginning of the table. For any later page a first scan
 * walks over {@code (pageNum - 1) * pageSize + 1} rows to find the row key the page
 * starts at; a second scan then prints the page from that key. Nothing is printed
 * when the table has too few rows for the requested page.
 *
 * @throws IOException if a scan fails
 */
@Test
public void pageFilter() throws IOException {
    int pageNum = 3;
    int pageSize = 2;

    byte[] startRow;
    if (pageNum == 1) {
        // An empty start row means "from the first row of the table".
        startRow = new byte[0];
    } else {
        // The first row of page N is row number (N - 1) * pageSize + 1 of the table.
        startRow = findRowKeyAt((pageNum - 1) * pageSize + 1);
        if (startRow == null) {
            return;
        }
    }

    Scan scan = new Scan();
    scan.setStartRow(startRow);
    // Scan.setMaxResultSize limits bytes, not rows, so it is not used for paging.
    scan.setFilter(new PageFilter(pageSize));
    try (ResultScanner scanner = table.getScanner(scan)) {
        int printed = 0;
        for (Result result : scanner) {
            // PageFilter is evaluated per region server and may deliver more rows than
            // requested when the range spans several regions; cap on the client too.
            if (printed >= pageSize) {
                break;
            }
            printPageRow(result);
            printed++;
        }
    }
}

/** Returns the key of the {@code position}-th row (1-based) of the table, or null if there are fewer rows. */
private byte[] findRowKeyAt(int position) throws IOException {
    Scan scan = new Scan();
    scan.setFilter(new PageFilter(position));
    try (ResultScanner scanner = table.getScanner(scan)) {
        int seen = 0;
        for (Result result : scanner) {
            seen++;
            if (seen == position) {
                return result.getRow();
            }
        }
    }
    return null;
}

/** Prints the row key and one line per cell; {@code f1:id} and {@code age} are decoded as 4-byte ints. */
private void printPageRow(Result result) {
    System.out.println("数据的rowKey为" + Bytes.toString(result.getRow()));
    Cell[] cells = result.rawCells();
    if (cells == null) {
        return;
    }
    for (Cell cell : cells) {
        String family = Bytes.toString(CellUtil.cloneFamily(cell));
        String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
        byte[] value = CellUtil.cloneValue(cell);
        // The id and age columns hold int data; the test is on the column name, not the value.
        boolean intColumn = ("f1".equals(family) && "id".equals(qualifier)) || "age".equals(qualifier);
        // Bytes.toInt needs exactly four bytes; anything else is printed as text.
        String shown = (intColumn && value.length == Bytes.SIZEOF_INT)
                ? String.valueOf(Bytes.toInt(value))
                : Bytes.toString(value);
        System.out.println("列族为" + family + "列名为" + qualifier + "列值为" + shown);
    }
}
3.10、多过滤器综合查询FilterList
java
/**
 * Combined filter example: prints the students of {@code students} whose surname is
 * "于", who are older than 23, female, and in a science ("理科") class.
 *
 * <p>Four {@code SingleColumnValueFilter}s on the {@code info} family are combined in
 * a {@code FilterList} that requires all of them to pass. Each uses a different
 * comparator:
 * <ul>
 *   <li>{@code RegexStringComparator} - {@code clazz} matches {@code ^理科.*}</li>
 *   <li>{@code SubstringComparator} - {@code gender} contains "女"</li>
 *   <li>{@code BinaryComparator} - {@code age} greater than "23"</li>
 *   <li>{@code BinaryPrefixComparator} - {@code name} starts with "于"</li>
 * </ul>
 */
@Test
public void FilterData1() {
    // Encode explicitly as UTF-8: the platform default charset may turn the non-ASCII
    // literals into different bytes than the ones stored in the table.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    final byte[] info = "info".getBytes(utf8);

    // 1) class name starts with "理科"
    SingleColumnValueFilter scienceClass = new SingleColumnValueFilter(
            info, "clazz".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^理科.*"));

    // 2) gender is female
    SingleColumnValueFilter female = new SingleColumnValueFilter(
            info, "gender".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL, new SubstringComparator("女"));

    // 3) age greater than 23 (the comparator used to hold "20", contradicting the stated
    //    requirement). The comparison is byte-wise on the stored text -
    //    assumes ages are stored as two-digit strings; TODO confirm.
    SingleColumnValueFilter olderThan23 = new SingleColumnValueFilter(
            info, "age".getBytes(utf8),
            CompareFilter.CompareOp.GREATER, new BinaryComparator("23".getBytes(utf8)));

    // 4) name starts with "于"
    SingleColumnValueFilter surnameYu = new SingleColumnValueFilter(
            info, "name".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator("于".getBytes(utf8)));

    // A row must satisfy every filter in the list (this is also FilterList's default).
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filterList.addFilter(scienceClass);
    filterList.addFilter(female);
    filterList.addFilter(olderThan23);
    filterList.addFilter(surnameYu);

    Scan scan = new Scan();
    scan.setFilter(filterList);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Combined filter example: prints the liberal-arts ("文科") students of
 * {@code students} whose student number (row key) starts with {@code 15001001}.
 *
 * <p>A {@code RowFilter} with a {@code BinaryPrefixComparator} selects the row keys,
 * a {@code SingleColumnValueFilter} with a {@code RegexStringComparator} tests
 * {@code info:clazz} against {@code ^文科.*}; the {@code FilterList} requires both.
 */
@Test
public void filterData2() {
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    // 1) row key starts with 15001001
    RowFilter keyPrefix = new RowFilter(
            CompareFilter.CompareOp.EQUAL,
            new BinaryPrefixComparator("15001001".getBytes(utf8)));

    // 2) class name starts with "文科"
    SingleColumnValueFilter artsClass = new SingleColumnValueFilter(
            "info".getBytes(utf8), "clazz".getBytes(utf8),
            CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^文科.*"));

    // A row must satisfy both filters (this is also FilterList's default).
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filterList.addFilter(keyPrefix);
    filterList.addFilter(artsClass);

    Scan scan = new Scan();
    scan.setFilter(filterList);
    // try-with-resources closes the scanner and the table handle even when the scan fails.
    try (HTableInterface students = conn.getTable("students");
         ResultScanner scanner = students.getScanner(scan)) {
        handleResultScanner(scanner);
    } catch (IOException e) {
        e.printStackTrace();
    }
}