HBase过滤器

HBase过滤器

小白的HBase学习笔记

目录

HBase过滤器

1.过滤表中所有Value中 >23 的内容

2.获取表中age列大于23的所有RowKey值(1的改进)

3.比较以某个Value值开头的列

4.按前缀 准确值 后缀查找

5.获取RowKey中包含15001000的所有RowKey(速度更快)

6.过滤列族名称以2结尾的RowKey数据

7.获取列名称以 na 开头的所有RowKey

8.对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁


1.过滤表中所有Value中 >23 的内容

java 复制代码
package com.shujia.comparator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
//过滤器

/**
 * Requirement: keep only the contents of {@code jan:tbl1} whose value is greater than "23".
 *
 * <p>A {@link ValueFilter} is evaluated against every cell value in the table, not against a
 * single column. Cells that do not satisfy the comparison are dropped from the returned row, so
 * the corresponding columns print as {@code null} below.
 *
 * <p>NOTE: {@link BinaryComparator} compares raw bytes, so with string-encoded values the
 * ordering is lexicographic (for example "9" sorts after "23").
 */
public class Code01ComparatorValue {
    /**
     * Runs the filtered scan and prints {@code rowKey,name,age,gender,clazz} for each row.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        // Byte comparator; the constructor argument is the value to compare against.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("23"));

        // Signature: ValueFilter(CompareOp valueCompareOp, ByteArrayComparable valueComparator)
        // Sample data for trying it out: put 'jan:tbl1','1001','info:name','25'
        ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.GREATER, binaryComparator);

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            final byte[] family = Bytes.toBytes("info");
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(family, Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(family, Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(family, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(family, Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);
            }
        }
    }
}

2.获取表中age列大于23的所有RowKey值(1的改进)

java 复制代码
package com.shujia.comparator;

//需求:获取表中age列大于23的所有RowKey值
//01的改进代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Requirement: fetch every row of {@code jan:tbl1} whose {@code info:age} is greater than 23
 * (an improvement over the whole-table {@code ValueFilter} approach).
 *
 * <p>{@link SingleColumnValueFilter} tests one column and returns the complete row when the
 * test passes. As noted by the original author, a row that does not contain the tested column
 * at all is also returned.
 */
public class Code02ComparatorSingleColumns {
    /**
     * Runs the filtered scan and prints {@code rowKey,name,age,gender,clazz} for each row.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        // The age is read back below with Bytes.toString, i.e. it is treated as a string.
        // Comparing against Bytes.toBytes(23) (a 4-byte big-endian int) would therefore
        // compare against the wrong encoding; the threshold must be encoded as a string too.
        // NOTE: the comparison is byte-wise, so the ordering is lexicographic.
        SingleColumnValueFilter filter = new SingleColumnValueFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("age"),
                CompareFilter.CompareOp.GREATER,
                Bytes.toBytes("23"));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            final byte[] family = Bytes.toBytes("info");
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(family, Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(family, Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(family, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(family, Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);
            }
        }
    }
}

3.比较以某个Value值开头的列

java 复制代码
package com.shujia.comparator;

//该比较器用于比较以某个Value值开头的列
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Requirement: fetch every row of {@code jan:tbl1} whose {@code info:clazz} value starts with
 * a given prefix.
 *
 * <p>{@link SingleColumnValueFilter} tests one column and returns the complete row when the
 * test passes. As noted by the original author, a row that does not contain the tested column
 * at all is also returned.
 */
public class Code03ComparatorSingleColumns {
    /**
     * Runs the filtered scan and prints {@code rowKey,name,age,gender,clazz} for each row.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        // EQUAL combined with a binary prefix comparator means "value starts with ...".
        // A longer prefix narrows the match, e.g.:
        //   new BinaryPrefixComparator(Bytes.toBytes("文科六"))
        SingleColumnValueFilter filter = new SingleColumnValueFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            final byte[] family = Bytes.toBytes("info");
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(family, Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(family, Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(family, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(family, Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);
            }
        }
    }
}

4.按前缀 准确值 后缀查找

java 复制代码
package com.shujia.comparator;

//Requirement: fetch all rows whose RowKey starts with 15001000 (exact-value and suffix variants are shown as alternatives)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Requirement: select rows of {@code jan:tbl1} by RowKey, using a {@link RowFilter} with a
 * prefix comparator. Exact-match and suffix (regex) comparators are shown as alternatives.
 */
public class Code04ComparatorRowKey {
    /**
     * Runs the filtered scan and prints {@code rowKey,name,age,gender,clazz} for each row.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        RowFilter filter = new RowFilter(
                CompareFilter.CompareOp.EQUAL
                // Prefix match: RowKeys that start with 15001000.
                , new BinaryPrefixComparator(Bytes.toBytes("15001000"))
                // Exact match alternative:
                //, new BinaryComparator(Bytes.toBytes("1500100001"))

                // Suffix match alternative: a regular expression for RowKeys ending in 02.
                //, new RegexStringComparator(".*02$")
        );

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            final byte[] family = Bytes.toBytes("info");
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(family, Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(family, Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(family, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(family, Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);
            }
        }
    }
}

5.获取RowKey中包含15001000的所有RowKey(速度更快)

java 复制代码
package com.shujia.comparator;

//Requirement: fetch all rows whose RowKey starts with 15001000

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Requirement: fetch every row of {@code jan:tbl1} whose RowKey starts with 15001000, using
 * the dedicated {@link PrefixFilter}.
 */
public class Code05ComparatorPrefix {
    /**
     * Runs the filtered scan and prints {@code rowKey,name,age,gender,clazz} for each row.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        // Per the original author's note, PrefixFilter runs faster than a RowFilter
        // configured with BinaryPrefixComparator(Bytes.toBytes("15001000")).
        PrefixFilter filter = new PrefixFilter(Bytes.toBytes("15001000"));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            final byte[] family = Bytes.toBytes("info");
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(family, Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(family, Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(family, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(family, Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);
            }
        }
    }
}

6.过滤列族名称以2结尾的RowKey数据

java 复制代码
package com.shujia.comparator;

//Requirement: keep only the data stored in column families whose name ends with 2

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;

/**
 * Requirement: keep only the data of {@code jan:tbl1} stored in column families whose name
 * ends with "2".
 *
 * <p>To prepare test data in the HBase shell:
 * <pre>
 * desc 'jan:tbl1'
 * alter 'jan:tbl1',{NAME => 'info2',VERSIONS => 1}
 * put 'jan:tbl1','1001','info2:name','zhangsan'
 * put 'jan:tbl1','1002','info2:name','zhangsan'
 * </pre>
 */
public class Code06ComparatorFamily {
    /**
     * Runs the filtered scan and prints {@code rowKey,family,qualifier,value} for each cell.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        // The regular expression matches family names ending in 2 (e.g. info2).
        FamilyFilter filter = new FamilyFilter(
                CompareFilter.CompareOp.EQUAL
                , new RegexStringComparator(".*2$")
        );

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                List<Cell> cells = result.listCells();
                String rowKey = Bytes.toString(result.getRow());
                // The column layout is not known up front, so walk every returned cell.
                for (Cell cell : cells) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rowKey+","+family+","+qualifier+","+value);
                }
            }
        }
    }
}

7.获取列名称以 na 开头的所有RowKey

java 复制代码
package com.shujia.comparator;

//Requirement: fetch the cells whose column name (qualifier) starts with "na"

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;

/**
 * Requirement: fetch the rows of {@code jan:tbl1} restricted to columns whose name
 * (qualifier) starts with "na".
 */
public class Code07ComparatorColumns {
    /**
     * Runs the filtered scan and prints {@code rowKey,family,qualifier,value} for each cell.
     *
     * @param args unused
     * @throws IOException if connecting to HBase or scanning the table fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");

        ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("na"));

        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes the scanner, table and connection (in that order),
        // even when the scan throws part-way through.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                List<Cell> cells = result.listCells();
                String rowKey = Bytes.toString(result.getRow());
                // The column layout is not known up front, so walk every returned cell.
                for (Cell cell : cells) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rowKey+","+family+","+qualifier+","+value);
                }
            }
        }
    }
}

8.对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁

java 复制代码
package com.shujia.comparator;

//需求:
//      对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁

import com.sun.xml.internal.bind.v2.runtime.unmarshaller.XsiNilLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

//需求:
//      获取列名称以 na 开头的所有RowKey

public class Code08Comparator {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum","node1,node2,master");
        Connection conn = ConnectionFactory.createConnection(conf);

        Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
        Scan scan=new Scan();

        //1.所有性别为男性
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                Bytes.toBytes("info")
                , Bytes.toBytes("gender")
                , CompareFilter.CompareOp.EQUAL
                , new BinaryPrefixComparator(Bytes.toBytes("男"))
        );
        //2.所有文科班
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                Bytes.toBytes("info")
                , Bytes.toBytes("clazz")
                , CompareFilter.CompareOp.EQUAL
                , new BinaryPrefixComparator(Bytes.toBytes("文科"))
        );
        //3.年龄大于23岁
        SingleColumnValueFilter filter3 = new SingleColumnValueFilter(
                Bytes.toBytes("info")
                , Bytes.toBytes("age")
                , CompareFilter.CompareOp.GREATER
                , new BinaryPrefixComparator(Bytes.toBytes("23"))
        );

        List<Filter> filters = new ArrayList<>();
        filters.add(filter1);
        filters.add(filter2);
        filters.add(filter3);


        FilterList filter = new FilterList(filters);

        //设置过滤器
        scan.setFilter(filter);

        //获取扫描器对象
        ResultScanner scanner = table.getScanner(scan);

        for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));
                String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));
                String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));
                String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));
                System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);

        }

        table.close();
        conn.close();

    }
}
相关推荐
java1234_小锋3 分钟前
Elasticsearch中的节点(比如共20个),其中的10个选了一个master,另外10个选了另一个master,怎么办?
大数据·elasticsearch·jenkins
Elastic 中国社区官方博客4 分钟前
Elasticsearch 开放推理 API 增加了对 IBM watsonx.ai Slate 嵌入模型的支持
大数据·数据库·人工智能·elasticsearch·搜索引擎·ai·全文检索
我的运维人生4 分钟前
Elasticsearch实战应用:构建高效搜索与分析平台
大数据·elasticsearch·jenkins·运维开发·技术共享
企鹅侠客8 分钟前
ETCD调优
数据库·etcd
Json_1817901448014 分钟前
电商拍立淘按图搜索API接口系列,文档说明参考
前端·数据库
大数据编程之光20 分钟前
Flink Standalone集群模式安装部署全攻略
java·大数据·开发语言·面试·flink
B站计算机毕业设计超人22 分钟前
计算机毕业设计SparkStreaming+Kafka旅游推荐系统 旅游景点客流量预测 旅游可视化 旅游大数据 Hive数据仓库 机器学习 深度学习
大数据·数据仓库·hadoop·python·kafka·课程设计·数据可视化
煎饼小狗26 分钟前
Redis五大基本类型——Zset有序集合命令详解(命令用法详解+思维导图详解)
数据库·redis·缓存
永乐春秋42 分钟前
WEB-通用漏洞&SQL注入&CTF&二次&堆叠&DNS带外
数据库·sql
打鱼又晒网1 小时前
【MySQL】数据库精细化讲解:内置函数知识穿透与深度学习解析
数据库·mysql