JavaAPI操作HBase-Day2

Java代码操作HBase

pom依赖,依赖版本要和软件一致
xml 复制代码
<dependencies>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-common</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-protocol</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase</artifactId>
        <version>2.5.5</version>
        <type>pom</type>
        <exclusions>
            <exclusion>
                <groupId>org.glassfish</groupId>
                <artifactId>javax.el</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-mapreduce</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-zookeeper</artifactId>
        <version>2.5.5</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
    </dependency>
    <!-- SLF4J binding for Log4j 2 (log console output).
         Bumped from 2.12.0: versions below 2.17.1 pull in a log4j-core
         affected by CVE-2021-44228 (Log4Shell) and follow-up CVEs. -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
        <version>2.17.1</version>
    </dependency>
    <!-- Hadoop common utilities -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.2.4</version>
    </dependency>
    <!-- Hadoop client -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.2.4</version>
    </dependency>
    <!-- Hadoop HDFS -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.2.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>3.2.4</version>
    </dependency>
</dependencies>
操作命名空间
java 复制代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;


    // Shared connection to the HBase cluster, opened per-test in connect().
    private Connection connection;
    // Administrative interface (DDL: namespaces, tables) obtained from the connection.
    private Admin admin;

    // Connect to HBase before each test.

    // NOTE: ConnectionFactory.createConnection() without a Configuration defaults
    // to ZooKeeper at localhost:2181 and fails with
    // "Socket error occurred: localhost/127.0.0.1:2181: connection refused"
    // when ZooKeeper is remote — so connect() always passes an explicit conf.

    @Before
    public void connect() throws IOException {
        // Runs before each test: open a cluster connection and obtain the Admin.
        Configuration configuration = HBaseConfiguration.create();
        // Point the client at the cluster's ZooKeeper quorum. The hostname
        // "hadoop" is mapped to the cluster IP in /etc/hosts; without this
        // setting the client would try localhost:2181 and be refused.
        configuration.set("hbase.zookeeper.quorum", "hadoop:2181");
        connection = ConnectionFactory.createConnection(configuration);
        admin = connection.getAdmin();
    }

// 在Test执行之后执行.关闭连接
 @After
    public void close() throws IOException {
        // Runs after each test: release the Admin first, then the Connection.
        if (admin != null) {
            admin.close();
        }
        if (connection != null) {
            connection.close();
        }
    }




// 创建名称空间
// create_namespace 'api'
@Test
public void createNamespace() throws IOException {
    // Shell equivalent: create_namespace 'api'
    // Build a descriptor for the namespace, then ask the Admin to create it.
    NamespaceDescriptor descriptor = NamespaceDescriptor.create("api").build();
    admin.createNamespace(descriptor);
}

// 列出所有的名称空间
// list_namespace
@Test
public void listNamespace() throws IOException {
    // Shell equivalent: list_namespace
    // The Admin returns every namespace name as an array; print one per line.
    String[] names = admin.listNamespaces();
    for (int i = 0; i < names.length; i++) {
        System.out.println(names[i]);
    }
}

// 删除名称空间
// drop_namespace
@Test
public void dropNamespace() throws IOException {
    // Shell equivalent: drop_namespace
    // The namespace must be empty; deleting a non-empty namespace throws
    // a ConstraintException.
    String target = "api";
    admin.deleteNamespace(target);
}
操作表
java 复制代码
// Shared HBase connection — presumably opened in a @Before hook like the
// namespace section above; the setup is not shown in this snippet (confirm).
private Connection connection;

// Administrative interface used for DDL (e.g. createTable below).
private Admin admin;

// Handle to the table under test, used by the put/get/scan/delete cases.
// NOTE(review): initialization (connection.getTable(...)) is not visible here.
private Table table;




@Test
public void createTable() throws Exception{
    // Create table "user" in the default namespace with two column families,
    // "basic" and "info". (Use TableName.valueOf("Demo:user") to create the
    // table inside the Demo namespace instead.)
    TableName name = TableName.valueOf("user");

    // FIX: guard against TableExistsException when the test is re-run.
    if (admin.tableExists(name)) {
        return;
    }

    ColumnFamilyDescriptor basicFamily =
            ColumnFamilyDescriptorBuilder.newBuilder("basic".getBytes()).build();
    ColumnFamilyDescriptor infoFamily =
            ColumnFamilyDescriptorBuilder.newBuilder("info".getBytes()).build();

    TableDescriptor descriptor = TableDescriptorBuilder.newBuilder(name)
            .setColumnFamily(basicFamily)
            .setColumnFamily(infoFamily)
            .build();
    admin.createTable(descriptor);
}

@Test
public void append() throws IOException {
    // Append cells to row "u1" across the "basic" and "info" column families.
    byte[] basicFamily = "basic".getBytes();
    byte[] infoFamily = "info".getBytes();

    Append mutation = new Append("u1".getBytes());
    mutation.addColumn(basicFamily, "name".getBytes(), "hsk".getBytes());
    mutation.addColumn(basicFamily, "age".getBytes(), "15".getBytes());
    mutation.addColumn(infoFamily, "phone".getBytes(), "nonono".getBytes());
    mutation.addColumn(infoFamily, "address".getBytes(), "beijing".getBytes());
    table.append(mutation);
}
//Put one million rows in batches.
//Reported as ~26s locally (verify after the batching fix — the original flushed only once).
//Row keys are kept sorted, but lexicographically (byte by byte), not numerically: "9" sorts after "1221" because '9' > '1'.
@Test
public void putMillions() throws Exception{
    // Insert 1,000,000 rows in batches of 10,000 and print the elapsed millis.
    //
    // BUG FIX: the original used `if (i == 10000)`, which flushed the buffer
    // exactly once — every row after the first flush was accumulated but never
    // written. Flush whenever the buffer reaches the batch size, and flush the
    // final partial batch after the loop.
    final int total = 1000000;
    final int batchSize = 10000;

    // Column family and qualifier. NOTE: "passWoed" is misspelled but kept
    // as-is — it is the qualifier the other tests (scan/filter) read back.
    byte[] family = "basic".getBytes();
    byte[] qualifier = "passWoed".getBytes();

    ArrayList<Put> buffer = new ArrayList<Put>(batchSize);
    long start = System.currentTimeMillis();

    for (int i = 0; i < total; i++) {
        byte[] rowKey = ("m" + i).getBytes();
        Put put = new Put(rowKey);
        put.addColumn(family, qualifier, reducePassword());
        buffer.add(put);
        if (buffer.size() == batchSize) {
            table.put(buffer);
            buffer.clear();
        }
    }
    // Flush whatever is left (empty here since total is a multiple of the
    // batch size, but kept for safety if the constants change).
    if (!buffer.isEmpty()) {
        table.put(buffer);
    }

    long end = System.currentTimeMillis();
    System.out.println(end-start);
}

@Test
public void delete() throws Exception{
    // Remove all cells of row "u1" that belong to the "basic" column family.
    byte[] rowKey = "u1".getBytes();
    Delete mutation = new Delete(rowKey);
    mutation.addFamily("basic".getBytes());
    table.delete(mutation);
}

@Test
public void deleteAll() throws Exception{
    // A Delete with no family/column narrowing removes the entire row.
    table.delete(new Delete("u1".getBytes()));
}

//获取Cell
//获取Cell
@Test
public void getCell() throws Exception{
    // Read the single cell basic:name of row "u1" and print its value.
    Get get = new Get("u1".getBytes());
    get.addColumn("basic".getBytes() ,"name".getBytes());
    Result result = table.get(get);
    byte[] value = result.getValue("basic".getBytes(), "name".getBytes());
    // BUG FIX: getValue returns null when the cell does not exist; the original
    // `new String(value)` threw NullPointerException in that case. Print "null"
    // instead, matching the style used by the scan test.
    System.out.println(value == null ? "null" : new String(value));
}

//getColumnFamily
@Test
public void getColumn() throws Exception{
    // Read every qualifier/value pair of row "u1" in the "basic" family.
    Get get = new Get("u1".getBytes());
    get.addFamily("basic".getBytes());
    Result result = table.get(get);
    // getFamilyMap: qualifier bytes -> latest cell value for that family.
    NavigableMap<byte[], byte[]> columns = result.getFamilyMap("basic".getBytes());
    columns.forEach((qualifier, value) ->
            System.out.println(new String(qualifier)+"  "+new String(value)));
}

//getLine
//结果map嵌套map
@Test
public void getLineByFor() throws IOException{
    // Fetch the whole row "u1" and walk the nested map with explicit loops:
    // family -> (qualifier -> (timestamp -> value)).
    Get get = new Get("u1".getBytes());
    Result result = table.get(get);
    NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> row = result.getMap();
    for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> family : row.entrySet()) {
        System.out.println("Column Family\t:"+new String(family.getKey()));
        for (Map.Entry<byte[], NavigableMap<Long, byte[]>> column : family.getValue().entrySet()) {
            System.out.println("Column Name\t:"+new String(column.getKey()));
            for (Map.Entry<Long, byte[]> cell : column.getValue().entrySet()) {
                System.out.println("\t\tTimeStamp\t:"+cell.getKey());
                System.out.println("\t\tCellValue\t:"+new String(cell.getValue()));
            }
        }
    }
}


//Lambda方式获取整行数据
//Lambda-style read of a whole row
@Test
public void getLineByLambda() throws IOException{
    // Fetch the whole row "u1" and print it via nested forEach lambdas:
    // family -> (qualifier -> (timestamp -> value)).
    Get get = new Get("u1".getBytes());
    Result result = table.get(get);
    NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> row = result.getMap();
    row.forEach((family, columns) -> {
        System.out.println("Column Family\t:"+new String(family));
        columns.forEach((qualifier, cells) -> {
            System.out.println("\tColumn Name\t:"+new String(qualifier));
            // FIX: the printed label was misspelled "Timstamp" in the original.
            cells.forEach((timestamp, value) ->
                    System.out.println("\t\tTimestamp\t:"+timestamp+ "\t\tvalue\t:"+new String(value)));
        });
    });
}

//整表查询
//Full-table scan
@Test
public void scan() throws IOException{
    // Scan the whole table. For each row print: the row key, the full nested
    // map, the "basic" family, and the basic:passWoed cell.
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result next : results) {
        // Depending on the need, fetch a single column, a family, or the
        // whole row:
        //   next.getMap()                       — whole row
        //   next.getFamilyMap(family)           — one family
        //   next.getValue(family, qualifier)    — one cell

        // FIX (was a TODO): Result.getRow() returns the row key bytes.
        System.out.println("RowKey\t:" + new String(next.getRow()));

        NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> row = next.getMap();
        row.forEach((family, columns) -> {
            System.out.println("Column Family\t:"+new String(family));
            columns.forEach((qualifier, cells) -> {
                System.out.println("\tColumn Name\t:"+new String(qualifier));
                // FIX: label was misspelled "Timstamp" in the original.
                cells.forEach((timestamp, value) ->
                        System.out.println("\t\tTimestamp\t:"+timestamp+ "\t\tvalue\t:"+new String(value)));
            });
        });

        System.out.println("----------------------------------------------------------");
        NavigableMap<byte[], byte[]> familyMap = next.getFamilyMap("basic".getBytes());
        familyMap.forEach((k,v)->{
            System.out.println("ColumnName"+new String(k)+"\t--"+new String(v));
        });

        System.out.println("----------------------------------------------------------");
        // FIX: putMillions writes the qualifier "passWoed" (sic); the original
        // looked up "password" and therefore always printed "null".
        byte[] value = next.getValue("basic".getBytes(), "passWoed".getBytes());
        System.out.println(value == null ? "null":new String(value));
    }
}

// 基于Scan操作的前提下,还可以对结果来进行过滤
@Test
public void filter() throws IOException {
    // A Scan's results can be narrowed server-side with a Filter.
    // ValueFilter keeps only cells whose value matches the comparator — here,
    // any value containing the letter "A" (regex ".*A.*").
    //   CompareOperator      - how to compare (EQUAL)
    //   ByteArrayComparable  - what to compare against (a regex comparator)
    Filter valueContainsA = new ValueFilter(CompareOperator.EQUAL, new RegexStringComparator(".*A.*"));

    Scan scan = new Scan();
    scan.setFilter(valueContainsA);

    // Iterate the filtered result set and print the basic:passWoed cell.
    ResultScanner rs = table.getScanner(scan);
    for (Result r : rs) {
        byte[] value = r.getValue("basic".getBytes(), "passWoed".getBytes());
        System.out.println(value == null ? "null" : new String(value));
    }
}



// Builds a random 6-character uppercase "password" and returns its bytes.
public byte[] reducePassword(){
    final int length = 6;
    StringBuilder password = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        // Math.random() is in [0, 1); scale to an uppercase letter 'A'..'Z'
        // (code points 65..90).
        char letter = (char) (Math.random() * 26 + 65);
        password.append(letter);
    }
    return password.toString().getBytes();
}
相关推荐
青云交3 分钟前
大数据新视界 -- 大数据大厂之 Impala 性能优化:跨数据中心环境下的挑战与对策(上)(27 / 30)
大数据·性能优化·impala·案例分析·代码示例·跨数据中心·挑战对策
soso196839 分钟前
DataWorks快速入门
大数据·数据仓库·信息可视化
The_Ticker1 小时前
CFD平台如何接入实时行情源
java·大数据·数据库·人工智能·算法·区块链·软件工程
java1234_小锋1 小时前
Elasticsearch中的节点(比如共20个),其中的10个选了一个master,另外10个选了另一个master,怎么办?
大数据·elasticsearch·jenkins
Elastic 中国社区官方博客1 小时前
Elasticsearch 开放推理 API 增加了对 IBM watsonx.ai Slate 嵌入模型的支持
大数据·数据库·人工智能·elasticsearch·搜索引擎·ai·全文检索
我的运维人生1 小时前
Elasticsearch实战应用:构建高效搜索与分析平台
大数据·elasticsearch·jenkins·运维开发·技术共享
企鹅侠客1 小时前
ETCD调优
数据库·etcd
Json_181790144801 小时前
电商拍立淘按图搜索API接口系列,文档说明参考
前端·数据库
大数据编程之光1 小时前
Flink Standalone集群模式安装部署全攻略
java·大数据·开发语言·面试·flink
B站计算机毕业设计超人1 小时前
计算机毕业设计SparkStreaming+Kafka旅游推荐系统 旅游景点客流量预测 旅游可视化 旅游大数据 Hive数据仓库 机器学习 深度学习
大数据·数据仓库·hadoop·python·kafka·课程设计·数据可视化