高版本ES适配(集群>7.x)
一、创建父子索引
powershell
# Create customer_join_index with one shard and no replicas. The join field
# cust_join_type declares "customer" as the parent relation and "label" as its child.
curl --insecure --user elastic:Passw0rd \
  --request PUT "http://xxx.xx.xxx.172:9200/customer_join_index" \
  --header 'Content-Type: application/json' \
  --data '{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "doc_type":       { "type": "keyword" },
      "cust_join_type": { "type": "join", "relations": { "customer": "label" } },
      "customer_id":    { "type": "keyword" },
      "cust_name":      { "type": "keyword" },
      "label_code":     { "type": "keyword" },
      "label_value":    { "type": "keyword" }
    }
  }
}'
powershell
"doc_type": { "type": "keyword" }, # 用于查询只有父或者子的文档
"cust_join_type": { "type": "join", "relations": { "customer": "label" } }, #父:customer,子:label
"customer_id": { "type": "keyword" }, #注意定义的要和hive表插入的类型一样 另外 Elasticsearch官方明确规定:The parent value in the join field must be the _id of the parent document as a string, even if the _id looks like a number。 也就是说:即使用bigint作为_id(es.mapping.id),ES在内部存储和比较时都视为字符串,所以子文档的parent必须传字符串"1001",不能传数字1001,否则会静默失败(关联无效)
#如果customer_id想保留long之类的整数类型,可以在建表时额外增加一个字符串列 customer_id_str,专门用作 _id、routing 和子文档的 parent
父表可以这样写(仅列出关键片段):
CREATE EXTERNAL TABLE tmp.customer_parent (
customer_id bigint,
customer_id_str string,
'es.mapping.id'='customer_id_str',
'es.mapping.routing'='customer_id_str',
插入的时候
cast(customer_id as bigint) as customer_id
cast(customer_id as string) as customer_id_str
子表
CREATE EXTERNAL TABLE tmp.customer_child_label (
customer_id bigint,
customer_id_str string,
'es.mapping.routing'='customer_id_str'
插入的时候
cast(customer_id as bigint) as customer_id
cast(customer_id as string) as customer_id_str
named_struct('name','label','parent',cast(customer_id as string)) as cust_join_type, -- 固定写子类型;parent必须是字符串,且与父文档的_id(customer_id_str)一致,不能传bigint
数据类型映射关系
yaml
Hive类型 Elasticsearch类型
STRING text, keyword
INT integer
BIGINT long
FLOAT float
DOUBLE double
BOOLEAN boolean
TIMESTAMP date
二、数据表准备
2.1创建父表
sql
-- Hive external table stored through the Elasticsearch storage handler.
-- Holds the parent ("customer") documents of customer_join_index.
CREATE EXTERNAL TABLE tmp.customer_parent (
    customer_id    STRING,
    doc_type       STRING,
    cust_join_type STRUCT<name:STRING>,  -- must be a STRUCT; for parent rows it carries only the relation name ('customer')
    cust_name      STRING,
    cust_age       INT                   -- optional column
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.nodes'              = 'xxx.xx.xxx.172:9200,xxx.xx.xxx.173:9200,xxx.xx.xxx.174:9200',
    'es.net.http.auth.user' = 'elastic',
    'es.net.http.auth.pass' = 'Passw0rd',
    'es.resource'           = 'customer_join_index',
    'es.mapping.id'         = 'customer_id',
    'es.mapping.routing'    = 'customer_id',  -- required; a string routing key is recommended so children can be associated later
    -- 'es.mapping.join' = 'cust_join_type',  -- parent/child join-field setting, left disabled
    'es.write.operation'    = 'upsert'
);
2.2创建子表
sql
-- Hive external table stored through the Elasticsearch storage handler.
-- Holds the child ("label") documents of customer_join_index.
CREATE EXTERNAL TABLE tmp.customer_child_label (
    doc_id         STRING,                              -- unique child document ID, e.g. "1001_label_high_value"
    customer_id    STRING,                              -- parent ID; used only for routing and the parent reference
    doc_type       STRING,
    cust_join_type STRUCT<name:STRING, parent:STRING>,  -- must include parent; always the child relation
    label_code     STRING,
    label_value    STRING,
    label_level    INT                                  -- optional column
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.nodes'              = 'xxx.xx.xxx.172:9200,xxx.xx.xxx.173:9200,xxx.xx.xxx.174:9200',
    'es.net.http.auth.user' = 'elastic',
    'es.net.http.auth.pass' = 'Passw0rd',
    'es.resource'           = 'customer_join_index',
    'es.mapping.id'         = 'doc_id',       -- must be a unique _id that never equals the parent table's es.mapping.id (customer_id); label_code + customer_id or a UUID is suggested
    'es.mapping.routing'    = 'customer_id',  -- must equal the parent ID
    -- 'es.mapping.join' = 'cust_join_type',  -- annotated as required in the notes, but left disabled
    'es.write.operation'    = 'upsert'
);
三、数据更新/插入
sql
-- Write or update the parent document for customer 1001.
INSERT OVERWRITE TABLE tmp.customer_parent
SELECT
    CAST(1001 AS STRING) AS customer_id,
    'customer' AS doc_type,
    named_struct('name', 'customer') AS cust_join_type,  -- always the parent relation name
    '张三' AS cust_name,
    20 AS cust_age
;
-- Write or update a child (label) document linked to parent 1001.
INSERT OVERWRITE TABLE tmp.customer_child_label
SELECT
    concat('1001', '_', 'high_value') AS doc_id,
    '1001' AS customer_id,
    'label' AS doc_type,
    named_struct('name', 'label', 'parent', CAST(1001 AS STRING)) AS cust_join_type,  -- always the child relation; parent is passed as a string
    'high_value' AS label_code,
    '1' AS label_value,
    2 AS label_level
;
四、数据查询
4.1查看索引结构
powershell
# Inspect the structure (mapping) of customer_join_index, pretty-printed.
curl --silent --insecure --user elastic:Passw0rd \
  --request GET 'xxx.xx.xxx.172:9200/customer_join_index/_mapping?pretty'
4.2关联查询
html
查询目标 使用语法 返回文档
父→是否有符合条件的子 has_child 父
子→父是否符合条件 has_parent 子
4.2.1查有high_value标签的客户(返回父)
powershell
# Find customers that have a high_value label. The top-level hits are the parent
# (customer) documents; inner_hits carries the matching child (label) documents,
# so its _source filter must name label fields. Parent fields such as cust_name
# do not exist on children, and "high_value" is a label_code value, not a field.
curl -s -k -u elastic:Passw0rd -XGET \
  'http://xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  -H 'Content-Type: application/json' -d '
{
  "query": {
    "has_child": {
      "type": "label",
      "query": {
        "term": { "label_code": "high_value" }
      },
      "inner_hits": {
        "_source": ["label_code", "label_value", "label_level"]
      }
    }
  }
}'
4.2.2查找属于成年客户的标签(返回子)
powershell
# Find labels whose parent customer is older than 18. The top-level hits are the
# child (label) documents; the empty inner_hits object also returns the matching parent.
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'http://xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": {
    "has_parent": {
      "parent_type": "customer",
      "query": {
        "range": { "cust_age": { "gt": 18 } }
      },
      "inner_hits": {}
    }
  }
}'
--只返回部分字段
# Same has_parent search, but trim the inner hit to selected fields.
# inner_hits of has_parent contains the parent (customer) document, so only
# customer fields can be selected there; label fields such as label_level
# belong to the top-level (child) hits, and "high_value" is a value, not a field.
curl -s -k -u elastic:Passw0rd -XGET \
  'http://xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  -H 'Content-Type: application/json' -d '
{
  "query": {
    "has_parent": {
      "parent_type": "customer",
      "query": {
        "range": { "cust_age": { "gt": 18 } }
      },
      "inner_hits": {
        "_source": ["cust_name", "cust_age"],
        "size": 1
      }
    }
  }
}'
4.3验证结果
powershell
# Fetch the parent document by _id to verify the write.
# The method is given with -XGET: a bare "-GET" is parsed by curl as the bundled
# short options -G -E T (--get plus --cert T), not as a request method.
curl -s -k -u elastic:Passw0rd -XGET xxx.xx.xxx.172:9200/customer_join_index/_doc/1001
返回:
{"_index":"customer_join_index","_type":"_doc","_id":"1001","_version":1,"_seq_no":0,"_primary_term":1,"_routing":"1001","found":true,"_source":{"customer_id":"1001","doc_type":"customer","cust_join_type":{"name":"customer"},"cust_name":"张三","cust_age":20}}
# Fetch the child (label) document by _id to verify the write.
# -XGET replaces "-GET", which curl parses as -G -E T (--get plus --cert T).
# The child is stored under its parent's routing value (1001), so routing is passed
# explicitly; the URL is quoted so the shell does not treat "?" as a glob character.
curl -s -k -u elastic:Passw0rd -XGET 'xxx.xx.xxx.172:9200/customer_join_index/_doc/1001_high_value?routing=1001'
返回:{"_index":"customer_join_index","_type":"_doc","_id":"1001_high_value","_version":2,"_seq_no":2,"_primary_term":1,"_routing":"1001","found":true,"_source":{"doc_id":"1001_high_value","customer_id":"1001","doc_type":"label","cust_join_type":{"name":"label","parent":"1001"},"label_code":"high_value","label_value":"1","label_level":2}}
--查找有high_value标签的客户
# Verification query: return the customers (parents) that own a label child
# whose label_code is high_value.
curl --silent --insecure --user elastic:Passw0rd \
  --request GET 'xxx.xx.xxx.172:9200/customer_join_index/_search' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": {
    "has_child": {
      "type": "label",
      "query": {
        "term": { "label_code": "high_value" }
      }
    }
  }
}'
返回:
{"took":12,"timed_out":false,"_shards":{"total":1,"successful":1,"skipped":0,"failed":0},"hits":{"total":{"value":1,"relation":"eq"},"max_score":1.0,"hits":[{"_index":"customer_join_index","_type":"_doc","_id":"1001","_score":1.0,"_routing":"1001","_source":{"customer_id":"1001","doc_type":"customer","cust_join_type":{"name":"customer"},"cust_name":"张三","cust_age":20}}]}}
--查找某个客户的标签
# Verification query: list the label children that belong to parent _id 1001.
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'xxx.xx.xxx.172:9200/customer_join_index/_search' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": {
    "parent_id": { "type": "label", "id": "1001" }
  }
}'
返回:
{"took":2,"timed_out":false,"_shards":{"total":1,"successful":1,"skipped":0,"failed":0},"hits":{"total":{"value":1,"relation":"eq"},"max_score":0.18232156,"hits":[{"_index":"customer_join_index","_type":"_doc","_id":"1001_high_value","_score":0.18232156,"_routing":"1001","_source":{"doc_id":"1001_high_value","customer_id":"1001","doc_type":"label","cust_join_type":{"name":"label","parent":"1001"},"label_code":"high_value","label_value":"1","label_level":2}}]}}
4.4通用查询
4.4.1简单查看所有文档(推荐用于<1万条)
powershell
# List every document in the index (parents and children), up to 1000 hits.
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match_all": {} },
  "size": 1000
}'
参数说明:
"match_all": {} :匹配所有文档(父+子)
"size": 1000 :一次返回最多1000条(最大可设为10000)
?pretty :格式化JSON输出,便于阅读
4.4.2只看父文档(按 doc_type = "customer" 过滤,即 cust_join_type.name = "customer" 的文档)
powershell
# Return only parent documents by filtering on doc_type = customer.
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": {
    "term": { "doc_type": "customer" }
  }
}'
4.4.3只看子文档(按 doc_type = "label" 过滤,即 cust_join_type.name = "label" 的文档)
powershell
# Return only child documents by filtering on doc_type = label.
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'xxx.xx.xxx.172:9200/customer_join_index/_search?pretty' \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": {
    "term": { "doc_type": "label" }
  }
}'
4.4.4精简返回字段(结构化输出)
powershell
例如只返回关键信息:
# Compact output: filter_path keeps only hits.hits._source in the response, and
# the _source list restricts each document to the named fields (up to 100 hits).
curl --silent --insecure --user elastic:Passw0rd --request GET \
  'xxx.xx.xxx.172:9200/customer_join_index/_search?filter_path=hits.hits._source' \
  --header 'Content-Type: application/json' \
  --data '
{
  "_source": [
    "cust_join_type.name",
    "doc_type",
    "customer_id",
    "cust_name",
    "label_code",
    "label_value"
  ],
  "query": { "match_all": {} },
  "size": 100
}'
{"hits":{"hits":[{"_source":{"cust_join_type":{"name":"customer"},"cust_name":"张三","doc_type":"customer","customer_id":"1001"}},{"_source":{"cust_join_type":{"name":"label"},"label_value":"1","doc_type":"label","customer_id":"1001","label_code":"high_value"}}]}}